@aiello/wechat-to-markdown 1.2.7 → 1.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -25,7 +25,9 @@ var __toModule = (module2) => {
25
25
  // src/index.ts
26
26
  __export(exports, {
27
27
  Status: () => Status,
28
- default: () => transformHtml2Markdown
28
+ default: () => transformHtml2Markdown,
29
+ getTurnDownService: () => getTurnDownService,
30
+ parseHTML: () => parseHTML
29
31
  });
30
32
 
31
33
  // node_modules/tsup/assets/cjs_shims.js
@@ -156,53 +158,59 @@ var getError = (code) => {
156
158
  msg: errObj[code]
157
159
  };
158
160
  };
159
- async function transformHtml2Markdown(url) {
160
- const u = new URL(url);
161
- u.searchParams.delete("poc_token");
162
- let json = await import_axios.default.request({
163
- url: u.href,
164
- method: "get",
165
- timeout: 3e4,
166
- transformResponse(res) {
167
- return res;
168
- }
169
- }).then((res) => {
170
- var _a;
171
- const $ = (0, import_cheerio2.load)(res["data"]);
172
- let title = $("#activity-name").text();
173
- title = title.trim() || "";
174
- const author = Array.from(new Set([
175
- (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
176
- ...$("#js_name").text().split("\n")
177
- ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
178
- const htmlEl = $("#js_content");
179
- const html = htmlEl.html();
180
- if (html && html.length > 0) {
181
- let res2 = getTurnDownService({ url: u.href }).turndown(html);
182
- res2 = `## ${title}
161
+ async function parseHTML(htmlRaw, meta) {
162
+ var _a;
163
+ const $ = (0, import_cheerio2.load)(htmlRaw);
164
+ let title = $("#activity-name").text();
165
+ title = title.trim() || "";
166
+ const author = Array.from(new Set([
167
+ (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
168
+ ...$("#js_name").text().split("\n")
169
+ ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
170
+ const htmlEl = $("#js_content");
171
+ const html = htmlEl.html();
172
+ if (html && html.length > 0) {
173
+ let res = getTurnDownService(meta).turndown(html);
174
+ res = `## ${title}
183
175
 
184
176
  ## \u4F5C\u8005 ${author}
185
177
 
186
- ` + res2;
187
- return {
188
- success: true,
189
- code: Status.Success,
190
- data: {
191
- title,
192
- author,
193
- content: res2
194
- }
195
- };
196
- }
197
- return getError(Status.Fail);
198
- }).catch((err) => {
178
+ ` + res;
179
+ return {
180
+ success: true,
181
+ code: Status.Success,
182
+ data: {
183
+ title,
184
+ author,
185
+ content: res
186
+ }
187
+ };
188
+ }
189
+ return getError(Status.Fail);
190
+ }
191
+ async function transformHtml2Markdown(url) {
192
+ const u = new URL(url);
193
+ u.searchParams.delete("poc_token");
194
+ try {
195
+ const res = await import_axios.default.request({
196
+ url: u.href,
197
+ method: "get",
198
+ timeout: 3e4,
199
+ maxRedirects: 5,
200
+ transformResponse(res2) {
201
+ return res2;
202
+ }
203
+ });
204
+ return parseHTML(res.data, { url: u.href });
205
+ } catch (err) {
199
206
  console.log(err);
200
- return err;
201
- });
202
- return json;
207
+ return getError(Status.Fail);
208
+ }
203
209
  }
204
210
  // Annotate the CommonJS export names for ESM import in node:
205
211
  0 && (module.exports = {
206
- Status
212
+ Status,
213
+ getTurnDownService,
214
+ parseHTML
207
215
  });
208
216
  //# sourceMappingURL=index.cjs.map
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/index.ts", "../node_modules/tsup/assets/cjs_shims.js", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
- "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n let json: TurnDownResult = await axios\n .request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService({ url: u.href }).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
- "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAqB;;;AEDd,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,wBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,qCAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AJ3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI,OAAuB,MAAM,qBAC5B,QAAQ;AAAA,IACL,KAAK,EAAE;AAAA,IACP,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AAjCvB;AAkCY,UAAM,IAAI,0BAAK,IAAI;AAEnB,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AACxB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,MACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,MAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,MAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,UAAM,SAAS,EAAE;AACjB,UAAM,OAAO,OAAO;AAEpB,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,mBAAmB,EAAE,KAAK,EAAE,QAAQ,SAAS;AAEvD,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
4
+ "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n maxRedirects: 5,\n transformResponse(res) {\n return res\n },\n })\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
+ "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAqB;;;AEDd,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAA8B;;;ACJ9B,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,wBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,qCAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AJ3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,yBAAgC,SAAiB,MAAuB;AAhBxE;AAiBI,QAAM,IAAI,0BAAK;AAEf,MAAI,QAAQ,EAAE,kBAAkB;AAEhC,UAAQ,MAAM,UAAU;AACxB,QAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,IACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,IAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,IAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,QAAM,SAAS,EAAE;AACjB,QAAM,OAAO,OAAO;AAEpB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,MAAM,SAAS;AAE5C,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT,MAAM,OAAO;AAAA,MACb,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA;AAAA;AAAA;AAKrB,SAAO,SAAS,OAAO;AAAA;AAG3B,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI;AACA,UAAM,MAAM,MAAM,qBAAM,QAAQ;AAAA,MAC5B,KAAK,EAAE;AAAA,MACP,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,cAAc;AAAA,MACd,kBAAkB,MAAK;AACnB,eAAO;AAAA;AAAA;AAGf,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,WAC/B,KAAP;AACE,YAAQ,IAAI;AACZ,WAAO,SAAS,OAAO;AAAA;AAAA;",
6
6
  "names": []
7
7
  }
package/dist/index.d.ts CHANGED
@@ -1,3 +1,5 @@
1
+ import turnDownService from 'turndown';
2
+
1
3
  interface TurnDownResult {
2
4
  success: boolean;
3
5
  code: number;
@@ -13,6 +15,30 @@ declare const enum Status {
13
15
  Fail = 400
14
16
  }
15
17
 
18
+ /**
19
+ * html 转换 markdown 格式
20
+ */
21
+
22
+ interface Params {
23
+ url: string;
24
+ }
25
+ declare function getTurnDownService(params: Params): turnDownService;
26
+
27
+ declare function parseHTML(htmlRaw: string, meta: {
28
+ url: string;
29
+ }): Promise<{
30
+ code: number;
31
+ success: boolean;
32
+ msg: string;
33
+ } | {
34
+ success: boolean;
35
+ code: Status;
36
+ data: {
37
+ title: string;
38
+ author: string;
39
+ content: string;
40
+ };
41
+ }>;
16
42
  declare function transformHtml2Markdown(url: string): Promise<TurnDownResult>;
17
43
 
18
- export { Status, TurnDownResult, transformHtml2Markdown as default };
44
+ export { Status, TurnDownResult, transformHtml2Markdown as default, getTurnDownService, parseHTML };
package/dist/index.js CHANGED
@@ -123,53 +123,59 @@ var getError = (code) => {
123
123
  msg: errObj[code]
124
124
  };
125
125
  };
126
- async function transformHtml2Markdown(url) {
127
- const u = new URL(url);
128
- u.searchParams.delete("poc_token");
129
- let json = await axios.request({
130
- url: u.href,
131
- method: "get",
132
- timeout: 3e4,
133
- transformResponse(res) {
134
- return res;
135
- }
136
- }).then((res) => {
137
- var _a;
138
- const $ = load(res["data"]);
139
- let title = $("#activity-name").text();
140
- title = title.trim() || "";
141
- const author = Array.from(new Set([
142
- (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
143
- ...$("#js_name").text().split("\n")
144
- ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
145
- const htmlEl = $("#js_content");
146
- const html = htmlEl.html();
147
- if (html && html.length > 0) {
148
- let res2 = getTurnDownService({ url: u.href }).turndown(html);
149
- res2 = `## ${title}
126
+ async function parseHTML(htmlRaw, meta) {
127
+ var _a;
128
+ const $ = load(htmlRaw);
129
+ let title = $("#activity-name").text();
130
+ title = title.trim() || "";
131
+ const author = Array.from(new Set([
132
+ (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
133
+ ...$("#js_name").text().split("\n")
134
+ ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
135
+ const htmlEl = $("#js_content");
136
+ const html = htmlEl.html();
137
+ if (html && html.length > 0) {
138
+ let res = getTurnDownService(meta).turndown(html);
139
+ res = `## ${title}
150
140
 
151
141
  ## \u4F5C\u8005 ${author}
152
142
 
153
- ` + res2;
154
- return {
155
- success: true,
156
- code: Status.Success,
157
- data: {
158
- title,
159
- author,
160
- content: res2
161
- }
162
- };
163
- }
164
- return getError(Status.Fail);
165
- }).catch((err) => {
143
+ ` + res;
144
+ return {
145
+ success: true,
146
+ code: Status.Success,
147
+ data: {
148
+ title,
149
+ author,
150
+ content: res
151
+ }
152
+ };
153
+ }
154
+ return getError(Status.Fail);
155
+ }
156
+ async function transformHtml2Markdown(url) {
157
+ const u = new URL(url);
158
+ u.searchParams.delete("poc_token");
159
+ try {
160
+ const res = await axios.request({
161
+ url: u.href,
162
+ method: "get",
163
+ timeout: 3e4,
164
+ maxRedirects: 5,
165
+ transformResponse(res2) {
166
+ return res2;
167
+ }
168
+ });
169
+ return parseHTML(res.data, { url: u.href });
170
+ } catch (err) {
166
171
  console.log(err);
167
- return err;
168
- });
169
- return json;
172
+ return getError(Status.Fail);
173
+ }
170
174
  }
171
175
  export {
172
176
  Status,
173
- transformHtml2Markdown as default
177
+ transformHtml2Markdown as default,
178
+ getTurnDownService,
179
+ parseHTML
174
180
  };
175
181
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/index.ts", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
- "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n let json: TurnDownResult = await axios\n .request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService({ url: u.href }).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
- "mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,oBAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AH3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI,OAAuB,MAAM,MAC5B,QAAQ;AAAA,IACL,KAAK,EAAE;AAAA,IACP,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AAjCvB;AAkCY,UAAM,IAAI,KAAK,IAAI;AAEnB,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AACxB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,MACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,MAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,MAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,UAAM,SAAS,EAAE;AACjB,UAAM,OAAO,OAAO;AAEpB,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,mBAAmB,EAAE,KAAK,EAAE,QAAQ,SAAS;AAEvD,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
4
+ "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport async function parseHTML(htmlRaw: string, meta: { url: string }) {\n const $ = load(htmlRaw)\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService(meta).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n}\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n try {\n const res = await axios.request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n maxRedirects: 5,\n transformResponse(res) {\n return res\n },\n })\n return parseHTML(res.data, { url: u.href })\n } catch (err) {\n console.log(err)\n return getError(Status.Fail)\n }\n}\n\nexport { getTurnDownService } from './turndownCode'\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport TurndownPluginGfm from '@guyplusplus/turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n TurndownPluginGfm.gfm(turndownService)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
+ "mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,oBAAkB,IAAI;AAEtB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AH3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,yBAAgC,SAAiB,MAAuB;AAhBxE;AAiBI,QAAM,IAAI,KAAK;AAEf,MAAI,QAAQ,EAAE,kBAAkB;AAEhC,UAAQ,MAAM,UAAU;AACxB,QAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,IACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,IAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,IAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,QAAM,SAAS,EAAE;AACjB,QAAM,OAAO,OAAO;AAEpB,MAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,QAAI,MAAM,mBAAmB,MAAM,SAAS;AAE5C,UAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,WAAO;AAAA,MACH,SAAS;AAAA,MACT,MAAM,OAAO;AAAA,MACb,MAAM;AAAA,QACF;AAAA,QACA;AAAA,QACA,SAAS;AAAA;AAAA;AAAA;AAKrB,SAAO,SAAS,OAAO;AAAA;AAG3B,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI;AACA,UAAM,MAAM,MAAM,MAAM,QAAQ;AAAA,MAC5B,KAAK,EAAE;AAAA,MACP,QAAQ;AAAA,MACR,SAAS;AAAA,MACT,cAAc;AAAA,MACd,kBAAkB,MAAK;AACnB,eAAO;AAAA;AAAA;AAGf,WAAO,UAAU,IAAI,MAAM,EAAE,KAAK,EAAE;AAAA,WAC/B,KAAP;AACE,YAAQ,IAAI;AACZ,WAAO,SAAS,OAAO;AAAA;AAAA;",
6
6
  "names": []
7
7
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiello/wechat-to-markdown",
3
- "version": "1.2.7",
3
+ "version": "1.2.9",
4
4
  "description": "解析微信文章 URL 为 markdown",
5
5
  "author": "Aiello Chan<aiello.chan@gmail.com>",
6
6
  "keywords": [