@aiello/wechat-to-markdown 1.2.3 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,6 +4,12 @@ Fork from https://github.com/LuckyRyan-web/wechat-to-markdown
4
4
 
5
5
  add some bug fix
6
6
 
7
+ ```bash
8
+ yarn add @aiello/wechat-to-markdown
9
+ # or
10
+ npm install @aiello/wechat-to-markdown
11
+ ```
12
+
7
13
  ## description
8
14
 
9
15
  Enter the WeChat public address to convert it to markdown format
@@ -31,7 +37,7 @@ interface TurnDownResult {
31
37
  ## CommonJs
32
38
 
33
39
  ```javascript
34
- const transformHtml2Markdown = require('@ryan-liu/wechat-to-markdown').default
40
+ const transformHtml2Markdown = require('@aiello/wechat-to-markdown').default
35
41
 
36
42
  setTimeout(async () => {
37
43
  const articleData = await transformHtml2Markdown('https://mp.weixin.qq.com/s/9d5DWg7YdMHPvVl-2KLH2w')
@@ -47,7 +53,7 @@ setTimeout(async () => {
47
53
  ## vue3
48
54
  **index.ts**
49
55
  ```javascript
50
- import transformHtml2Markdown from '@ryan-liu/wechat-to-markdown'
56
+ import transformHtml2Markdown from '@aiello/wechat-to-markdown'
51
57
 
52
58
  setup() {
53
59
  const getData = async () => {
package/dist/index.cjs CHANGED
@@ -91,43 +91,62 @@ function figure2markdown(figureHTML) {
91
91
  }
92
92
 
93
93
  // src/turndownCode.ts
94
- var turndownService = new import_turndown.default({
95
- codeBlockStyle: "fenced",
96
- hr: ""
97
- });
98
- turndownService.use(import_turndown_plugin_gfm.gfm);
99
- turndownService.addRule("pre2Code", {
100
- filter: ["pre"],
101
- replacement(content, node) {
102
- const len = content.length;
103
- const isCode = content[0] === "`" && content[len - 1] === "`";
104
- let pre_Markdown = "";
105
- if (isCode) {
106
- pre_Markdown = formatCode(node.innerHTML);
94
+ function getTurnDownService(params) {
95
+ const turndownService = new import_turndown.default({
96
+ codeBlockStyle: "fenced",
97
+ hr: ""
98
+ });
99
+ turndownService.use(import_turndown_plugin_gfm.gfm);
100
+ let videoCounter = 0;
101
+ turndownService.addRule("pre2Code", {
102
+ filter: ["pre"],
103
+ replacement(content, node) {
104
+ const len = content.length;
105
+ const isCode = content[0] === "`" && content[len - 1] === "`";
106
+ let pre_Markdown = "";
107
+ if (isCode) {
108
+ pre_Markdown = formatCode(node.innerHTML);
109
+ }
110
+ const res = isCode ? pre_Markdown : content;
111
+ return "```\n" + res + "\n```\n";
107
112
  }
108
- const res = isCode ? pre_Markdown : content;
109
- return "```\n" + res + "\n```\n";
110
- }
111
- }).addRule("getImage", {
112
- filter: ["img"],
113
- replacement(content, node) {
114
- const src = node.getAttribute("data-src") || "";
115
- return src ? `
113
+ }).addRule("getImage", {
114
+ filter: ["img"],
115
+ replacement(content, node) {
116
+ const src = node.getAttribute("data-src") || "";
117
+ return src ? `
116
118
 
117
- ![](${src})
119
+ ![](${src})
118
120
 
119
121
  ` : "";
120
- }
121
- }).addRule("lineBreaks", {
122
- filter: "br",
123
- replacement: () => "\n"
124
- }).addRule("img2Code", {
125
- filter: ["figure"],
126
- replacement(content, node) {
127
- const res = figure2markdown(node.innerHTML);
128
- return res || "";
129
- }
130
- });
122
+ }
123
+ }).addRule("video", {
124
+ filter: (node) => {
125
+ return node.tagName.toLowerCase() === "iframe" && node.className.includes("video_iframe");
126
+ },
127
+ replacement(content, _node) {
128
+ const node = _node;
129
+ const cover = decodeURIComponent(node.getAttribute("data-cover") || "");
130
+ const u = new URL(params.url);
131
+ u.hash = `js_mp_video_container_${videoCounter++}`;
132
+ return cover ? `
133
+
134
+ [![](${cover})](${u.href})
135
+
136
+ ` : "";
137
+ }
138
+ }).addRule("lineBreaks", {
139
+ filter: "br",
140
+ replacement: () => "\n"
141
+ }).addRule("img2Code", {
142
+ filter: ["figure"],
143
+ replacement(content, node) {
144
+ const res = figure2markdown(node.innerHTML);
145
+ return res || "";
146
+ }
147
+ });
148
+ return turndownService;
149
+ }
131
150
 
132
151
  // src/index.ts
133
152
  var getError = (code) => {
@@ -138,21 +157,28 @@ var getError = (code) => {
138
157
  };
139
158
  };
140
159
  async function transformHtml2Markdown(url) {
160
+ const u = new URL(url);
161
+ u.searchParams.delete("poc_token");
141
162
  let json = await import_axios.default.request({
142
- url,
163
+ url: u.href,
143
164
  method: "get",
144
165
  timeout: 3e4,
145
166
  transformResponse(res) {
146
167
  return res;
147
168
  }
148
169
  }).then((res) => {
149
- const $ = import_cheerio2.default.load(res["data"]);
170
+ var _a;
171
+ const $ = (0, import_cheerio2.load)(res["data"]);
150
172
  let title = $("#activity-name").text();
151
173
  title = title.trim() || "";
152
- const author = Array.from(new Set($("#js_name").text().split("\n").map((item) => item.trim()).filter(Boolean))).join("\n");
153
- const html = $("#js_content").html();
174
+ const author = Array.from(new Set([
175
+ (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
176
+ ...$("#js_name").text().split("\n")
177
+ ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
178
+ const htmlEl = $("#js_content");
179
+ const html = htmlEl.html();
154
180
  if (html && html.length > 0) {
155
- let res2 = turndownService.turndown(html);
181
+ let res2 = getTurnDownService({ url: u.href }).turndown(html);
156
182
  res2 = `## ${title}
157
183
 
158
184
  ## \u4F5C\u8005 ${author}
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/index.ts", "../node_modules/tsup/assets/cjs_shims.js", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
- "sourcesContent": ["import axios from 'axios'\nimport cheerio from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { turndownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n let json: TurnDownResult = await axios\n .request({\n url,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = cheerio.load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n\n const author = Array.from(\n new Set(\n $('#js_name')\n .text()\n .split('\\n')\n .map((item) => item.trim())\n .filter(Boolean)\n )\n ).join('\\n')\n\n const html = $('#js_content').html()\n\n if (html && html.length > 0) {\n let res = turndownService.turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport { gfm } from 'turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\nconst turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n})\n\nturndownService.use(gfm)\n\n// \u81EA\u5B9A\u4E49\u914D\u7F6E\nturndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n ![](${src}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\nexport { turndownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
- "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAoB;;;AEDb,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAAoB;;;ACJpB,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD9DJ,IAAM,kBAAkB,IAAI,wBAAgB;AAAA,EACxC,gBAAgB;AAAA,EAChB,IAAI;AAAA;AAGR,gBAAgB,IAAI;AAGpB,gBACK,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,QAAQ;AAEpB,UAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,QAAI,eAAe;AAEnB,QAAI,QAAQ;AACR,qBAAe,WAAW,KAAK;AAAA;AAGnC,UAAM,MAAM,SAAS,eAAe;AAEpC,WAAO,UAAU,MAAM;AAAA;AAAA,GAG9B,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,WAAO,MAAM;AAAA;AAAA,OAAY;AAAA;AAAA,IAAc;AAAA;AAAA,GAG9C,QAAQ,cAAc;AAAA,EACnB,QAAQ;AAAA,EACR,aAAa,MAAM;AAAA,GAEtB,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,gBAAgB,KAAK;AACjC,WAAO,OAAO;AAAA;AAAA;;;AJ5C1B,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,MAAI,OAAuB,MAAM,qBAC5B,QAAQ;AAAA,IACL;AAAA,IACA,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AACX,UAAM,IAAI,wBAAQ,KAAK,IAAI;AAE3B,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AAExB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA,EAAE,YACG,OACA,MAAM,MACN,IAAI,CAAC,SAAS,KAAK,QACnB,OAAO,WAElB,KAAK;AAEP,UAAM,OAAO,EAAE,eAAe;AAE9B,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,gBAAgB,SAAS;AAEnC,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
4
+ "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n let json: TurnDownResult = await axios\n .request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService({ url: u.href }).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n", "export const importMetaUrlShim =\n typeof document === 'undefined'\n ? new (require('u' + 'rl').URL)('file:' + __filename).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport { gfm } from 'turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n turndownService.use(gfm)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
+ "mappings": ";;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;;;ACAO,IAAM,oBACX,OAAO,aAAa,cAChB,IAAK,SAAQ,QAAY,IAAK,UAAU,YAAY,OACnD,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,SAAS;;;ADJ3C,mBAAkB;AAClB,sBAAqB;;;AEDd,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB,sBAA4B;AAC5B,iCAAoB;;;ACJpB,qBAAoB;AAWb,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,uBAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,wBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,kBAAgB,IAAI;AAEpB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AJ3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI,OAAuB,MAAM,qBAC5B,QAAQ;AAAA,IACL,KAAK,EAAE;AAAA,IACP,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AAjCvB;AAkCY,UAAM,IAAI,0BAAK,IAAI;AAEnB,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AACxB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,MACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,MAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,MAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,UAAM,SAAS,EAAE;AACjB,UAAM,OAAO,OAAO;AAEpB,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,mBAAmB,EAAE,KAAK,EAAE,QAAQ,SAAS;AAEvD,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
6
6
  "names": []
7
7
  }
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  // src/index.ts
2
2
  import axios from "axios";
3
- import cheerio2 from "cheerio";
3
+ import { load } from "cheerio";
4
4
 
5
5
  // src/error.ts
6
6
  var errObj = {
@@ -58,43 +58,62 @@ function figure2markdown(figureHTML) {
58
58
  }
59
59
 
60
60
  // src/turndownCode.ts
61
- var turndownService = new turnDownService({
62
- codeBlockStyle: "fenced",
63
- hr: ""
64
- });
65
- turndownService.use(gfm);
66
- turndownService.addRule("pre2Code", {
67
- filter: ["pre"],
68
- replacement(content, node) {
69
- const len = content.length;
70
- const isCode = content[0] === "`" && content[len - 1] === "`";
71
- let pre_Markdown = "";
72
- if (isCode) {
73
- pre_Markdown = formatCode(node.innerHTML);
61
+ function getTurnDownService(params) {
62
+ const turndownService = new turnDownService({
63
+ codeBlockStyle: "fenced",
64
+ hr: ""
65
+ });
66
+ turndownService.use(gfm);
67
+ let videoCounter = 0;
68
+ turndownService.addRule("pre2Code", {
69
+ filter: ["pre"],
70
+ replacement(content, node) {
71
+ const len = content.length;
72
+ const isCode = content[0] === "`" && content[len - 1] === "`";
73
+ let pre_Markdown = "";
74
+ if (isCode) {
75
+ pre_Markdown = formatCode(node.innerHTML);
76
+ }
77
+ const res = isCode ? pre_Markdown : content;
78
+ return "```\n" + res + "\n```\n";
74
79
  }
75
- const res = isCode ? pre_Markdown : content;
76
- return "```\n" + res + "\n```\n";
77
- }
78
- }).addRule("getImage", {
79
- filter: ["img"],
80
- replacement(content, node) {
81
- const src = node.getAttribute("data-src") || "";
82
- return src ? `
80
+ }).addRule("getImage", {
81
+ filter: ["img"],
82
+ replacement(content, node) {
83
+ const src = node.getAttribute("data-src") || "";
84
+ return src ? `
83
85
 
84
- ![](${src})
86
+ ![](${src})
85
87
 
86
88
  ` : "";
87
- }
88
- }).addRule("lineBreaks", {
89
- filter: "br",
90
- replacement: () => "\n"
91
- }).addRule("img2Code", {
92
- filter: ["figure"],
93
- replacement(content, node) {
94
- const res = figure2markdown(node.innerHTML);
95
- return res || "";
96
- }
97
- });
89
+ }
90
+ }).addRule("video", {
91
+ filter: (node) => {
92
+ return node.tagName.toLowerCase() === "iframe" && node.className.includes("video_iframe");
93
+ },
94
+ replacement(content, _node) {
95
+ const node = _node;
96
+ const cover = decodeURIComponent(node.getAttribute("data-cover") || "");
97
+ const u = new URL(params.url);
98
+ u.hash = `js_mp_video_container_${videoCounter++}`;
99
+ return cover ? `
100
+
101
+ [![](${cover})](${u.href})
102
+
103
+ ` : "";
104
+ }
105
+ }).addRule("lineBreaks", {
106
+ filter: "br",
107
+ replacement: () => "\n"
108
+ }).addRule("img2Code", {
109
+ filter: ["figure"],
110
+ replacement(content, node) {
111
+ const res = figure2markdown(node.innerHTML);
112
+ return res || "";
113
+ }
114
+ });
115
+ return turndownService;
116
+ }
98
117
 
99
118
  // src/index.ts
100
119
  var getError = (code) => {
@@ -105,21 +124,28 @@ var getError = (code) => {
105
124
  };
106
125
  };
107
126
  async function transformHtml2Markdown(url) {
127
+ const u = new URL(url);
128
+ u.searchParams.delete("poc_token");
108
129
  let json = await axios.request({
109
- url,
130
+ url: u.href,
110
131
  method: "get",
111
132
  timeout: 3e4,
112
133
  transformResponse(res) {
113
134
  return res;
114
135
  }
115
136
  }).then((res) => {
116
- const $ = cheerio2.load(res["data"]);
137
+ var _a;
138
+ const $ = load(res["data"]);
117
139
  let title = $("#activity-name").text();
118
140
  title = title.trim() || "";
119
- const author = Array.from(new Set($("#js_name").text().split("\n").map((item) => item.trim()).filter(Boolean))).join("\n");
120
- const html = $("#js_content").html();
141
+ const author = Array.from(new Set([
142
+ (_a = $('meta[name="author"]')) == null ? void 0 : _a.attr("content"),
143
+ ...$("#js_name").text().split("\n")
144
+ ].map((item) => item ? item.trim() : "").filter(Boolean))).join("\n");
145
+ const htmlEl = $("#js_content");
146
+ const html = htmlEl.html();
121
147
  if (html && html.length > 0) {
122
- let res2 = turndownService.turndown(html);
148
+ let res2 = getTurnDownService({ url: u.href }).turndown(html);
123
149
  res2 = `## ${title}
124
150
 
125
151
  ## \u4F5C\u8005 ${author}
package/dist/index.js.map CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "version": 3,
3
3
  "sources": ["../src/index.ts", "../src/error.ts", "../src/type.ts", "../src/turndownCode.ts", "../src/formatHtml.ts"],
4
- "sourcesContent": ["import axios from 'axios'\nimport cheerio from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { turndownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n let json: TurnDownResult = await axios\n .request({\n url,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = cheerio.load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n\n const author = Array.from(\n new Set(\n $('#js_name')\n .text()\n .split('\\n')\n .map((item) => item.trim())\n .filter(Boolean)\n )\n ).join('\\n')\n\n const html = $('#js_content').html()\n\n if (html && html.length > 0) {\n let res = turndownService.turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport { gfm } from 'turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\nconst turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n})\n\nturndownService.use(gfm)\n\n// \u81EA\u5B9A\u4E49\u914D\u7F6E\nturndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n ![](${src}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\nexport { turndownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
- "mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD9DJ,IAAM,kBAAkB,IAAI,gBAAgB;AAAA,EACxC,gBAAgB;AAAA,EAChB,IAAI;AAAA;AAGR,gBAAgB,IAAI;AAGpB,gBACK,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,QAAQ;AAEpB,UAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,QAAI,eAAe;AAEnB,QAAI,QAAQ;AACR,qBAAe,WAAW,KAAK;AAAA;AAGnC,UAAM,MAAM,SAAS,eAAe;AAEpC,WAAO,UAAU,MAAM;AAAA;AAAA,GAG9B,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,WAAO,MAAM;AAAA;AAAA,OAAY;AAAA;AAAA,IAAc;AAAA;AAAA,GAG9C,QAAQ,cAAc;AAAA,EACnB,QAAQ;AAAA,EACR,aAAa,MAAM;AAAA,GAEtB,QAAQ,YAAY;AAAA,EACjB,QAAQ,CAAC;AAAA,EACT,YAAY,SAAS,MAAW;AAC5B,UAAM,MAAM,gBAAgB,KAAK;AACjC,WAAO,OAAO;AAAA;AAAA;;;AH5C1B,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,MAAI,OAAuB,MAAM,MAC5B,QAAQ;AAAA,IACL;AAAA,IACA,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AACX,UAAM,IAAI,SAAQ,KAAK,IAAI;AAE3B,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AAExB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA,EAAE,YACG,OACA,MAAM,MACN,IAAI,CAAC,SAAS,KAAK,QACnB,OAAO,WAElB,KAAK;AAEP,UAAM,OAAO,EAAE,eAAe;AAE9B,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,gBAAgB,SAAS;AAEnC,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
4
+ "sourcesContent": ["import axios from 'axios'\nimport { load } from 'cheerio'\nimport { errObj } from './error'\nimport { TurnDownResult, Status } from './type'\nimport { getTurnDownService } from './turndownCode'\n\nconst getError = (code: number) => {\n return {\n code,\n success: false,\n msg: errObj[code],\n }\n}\n\nexport { TurnDownResult, Status }\n\nexport default async function transformHtml2Markdown(\n url: string\n): Promise<TurnDownResult> {\n const u = new URL(url)\n // \u79FB\u9664\u8BE5\u53C2\u6570\n // \u907F\u514D\u51FA\u73B0 302 \u8DF3\u8F6C\n u.searchParams.delete('poc_token')\n\n let json: TurnDownResult = await axios\n .request({\n url: u.href,\n method: 'get',\n timeout: 30000,\n transformResponse(res) {\n return res\n },\n })\n .then((res) => {\n const $ = load(res['data'])\n\n let title = $('#activity-name').text()\n\n title = title.trim() || ''\n const author = Array.from(\n new Set(\n [\n $('meta[name=\"author\"]')?.attr('content'),\n ...$('#js_name').text().split('\\n'),\n ]\n .map((item) => (item ? item.trim() : ''))\n .filter(Boolean)\n )\n ).join('\\n')\n\n const htmlEl = $('#js_content')\n const html = htmlEl.html()\n\n if (html && html.length > 0) {\n let res = getTurnDownService({ url: u.href }).turndown(html)\n\n res = `## ${title} \\n \\n` + `## \u4F5C\u8005 ${author} \\n \\n` + res\n\n return {\n success: true,\n code: Status.Success,\n data: {\n title,\n author,\n content: res,\n },\n }\n }\n\n return getError(Status.Fail)\n })\n .catch((err) => {\n console.log(err)\n return err\n })\n\n return json\n}\n", "export const errObj: {\n [key: number]: string\n} = {\n '400': '\u5185\u5BB9\u89E3\u6790\u5931\u8D25',\n}\n", "export interface TurnDownResult {\n success: boolean\n code: number\n data?: {\n title?: string\n author?: string\n content?: string\n }\n msg?: string\n}\n\nexport const enum Status {\n Success = 200,\n Fail = 400,\n}\n", "/**\n * html \u8F6C\u6362 markdown \u683C\u5F0F\n */\nimport turnDownService from 'turndown'\nimport { gfm } from 'turndown-plugin-gfm'\nimport { formatCode, figure2markdown } from './formatHtml'\n\ninterface Params {\n url: string\n}\n\nfunction getTurnDownService(params: Params) {\n const turndownService = new turnDownService({\n codeBlockStyle: 'fenced',\n hr: '',\n })\n\n turndownService.use(gfm)\n\n let videoCounter = 0\n\n // \u81EA\u5B9A\u4E49\u914D\u7F6E\n turndownService\n .addRule('pre2Code', {\n filter: ['pre'],\n replacement(content, node: any) {\n const len = content.length\n // \u5FAE\u4FE1\u6587\u7AE0\u83B7\u53D6\u5230\u7684 content\uFF0C \u4F1A\u51FA\u73B0\u9996\u5C3E\u90FD\u6709 '`'\n const isCode = content[0] === '`' && content[len - 1] === '`'\n\n let pre_Markdown = ''\n\n if (isCode) {\n pre_Markdown = formatCode(node.innerHTML)\n }\n\n const res = isCode ? pre_Markdown : content\n\n return '```\\n' + res + '\\n```\\n'\n },\n })\n .addRule('getImage', {\n filter: ['img'],\n replacement(content, node: any) {\n const src = node.getAttribute('data-src') || ''\n\n return src ? `\\n\\n![](${src}) \\n\\n` : ''\n },\n })\n .addRule('video', {\n filter: (node: HTMLElement) => {\n return (\n node.tagName.toLowerCase() === 'iframe' &&\n node.className.includes('video_iframe')\n )\n },\n replacement(content, _node: Node) {\n const node = _node as HTMLIFrameElement\n\n const cover = decodeURIComponent(\n node.getAttribute('data-cover') || ''\n )\n\n const u = new URL(params.url)\n u.hash = `js_mp_video_container_${videoCounter++}`\n\n return cover ? `\\n\\n[![](${cover})](${u.href}) \\n\\n` : ''\n },\n })\n .addRule('lineBreaks', {\n filter: 'br',\n replacement: () => '\\n',\n })\n .addRule('img2Code', {\n filter: ['figure'],\n replacement(content, node: any) {\n const res = figure2markdown(node.innerHTML)\n return res || ''\n },\n })\n\n return turndownService\n}\n\nexport { getTurnDownService }\n", "import cheerio from 'cheerio'\n\n/**\n * \u5FAE\u4FE1\u4E0D\u540C\u4EE3\u7801\u98CE\u683C\n * 1. <code><span>code</span></code>\n * 2. <code><span><span>123</span><br></span></code>\n * turndown \u4E0D\u89E3\u6790 code \u4E0B\u7684 br \u6807\u7B7E\uFF0C\u9700\u8981\u4F7F\u7528\u6B63\u5219\u66FF\u6362 br \u6807\u7B7E\u4E3A \\n \u624D\u53EF\u4EE5\u7EE7\u7EED\u89E3\u6790\n * @param htmlStr\n * @returns\n */\n\nexport function formatCode(htmlStr: string) {\n let code = htmlStr\n\n code = code.replace(/<br>/gi, '\\n')\n\n code = code.replace(/&nbsp;/gi, ' ')\n\n code = code.replace(/&lt;/gi, '<')\n\n code = code.replace(/&gt;/gi, '>')\n\n code = code.replace(/&amp;/gi, '&')\n\n code = code.replace(/&quot;/gi, '\"')\n\n code = code.replace(/&apos;/gi, '\u2018')\n\n code = code.replace(/&times;/gi, '*')\n\n code = code.replace(/&divide;/gi, '%')\n\n const $ = cheerio.load(code)\n\n return $.text()\n}\n\n/**\n * \u89E3\u51B3\u5982\u4E0B\u683C\u5F0F\n * <figcaption><img><figcaption></figcaption></figcaption>\n * @param figureHTML\n * @returns\n */\nexport function figure2markdown(figureHTML: string) {\n const imgRegex = /<img.*?data-src=['\"](.*?)['\"]/\n\n const descRegex = /\\<figcaption .*?>(.+)<\\/figcaption>/\n\n const imgArr = figureHTML.match(imgRegex)\n\n const descArr = figureHTML.match(descRegex)\n\n let imgUrl = ''\n\n let desc = ''\n\n if (Array.isArray(imgArr)) {\n imgUrl = imgArr[1]\n }\n\n if (Array.isArray(descArr)) {\n desc = descArr[1]\n }\n\n // img \u53EF\u80FD\u6CA1\u6709\u56FE\u7247\u8BF4\u660E\n if (imgUrl) {\n return `\\n\\n ![${desc}](${imgUrl}) \\n\\n`\n }\n\n return\n}\n"],
5
+ "mappings": ";AAAA;AACA;;;ACDO,IAAM,SAET;AAAA,EACA,OAAO;AAAA;;;ACQJ,IAAW;AAAX,UAAW,SAAX;AACH,+BAAU,OAAV;AACA,4BAAO,OAAP;AAAA,GAFc;;;ACRlB;AACA;;;ACJA;AAWO,oBAAoB,SAAiB;AACxC,MAAI,OAAO;AAEX,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,UAAU;AAE9B,SAAO,KAAK,QAAQ,WAAW;AAE/B,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,YAAY;AAEhC,SAAO,KAAK,QAAQ,aAAa;AAEjC,SAAO,KAAK,QAAQ,cAAc;AAElC,QAAM,IAAI,QAAQ,KAAK;AAEvB,SAAO,EAAE;AAAA;AASN,yBAAyB,YAAoB;AAChD,QAAM,WAAW;AAEjB,QAAM,YAAY;AAElB,QAAM,SAAS,WAAW,MAAM;AAEhC,QAAM,UAAU,WAAW,MAAM;AAEjC,MAAI,SAAS;AAEb,MAAI,OAAO;AAEX,MAAI,MAAM,QAAQ,SAAS;AACvB,aAAS,OAAO;AAAA;AAGpB,MAAI,MAAM,QAAQ,UAAU;AACxB,WAAO,QAAQ;AAAA;AAInB,MAAI,QAAQ;AACR,WAAO;AAAA;AAAA,KAAU,SAAS;AAAA;AAAA;AAAA;AAG9B;AAAA;;;AD1DJ,4BAA4B,QAAgB;AACxC,QAAM,kBAAkB,IAAI,gBAAgB;AAAA,IACxC,gBAAgB;AAAA,IAChB,IAAI;AAAA;AAGR,kBAAgB,IAAI;AAEpB,MAAI,eAAe;AAGnB,kBACK,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,QAAQ;AAEpB,YAAM,SAAS,QAAQ,OAAO,OAAO,QAAQ,MAAM,OAAO;AAE1D,UAAI,eAAe;AAEnB,UAAI,QAAQ;AACR,uBAAe,WAAW,KAAK;AAAA;AAGnC,YAAM,MAAM,SAAS,eAAe;AAEpC,aAAO,UAAU,MAAM;AAAA;AAAA,KAG9B,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,KAAK,aAAa,eAAe;AAE7C,aAAO,MAAM;AAAA;AAAA,MAAW;AAAA;AAAA,IAAc;AAAA;AAAA,KAG7C,QAAQ,SAAS;AAAA,IACd,QAAQ,CAAC,SAAsB;AAC3B,aACI,KAAK,QAAQ,kBAAkB,YAC/B,KAAK,UAAU,SAAS;AAAA;AAAA,IAGhC,YAAY,SAAS,OAAa;AAC9B,YAAM,OAAO;AAEb,YAAM,QAAQ,mBACV,KAAK,aAAa,iBAAiB;AAGvC,YAAM,IAAI,IAAI,IAAI,OAAO;AACzB,QAAE,OAAO,yBAAyB;AAElC,aAAO,QAAQ;AAAA;AAAA,OAAY,WAAW,EAAE;AAAA;AAAA,IAAe;AAAA;AAAA,KAG9D,QAAQ,cAAc;AAAA,IACnB,QAAQ;AAAA,IACR,aAAa,MAAM;AAAA,KAEtB,QAAQ,YAAY;AAAA,IACjB,QAAQ,CAAC;AAAA,IACT,YAAY,SAAS,MAAW;AAC5B,YAAM,MAAM,gBAAgB,KAAK;AACjC,aAAO,OAAO;AAAA;AAAA;AAI1B,SAAO;AAAA;;;AH3EX,IAAM,WAAW,CAAC,SAAiB;AAC/B,SAAO;AAAA,IACH;AAAA,IACA,SAAS;AAAA,IACT,KAAK,OAAO;AAAA;AAAA;AAMpB,sCACI,KACuB;AACvB,QAAM,IAAI,IAAI,IAAI;AAGlB,IAAE,aAAa,OAAO;AAEtB,MAAI,OAAuB,MAAM,MAC5B,QAAQ;AAAA,IACL,KAAK,EAAE;AAAA,IACP,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,kBAAkB,KAAK;AACnB,aAAO;AAAA;AAAA,KAGd,KAAK,CAAC,QAAQ;AAjCvB;AAkCY,UAAM,IAAI,KAAK,IAAI;AAEnB,QAAI,QAAQ,EAAE,kBAAkB;AAEhC,YAAQ,MAAM,UAAU;AACxB,UAAM,SAAS,MAAM,KACjB,IAAI,IACA;AAAA,MACI,QAAE,2BAAF,mBAA0B,KAAK;AAAA,MAC/B,GAAG,EAAE,YAAY,OAAO,MAAM;AAAA,MAE7B,IAAI,CAAC,SAAU,OAAO,KAAK,SAAS,IACpC,OAAO,WAElB,KAAK;AAEP,UAAM,SAAS,EAAE;AACjB,UAAM,OAAO,OAAO;AAEpB,QAAI,QAAQ,KAAK,SAAS,GAAG;AACzB,UAAI,OAAM,mBAAmB,EAAE,KAAK,EAAE,QAAQ,SAAS;AAEvD,aAAM,MAAM;AAAA;AAAA,kBAAyB;AAAA;AAAA,IAAiB;AAEtD,aAAO;AAAA,QACH,SAAS;AAAA,QACT,MAAM,OAAO;AAAA,QACb,MAAM;AAAA,UACF;AAAA,UACA;AAAA,UACA,SAAS;AAAA;AAAA;AAAA;AAKrB,WAAO,SAAS,OAAO;AAAA,KAE1B,MAAM,CAAC,QAAQ;AACZ,YAAQ,IAAI;AACZ,WAAO;AAAA;AAGf,SAAO;AAAA;",
6
6
  "names": []
7
7
  }
package/package.json CHANGED
@@ -1,66 +1,67 @@
1
1
  {
2
- "name": "@aiello/wechat-to-markdown",
3
- "version": "1.2.3",
4
- "description": "解析微信文章 URL 为 markdown",
5
- "author": "Aiello Chan<aiello.chan@gmail.com>",
6
- "keywords": [
7
- "wechat to markdown",
8
- "markdown"
9
- ],
10
- "main": "dist/index.cjs",
11
- "module": "dist/index.js",
12
- "types": "dist/index.d.ts",
13
- "files": [
14
- "dist"
15
- ],
16
- "type": "module",
17
- "license": "MIT",
18
- "devDependencies": {
19
- "@ryan-liu/eslint-config-jsx-config": "^1.0.2",
20
- "@types/fs-extra": "^9.0.13",
21
- "@types/turndown": "^5.0.1",
22
- "cross-spawn": "^7.0.3",
23
- "eslint": "^7.32.0",
24
- "husky": "^7.0.2",
25
- "inquirer": "^8.2.0",
26
- "lint-staged": "^11.1.2",
27
- "prettier": "^2.4.1",
28
- "tsup": "^5.4.0",
29
- "typescript": "^4.4.3",
30
- "vitest": "^1.3.1"
31
- },
32
- "scripts": {
33
- "dev": "yarn build --watch",
34
- "build": "tsup src/index.ts --dts --format cjs,esm",
35
- "lint": "lint-staged",
36
- "pub": "node tools/publish.js",
37
- "test": "vitest"
38
- },
39
- "husky": {
40
- "hooks": {
41
- "pre-commit": "lint-staged"
42
- }
43
- },
44
- "lint-staged": {
45
- "*.{ts,js}": [
46
- "eslint --fix --quiet"
2
+ "name": "@aiello/wechat-to-markdown",
3
+ "version": "1.2.6",
4
+ "description": "解析微信文章 URL 为 markdown",
5
+ "author": "Aiello Chan<aiello.chan@gmail.com>",
6
+ "keywords": [
7
+ "wechat to markdown",
8
+ "markdown"
9
+ ],
10
+ "main": "dist/index.cjs",
11
+ "module": "dist/index.js",
12
+ "types": "dist/index.d.ts",
13
+ "files": [
14
+ "dist"
47
15
  ],
48
- "*.json": [
49
- "prettier --write"
50
- ]
51
- },
52
- "tsup": {
53
- "splitting": false,
54
- "sourcemap": true,
55
- "clean": true,
56
- "external": [
57
- "html2markdown"
58
- ]
59
- },
60
- "dependencies": {
61
- "axios": "^0.22.0",
62
- "cheerio": "^1.0.0-rc.10",
63
- "turndown": "^7.1.1",
64
- "turndown-plugin-gfm": "^1.0.2"
65
- }
16
+ "type": "module",
17
+ "license": "MIT",
18
+ "devDependencies": {
19
+ "@ryan-liu/eslint-config-jsx-config": "^1.0.2",
20
+ "@types/fs-extra": "^9.0.13",
21
+ "@types/turndown": "^5.0.1",
22
+ "cross-spawn": "^7.0.3",
23
+ "eslint": "^7.32.0",
24
+ "husky": "^7.0.2",
25
+ "inquirer": "^8.2.0",
26
+ "lint-staged": "^11.1.2",
27
+ "prettier": "^2.4.1",
28
+ "tsup": "^5.4.0",
29
+ "typescript": "^4.4.3",
30
+ "vitest": "^1.3.1"
31
+ },
32
+ "scripts": {
33
+ "dev": "yarn build --watch",
34
+ "build": "tsup src/index.ts --dts --format cjs,esm",
35
+ "lint": "lint-staged",
36
+ "pub": "node tools/publish.js",
37
+ "test": "vitest"
38
+ },
39
+ "husky": {
40
+ "hooks": {
41
+ "pre-commit": "lint-staged"
42
+ }
43
+ },
44
+ "lint-staged": {
45
+ "*.{ts,js}": [
46
+ "eslint --fix --quiet"
47
+ ],
48
+ "*.json": [
49
+ "prettier --write"
50
+ ]
51
+ },
52
+ "tsup": {
53
+ "splitting": false,
54
+ "sourcemap": true,
55
+ "clean": true,
56
+ "external": [
57
+ "html2markdown"
58
+ ]
59
+ },
60
+ "dependencies": {
61
+ "@guyplusplus/turndown-plugin-gfm": "^1.0.7",
62
+ "axios": "^0.22.0",
63
+ "cheerio": "^1.0.0-rc.10",
64
+ "turndown": "^7.1.1",
65
+ "turndown-plugin-gfm": "1.0.2"
66
+ }
66
67
  }