ns-rss-spider 0.0.29 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
1
  export { parseRss } from "./parse";
2
2
  export { strategies } from "./strategy";
3
3
  export { parseContent } from "./parseContent";
4
+ export { getWebContent } from './utils/browser';
package/dist/cjs/index.js CHANGED
@@ -19,6 +19,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
19
19
  // src/index.ts
20
20
  var src_exports = {};
21
21
  __export(src_exports, {
22
+ getWebContent: () => import_browser.getWebContent,
22
23
  parseContent: () => import_parseContent.parseContent,
23
24
  parseRss: () => import_parse.parseRss,
24
25
  strategies: () => import_strategy.strategies
@@ -27,8 +28,10 @@ module.exports = __toCommonJS(src_exports);
27
28
  var import_parse = require("./parse");
28
29
  var import_strategy = require("./strategy");
29
30
  var import_parseContent = require("./parseContent");
31
+ var import_browser = require("./utils/browser");
30
32
  // Annotate the CommonJS export names for ESM import in node:
31
33
  0 && (module.exports = {
34
+ getWebContent,
32
35
  parseContent,
33
36
  parseRss,
34
37
  strategies
@@ -22,8 +22,36 @@ __export(kr_exports, {
22
22
  _36kr: () => _36kr
23
23
  });
24
24
  module.exports = __toCommonJS(kr_exports);
25
+ var import_zx = require("zx");
26
+ var import_cheerio = require("cheerio");
27
+ var import_constants = require("../utils/constants");
28
+ var import_browser = require("../utils/browser");
25
29
  var _36kr = {
26
- parse: true
30
+ parse: true,
31
+ getThumbs: async (items) => {
32
+ const url = "https://m.36kr.com/";
33
+ const html = await (0, import_browser.getWebContent)(url, {
34
+ userAgent: import_constants.iosUA
35
+ }).catch((e) => {
36
+ console.error(import_zx.chalk.red(`获取 ${url} 错误`), e);
37
+ });
38
+ if (!html)
39
+ return void 0;
40
+ const $ = (0, import_cheerio.load)(html, null, false);
41
+ const kv = {};
42
+ $(".home-flow > .flow-list .article-item a").each((_, $a) => {
43
+ var _a, _b;
44
+ const thumb = $($a).find("img").attr("src");
45
+ const href = $($a).attr("href") || "";
46
+ const reg = /\/p\/([\d]+)/;
47
+ const articleId = (_a = reg.exec(href)) == null ? void 0 : _a[1];
48
+ const guid = (_b = items.find((a) => (a.guid || a.link || "").includes(`/p/${articleId}`))) == null ? void 0 : _b.guid;
49
+ if (guid && thumb) {
50
+ kv[guid] = thumb;
51
+ }
52
+ });
53
+ return kv;
54
+ }
27
55
  };
28
56
  // Annotate the CommonJS export names for ESM import in node:
29
57
  0 && (module.exports = {
@@ -35,6 +35,7 @@ module.exports = __toCommonJS(cnbeta_exports);
35
35
  var import_axios = __toESM(require("axios"));
36
36
  var import_zx = require("zx");
37
37
  var import_cheerio = require("cheerio");
38
+ var import_constants = require("../utils/constants");
38
39
  var cnbeta = {
39
40
  parse: true,
40
41
  fetcher: "http",
@@ -59,7 +60,10 @@ var cnbeta = {
59
60
  ignoreProbeImage: true,
60
61
  getThumbs: async (items) => {
61
62
  const res = await import_axios.default.get("https://m.cnbeta.com.tw/", {
62
- responseType: "text"
63
+ responseType: "text",
64
+ headers: {
65
+ "User-Agent": import_constants.iosUA
66
+ }
63
67
  }).catch((e) => {
64
68
  console.error(import_zx.chalk.red("获取 m.cnbeta.com.tw 错误"), e);
65
69
  });
@@ -36,6 +36,7 @@ var import_utils = require("../parseContent/utils");
36
36
  var import_axios = __toESM(require("axios"));
37
37
  var import_zx = require("zx");
38
38
  var import_cheerio = require("cheerio");
39
+ var import_constants = require("../utils/constants");
39
40
  var ithome = {
40
41
  parse: true,
41
42
  fetcher: "http",
@@ -64,7 +65,10 @@ var ithome = {
64
65
  },
65
66
  getThumbs: async (articles) => {
66
67
  const res = await import_axios.default.get("https://m.ithome.com/", {
67
- responseType: "text"
68
+ responseType: "text",
69
+ headers: {
70
+ "User-Agent": import_constants.iosUA
71
+ }
68
72
  }).catch((e) => {
69
73
  console.error(import_zx.chalk.red("获取 https://m.ithome.com/ 错误"), e);
70
74
  });
@@ -0,0 +1,3 @@
1
+ export declare function getWebContent(url: string, options?: {
2
+ userAgent?: string;
3
+ }): Promise<string>;
@@ -0,0 +1,46 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
3
+ var __getOwnPropNames = Object.getOwnPropertyNames;
4
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
5
+ var __export = (target, all) => {
6
+ for (var name in all)
7
+ __defProp(target, name, { get: all[name], enumerable: true });
8
+ };
9
+ var __copyProps = (to, from, except, desc) => {
10
+ if (from && typeof from === "object" || typeof from === "function") {
11
+ for (let key of __getOwnPropNames(from))
12
+ if (!__hasOwnProp.call(to, key) && key !== except)
13
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
14
+ }
15
+ return to;
16
+ };
17
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
18
+
19
+ // src/utils/browser.ts
20
+ var browser_exports = {};
21
+ __export(browser_exports, {
22
+ getWebContent: () => getWebContent
23
+ });
24
+ module.exports = __toCommonJS(browser_exports);
25
+ var import_playwright = require("playwright");
26
+ async function getWebContent(url, options) {
27
+ const browser = await import_playwright.chromium.launch();
28
+ const page = await browser.newPage({
29
+ userAgent: options == null ? void 0 : options.userAgent
30
+ });
31
+ let resolve;
32
+ const waitForLoadPromise = new Promise((r) => {
33
+ resolve = r;
34
+ });
35
+ page.on("load", (page2) => {
36
+ page2.locator("html").innerHTML().then((h) => resolve == null ? void 0 : resolve(h));
37
+ });
38
+ await page.goto(url);
39
+ const content = await waitForLoadPromise;
40
+ await browser.close();
41
+ return content;
42
+ }
43
+ // Annotate the CommonJS export names for ESM import in node:
44
+ 0 && (module.exports = {
45
+ getWebContent
46
+ });
@@ -0,0 +1 @@
1
+ export declare const iosUA = "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1";
@@ -0,0 +1,29 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
3
+ var __getOwnPropNames = Object.getOwnPropertyNames;
4
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
5
+ var __export = (target, all) => {
6
+ for (var name in all)
7
+ __defProp(target, name, { get: all[name], enumerable: true });
8
+ };
9
+ var __copyProps = (to, from, except, desc) => {
10
+ if (from && typeof from === "object" || typeof from === "function") {
11
+ for (let key of __getOwnPropNames(from))
12
+ if (!__hasOwnProp.call(to, key) && key !== except)
13
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
14
+ }
15
+ return to;
16
+ };
17
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
18
+
19
+ // src/utils/constants.ts
20
+ var constants_exports = {};
21
+ __export(constants_exports, {
22
+ iosUA: () => iosUA
23
+ });
24
+ module.exports = __toCommonJS(constants_exports);
25
+ var iosUA = "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1";
26
+ // Annotate the CommonJS export names for ESM import in node:
27
+ 0 && (module.exports = {
28
+ iosUA
29
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ns-rss-spider",
3
- "version": "0.0.29",
3
+ "version": "0.0.30",
4
4
  "description": "",
5
5
  "main": "dist/cjs/index.js",
6
6
  "types": "dist/cjs/index.d.ts",
@@ -39,6 +39,7 @@
39
39
  "dayjs": "^1.11.10",
40
40
  "html-entities": "^2.4.0",
41
41
  "lodash": "^4.17.21",
42
+ "playwright": "^1.41.2",
42
43
  "probe-image-size": "^7.2.3",
43
44
  "rss-parser": "^3.13.0",
44
45
  "yargs-parser": "^21.1.1",