ns-rss-spider 0.0.20 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,18 @@
1
1
  import { ServerInfo } from "./upload";
2
- export declare function parseRss(name: string, feed: string, server?: ServerInfo): Promise<import("./types").Article[] | {
3
- storePath: any;
4
- type: "simple" | "rich";
5
- guid: string;
6
- title: string;
7
- link: string;
8
- description: string;
9
- pubDate: Date;
10
- author?: string | undefined;
11
- source?: string | undefined;
12
- categories?: string | undefined;
13
- }[]>;
2
+ import { Article } from "./types";
3
+ export declare function parseRss(name: string, feed: string, server?: ServerInfo): Promise<never[] | {
4
+ contents: Article[] | {
5
+ storePath: any;
6
+ type: "rich" | "simple";
7
+ guid: string;
8
+ title: string;
9
+ link: string;
10
+ description: string;
11
+ pubDate: Date;
12
+ author?: string | undefined;
13
+ source?: string | undefined;
14
+ categories?: string | undefined;
15
+ thumb?: string | undefined;
16
+ }[];
17
+ thumbs: Record<string, string> | undefined;
18
+ }>;
package/dist/cjs/parse.js CHANGED
@@ -65,6 +65,7 @@ async function parseRss(name, feed, server) {
65
65
  item.guid = item.link;
66
66
  }
67
67
  });
68
+ const strategy = import_strategy.strategies[name];
68
69
  const contents = [];
69
70
  for (let item of result.items) {
70
71
  console.log(import_zx.chalk.green(`正在解析文章 【${item.title}】`));
@@ -72,11 +73,25 @@ async function parseRss(name, feed, server) {
72
73
  console.log(JSON.stringify(item));
73
74
  throw Error(`item has no guid`);
74
75
  }
75
- const content = await (0, import_parseContent.parseContent)(item, import_strategy.strategies[name]).catch((e) => console.error(import_zx.chalk.red("文章解析失败"), e.message));
76
+ const content = await (0, import_parseContent.parseContent)(item, strategy).catch((e) => console.error(import_zx.chalk.red("文章解析失败"), e));
76
77
  if (content) {
77
78
  contents.push(content);
78
79
  }
79
80
  }
81
+ let thumbs;
82
+ if (strategy == null ? void 0 : strategy.getThumbs) {
83
+ console.log(import_zx.chalk.green(`正在获取缩略图数据`));
84
+ const thumbKv = await (strategy == null ? void 0 : strategy.getThumbs(contents));
85
+ if (thumbKv && Object.keys(thumbKv).length) {
86
+ console.log(import_zx.chalk.green(`正在更新缩略图`));
87
+ thumbs = thumbKv;
88
+ contents.forEach((content) => {
89
+ if (thumbKv[content.guid]) {
90
+ content.thumb = thumbKv[content.guid];
91
+ }
92
+ });
93
+ }
94
+ }
80
95
  if (server) {
81
96
  const r = await (0, import_upload.uploadContent)({
82
97
  app: name,
@@ -89,9 +104,12 @@ async function parseRss(name, feed, server) {
89
104
  }).catch((e) => {
90
105
  });
91
106
  }
92
- return r;
107
+ return {
108
+ contents: r,
109
+ thumbs
110
+ };
93
111
  } else {
94
- return contents;
112
+ return { contents, thumbs };
95
113
  }
96
114
  }
97
115
  // Annotate the CommonJS export names for ESM import in node:
@@ -106,7 +106,7 @@ async function parseContent($, $element, item, strategy) {
106
106
  return {
107
107
  type: "rich",
108
108
  ...(0, import_getBasicFromItem.getBasicFromItem)(item),
109
- content: (0, import_html_entities.decode)($element.html().trim()),
109
+ content,
110
110
  ssrContent: (0, import_generateSsrContent.generateSsrContent)(content, images),
111
111
  images
112
112
  };
@@ -1,6 +1,8 @@
1
+ var __create = Object.create;
1
2
  var __defProp = Object.defineProperty;
2
3
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
3
4
  var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __getProtoOf = Object.getPrototypeOf;
4
6
  var __hasOwnProp = Object.prototype.hasOwnProperty;
5
7
  var __export = (target, all) => {
6
8
  for (var name in all)
@@ -14,6 +16,14 @@ var __copyProps = (to, from, except, desc) => {
14
16
  }
15
17
  return to;
16
18
  };
19
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
20
+ // If the importer is in node compatibility mode or this is not an ESM
21
+ // file that has been converted to a CommonJS file using a Babel-
22
+ // compatible transform (i.e. "__esModule" has not been set), then set
23
+ // "default" to the CommonJS "module.exports" for node compatibility.
24
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
25
+ mod
26
+ ));
17
27
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
18
28
 
19
29
  // src/parsers/cnbeta.ts
@@ -22,6 +32,9 @@ __export(cnbeta_exports, {
22
32
  cnbeta: () => cnbeta
23
33
  });
24
34
  module.exports = __toCommonJS(cnbeta_exports);
35
+ var import_axios = __toESM(require("axios"));
36
+ var import_zx = require("zx");
37
+ var import_cheerio = require("cheerio");
25
38
  var cnbeta = {
26
39
  parse: true,
27
40
  fetcher: "http",
@@ -43,7 +56,31 @@ var cnbeta = {
43
56
  阅读全文`, "")
44
57
  };
45
58
  },
46
- ignoreProbeImage: true
59
+ ignoreProbeImage: true,
60
+ getThumbs: async (articles) => {
61
+ const res = await import_axios.default.get("https://m.cnbeta.com.tw/", {
62
+ responseType: "text"
63
+ }).catch((e) => {
64
+ console.error(import_zx.chalk.red("获取 m.cnbeta.com.tw 错误"), e);
65
+ });
66
+ if (!res)
67
+ return void 0;
68
+ const html = res.data;
69
+ const $ = (0, import_cheerio.load)(html, null, false);
70
+ const kv = {};
71
+ $("#main > section li .txt_thumb > a").each((_, $a) => {
72
+ var _a, _b;
73
+ const thumb = $($a).find("img").attr("src");
74
+ const href = $($a).attr("href") || "";
75
+ const reg = /([\d]+).htm/;
76
+ const articleId = (_a = reg.exec(href)) == null ? void 0 : _a[1];
77
+ const guid = (_b = articles.find((a) => a.guid.includes(`/${articleId}.htm`))) == null ? void 0 : _b.guid;
78
+ if (guid && thumb) {
79
+ kv[guid] = thumb;
80
+ }
81
+ });
82
+ return kv;
83
+ }
47
84
  };
48
85
  // Annotate the CommonJS export names for ESM import in node:
49
86
  0 && (module.exports = {
@@ -11,6 +11,7 @@ export interface ParseStrategy {
11
11
  getContentFromHtml?: ($: CheerioAPI, node: Cheerio<AnyNode>) => string;
12
12
  getExtraItems?: ($: CheerioAPI, current: RichArticle, item: Item) => Promise<Record<string, any>>;
13
13
  ignoreProbeImage?: boolean;
14
+ getThumbs?: (articles: Article[]) => Promise<Record<string, string> | undefined>;
14
15
  }
15
16
  export interface SimpleArticle {
16
17
  type: 'simple';
@@ -23,6 +24,7 @@ export interface SimpleArticle {
23
24
  author?: string;
24
25
  source?: string;
25
26
  categories?: string;
27
+ thumb?: string;
26
28
  }
27
29
  export interface RichArticleImage {
28
30
  url: string;
@@ -21,4 +21,5 @@ export declare function uploadContent({ app, getUploadApi, items, }: {
21
21
  author?: string | undefined;
22
22
  source?: string | undefined;
23
23
  categories?: string | undefined;
24
+ thumb?: string | undefined;
24
25
  }[]>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ns-rss-spider",
3
- "version": "0.0.20",
3
+ "version": "0.0.22",
4
4
  "description": "",
5
5
  "main": "dist/cjs/index.js",
6
6
  "types": "dist/cjs/index.d.ts",