ns-rss-spider 0.0.19 → 0.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import { ServerInfo } from "./upload";
2
- export declare function parseRss(name: string, feed: string, server?: ServerInfo): Promise<import("./types").Article[] | {
2
+ import { Article } from "./types";
3
+ export declare function parseRss(name: string, feed: string, server?: ServerInfo): Promise<Article[] | {
3
4
  storePath: any;
4
5
  type: "simple" | "rich";
5
6
  guid: string;
@@ -10,4 +11,5 @@ export declare function parseRss(name: string, feed: string, server?: ServerInfo
10
11
  author?: string | undefined;
11
12
  source?: string | undefined;
12
13
  categories?: string | undefined;
14
+ thumb?: string | undefined;
13
15
  }[]>;
package/dist/cjs/parse.js CHANGED
@@ -65,6 +65,7 @@ async function parseRss(name, feed, server) {
65
65
  item.guid = item.link;
66
66
  }
67
67
  });
68
+ const strategy = import_strategy.strategies[name];
68
69
  const contents = [];
69
70
  for (let item of result.items) {
70
71
  console.log(import_zx.chalk.green(`正在解析文章 【${item.title}】`));
@@ -72,11 +73,23 @@ async function parseRss(name, feed, server) {
72
73
  console.log(JSON.stringify(item));
73
74
  throw Error(`item has no guid`);
74
75
  }
75
- const content = await (0, import_parseContent.parseContent)(item, import_strategy.strategies[name]).catch((e) => console.error(import_zx.chalk.red("文章解析失败"), e.message));
76
+ const content = await (0, import_parseContent.parseContent)(item, strategy).catch((e) => console.error(import_zx.chalk.red("文章解析失败"), e));
76
77
  if (content) {
77
78
  contents.push(content);
78
79
  }
79
80
  }
81
+ if (strategy == null ? void 0 : strategy.getThumbs) {
82
+ console.log(import_zx.chalk.green(`正在获取缩略图数据`));
83
+ const thumbKv = await (strategy == null ? void 0 : strategy.getThumbs(contents));
84
+ if (thumbKv && Object.keys(thumbKv).length) {
85
+ console.log(import_zx.chalk.green(`正在更新缩略图`));
86
+ contents.forEach((content) => {
87
+ if (thumbKv[content.guid]) {
88
+ content.thumb = thumbKv[content.guid];
89
+ }
90
+ });
91
+ }
92
+ }
80
93
  if (server) {
81
94
  const r = await (0, import_upload.uploadContent)({
82
95
  app: name,
@@ -55,6 +55,13 @@ async function parseContent($, $element, item, strategy) {
55
55
  }
56
56
  });
57
57
  for (let item2 of srcs) {
58
+ if (strategy == null ? void 0 : strategy.ignoreProbeImage) {
59
+ images.push({
60
+ url: item2.src,
61
+ title: item2.title
62
+ });
63
+ continue;
64
+ }
58
65
  console.log(import_zx.chalk.green("正在解析图片", item2.src));
59
66
  const result = await (0, import_probe_image_size.default)(item2.src, {
60
67
  rejectUnauthorized: false,
@@ -99,7 +106,7 @@ async function parseContent($, $element, item, strategy) {
99
106
  return {
100
107
  type: "rich",
101
108
  ...(0, import_getBasicFromItem.getBasicFromItem)(item),
102
- content: (0, import_html_entities.decode)($element.html().trim()),
109
+ content,
103
110
  ssrContent: (0, import_generateSsrContent.generateSsrContent)(content, images),
104
111
  images
105
112
  };
@@ -1,6 +1,8 @@
1
+ var __create = Object.create;
1
2
  var __defProp = Object.defineProperty;
2
3
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
3
4
  var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __getProtoOf = Object.getPrototypeOf;
4
6
  var __hasOwnProp = Object.prototype.hasOwnProperty;
5
7
  var __export = (target, all) => {
6
8
  for (var name in all)
@@ -14,6 +16,14 @@ var __copyProps = (to, from, except, desc) => {
14
16
  }
15
17
  return to;
16
18
  };
19
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
20
+ // If the importer is in node compatibility mode or this is not an ESM
21
+ // file that has been converted to a CommonJS file using a Babel-
22
+ // compatible transform (i.e. "__esModule" has not been set), then set
23
+ // "default" to the CommonJS "module.exports" for node compatibility.
24
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
25
+ mod
26
+ ));
17
27
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
18
28
 
19
29
  // src/parsers/cnbeta.ts
@@ -22,6 +32,9 @@ __export(cnbeta_exports, {
22
32
  cnbeta: () => cnbeta
23
33
  });
24
34
  module.exports = __toCommonJS(cnbeta_exports);
35
+ var import_axios = __toESM(require("axios"));
36
+ var import_zx = require("zx");
37
+ var import_cheerio = require("cheerio");
25
38
  var cnbeta = {
26
39
  parse: true,
27
40
  fetcher: "http",
@@ -42,6 +55,31 @@ var cnbeta = {
42
55
  description: rich.description.replace(`
43
56
  阅读全文`, "")
44
57
  };
58
+ },
59
+ ignoreProbeImage: true,
60
+ getThumbs: async (articles) => {
61
+ const res = await import_axios.default.get("https://m.cnbeta.com.tw/", {
62
+ responseType: "text"
63
+ }).catch((e) => {
64
+ console.error(import_zx.chalk.red("获取 m.cnbeta.com.tw 错误"), e);
65
+ });
66
+ if (!res)
67
+ return void 0;
68
+ const html = res.data;
69
+ const $ = (0, import_cheerio.load)(html, null, false);
70
+ const kv = {};
71
+ $("#it_tech .txt_thumb > a").each((_, $a) => {
72
+ var _a, _b;
73
+ const thumb = $($a).find("img").attr("src");
74
+ const href = $($a).attr("href") || "";
75
+ const reg = /([\d]+).htm/;
76
+ const articleId = (_a = reg.exec(href)) == null ? void 0 : _a[1];
77
+ const guid = (_b = articles.find((a) => a.guid.includes(`/${articleId}.htm`))) == null ? void 0 : _b.guid;
78
+ if (guid && thumb) {
79
+ kv[guid] = thumb;
80
+ }
81
+ });
82
+ return kv;
45
83
  }
46
84
  };
47
85
  // Annotate the CommonJS export names for ESM import in node:
@@ -10,6 +10,8 @@ export interface ParseStrategy {
10
10
  getContentElementFromArticle?: (aritcle: CheerioAPI) => Cheerio<AnyNode>;
11
11
  getContentFromHtml?: ($: CheerioAPI, node: Cheerio<AnyNode>) => string;
12
12
  getExtraItems?: ($: CheerioAPI, current: RichArticle, item: Item) => Promise<Record<string, any>>;
13
+ ignoreProbeImage?: boolean;
14
+ getThumbs?: (articles: Article[]) => Promise<Record<string, string> | undefined>;
13
15
  }
14
16
  export interface SimpleArticle {
15
17
  type: 'simple';
@@ -22,6 +24,7 @@ export interface SimpleArticle {
22
24
  author?: string;
23
25
  source?: string;
24
26
  categories?: string;
27
+ thumb?: string;
25
28
  }
26
29
  export interface RichArticleImage {
27
30
  url: string;
@@ -21,4 +21,5 @@ export declare function uploadContent({ app, getUploadApi, items, }: {
21
21
  author?: string | undefined;
22
22
  source?: string | undefined;
23
23
  categories?: string | undefined;
24
+ thumb?: string | undefined;
24
25
  }[]>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ns-rss-spider",
3
- "version": "0.0.19",
3
+ "version": "0.0.21",
4
4
  "description": "",
5
5
  "main": "dist/cjs/index.js",
6
6
  "types": "dist/cjs/index.d.ts",