ns-rss-spider 0.0.19 → 0.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/parse.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { ServerInfo } from "./upload";
|
|
2
|
-
|
|
2
|
+
import { Article } from "./types";
|
|
3
|
+
export declare function parseRss(name: string, feed: string, server?: ServerInfo): Promise<Article[] | {
|
|
3
4
|
storePath: any;
|
|
4
5
|
type: "simple" | "rich";
|
|
5
6
|
guid: string;
|
|
@@ -10,4 +11,5 @@ export declare function parseRss(name: string, feed: string, server?: ServerInfo
|
|
|
10
11
|
author?: string | undefined;
|
|
11
12
|
source?: string | undefined;
|
|
12
13
|
categories?: string | undefined;
|
|
14
|
+
thumb?: string | undefined;
|
|
13
15
|
}[]>;
|
package/dist/cjs/parse.js
CHANGED
|
@@ -65,6 +65,7 @@ async function parseRss(name, feed, server) {
|
|
|
65
65
|
item.guid = item.link;
|
|
66
66
|
}
|
|
67
67
|
});
|
|
68
|
+
const strategy = import_strategy.strategies[name];
|
|
68
69
|
const contents = [];
|
|
69
70
|
for (let item of result.items) {
|
|
70
71
|
console.log(import_zx.chalk.green(`正在解析文章 【${item.title}】`));
|
|
@@ -72,11 +73,23 @@ async function parseRss(name, feed, server) {
|
|
|
72
73
|
console.log(JSON.stringify(item));
|
|
73
74
|
throw Error(`item has no guid`);
|
|
74
75
|
}
|
|
75
|
-
const content = await (0, import_parseContent.parseContent)(item,
|
|
76
|
+
const content = await (0, import_parseContent.parseContent)(item, strategy).catch((e) => console.error(import_zx.chalk.red("文章解析失败"), e));
|
|
76
77
|
if (content) {
|
|
77
78
|
contents.push(content);
|
|
78
79
|
}
|
|
79
80
|
}
|
|
81
|
+
if (strategy == null ? void 0 : strategy.getThumbs) {
|
|
82
|
+
console.log(import_zx.chalk.green(`正在获取缩略图数据`));
|
|
83
|
+
const thumbKv = await (strategy == null ? void 0 : strategy.getThumbs(contents));
|
|
84
|
+
if (thumbKv && Object.keys(thumbKv).length) {
|
|
85
|
+
console.log(import_zx.chalk.green(`正在更新缩略图`));
|
|
86
|
+
contents.forEach((content) => {
|
|
87
|
+
if (thumbKv[content.guid]) {
|
|
88
|
+
content.thumb = thumbKv[content.guid];
|
|
89
|
+
}
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
}
|
|
80
93
|
if (server) {
|
|
81
94
|
const r = await (0, import_upload.uploadContent)({
|
|
82
95
|
app: name,
|
|
@@ -55,6 +55,13 @@ async function parseContent($, $element, item, strategy) {
|
|
|
55
55
|
}
|
|
56
56
|
});
|
|
57
57
|
for (let item2 of srcs) {
|
|
58
|
+
if (strategy == null ? void 0 : strategy.ignoreProbeImage) {
|
|
59
|
+
images.push({
|
|
60
|
+
url: item2.src,
|
|
61
|
+
title: item2.title
|
|
62
|
+
});
|
|
63
|
+
continue;
|
|
64
|
+
}
|
|
58
65
|
console.log(import_zx.chalk.green("正在解析图片", item2.src));
|
|
59
66
|
const result = await (0, import_probe_image_size.default)(item2.src, {
|
|
60
67
|
rejectUnauthorized: false,
|
|
@@ -99,7 +106,7 @@ async function parseContent($, $element, item, strategy) {
|
|
|
99
106
|
return {
|
|
100
107
|
type: "rich",
|
|
101
108
|
...(0, import_getBasicFromItem.getBasicFromItem)(item),
|
|
102
|
-
content
|
|
109
|
+
content,
|
|
103
110
|
ssrContent: (0, import_generateSsrContent.generateSsrContent)(content, images),
|
|
104
111
|
images
|
|
105
112
|
};
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
1
2
|
var __defProp = Object.defineProperty;
|
|
2
3
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
4
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
4
6
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
7
|
var __export = (target, all) => {
|
|
6
8
|
for (var name in all)
|
|
@@ -14,6 +16,14 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
14
16
|
}
|
|
15
17
|
return to;
|
|
16
18
|
};
|
|
19
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
+
mod
|
|
26
|
+
));
|
|
17
27
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
28
|
|
|
19
29
|
// src/parsers/cnbeta.ts
|
|
@@ -22,6 +32,9 @@ __export(cnbeta_exports, {
|
|
|
22
32
|
cnbeta: () => cnbeta
|
|
23
33
|
});
|
|
24
34
|
module.exports = __toCommonJS(cnbeta_exports);
|
|
35
|
+
var import_axios = __toESM(require("axios"));
|
|
36
|
+
var import_zx = require("zx");
|
|
37
|
+
var import_cheerio = require("cheerio");
|
|
25
38
|
var cnbeta = {
|
|
26
39
|
parse: true,
|
|
27
40
|
fetcher: "http",
|
|
@@ -42,6 +55,31 @@ var cnbeta = {
|
|
|
42
55
|
description: rich.description.replace(`
|
|
43
56
|
阅读全文`, "")
|
|
44
57
|
};
|
|
58
|
+
},
|
|
59
|
+
ignoreProbeImage: true,
|
|
60
|
+
getThumbs: async (articles) => {
|
|
61
|
+
const res = await import_axios.default.get("https://m.cnbeta.com.tw/", {
|
|
62
|
+
responseType: "text"
|
|
63
|
+
}).catch((e) => {
|
|
64
|
+
console.error(import_zx.chalk.red("获取 m.cnbeta.com.tw 错误"), e);
|
|
65
|
+
});
|
|
66
|
+
if (!res)
|
|
67
|
+
return void 0;
|
|
68
|
+
const html = res.data;
|
|
69
|
+
const $ = (0, import_cheerio.load)(html, null, false);
|
|
70
|
+
const kv = {};
|
|
71
|
+
$("#it_tech .txt_thumb > a").each((_, $a) => {
|
|
72
|
+
var _a, _b;
|
|
73
|
+
const thumb = $($a).find("img").attr("src");
|
|
74
|
+
const href = $($a).attr("href") || "";
|
|
75
|
+
const reg = /([\d]+).htm/;
|
|
76
|
+
const articleId = (_a = reg.exec(href)) == null ? void 0 : _a[1];
|
|
77
|
+
const guid = (_b = articles.find((a) => a.guid.includes(`/${articleId}.htm`))) == null ? void 0 : _b.guid;
|
|
78
|
+
if (guid && thumb) {
|
|
79
|
+
kv[guid] = thumb;
|
|
80
|
+
}
|
|
81
|
+
});
|
|
82
|
+
return kv;
|
|
45
83
|
}
|
|
46
84
|
};
|
|
47
85
|
// Annotate the CommonJS export names for ESM import in node:
|
package/dist/cjs/types.d.ts
CHANGED
|
@@ -10,6 +10,8 @@ export interface ParseStrategy {
|
|
|
10
10
|
getContentElementFromArticle?: (aritcle: CheerioAPI) => Cheerio<AnyNode>;
|
|
11
11
|
getContentFromHtml?: ($: CheerioAPI, node: Cheerio<AnyNode>) => string;
|
|
12
12
|
getExtraItems?: ($: CheerioAPI, current: RichArticle, item: Item) => Promise<Record<string, any>>;
|
|
13
|
+
ignoreProbeImage?: boolean;
|
|
14
|
+
getThumbs?: (articles: Article[]) => Promise<Record<string, string> | undefined>;
|
|
13
15
|
}
|
|
14
16
|
export interface SimpleArticle {
|
|
15
17
|
type: 'simple';
|
|
@@ -22,6 +24,7 @@ export interface SimpleArticle {
|
|
|
22
24
|
author?: string;
|
|
23
25
|
source?: string;
|
|
24
26
|
categories?: string;
|
|
27
|
+
thumb?: string;
|
|
25
28
|
}
|
|
26
29
|
export interface RichArticleImage {
|
|
27
30
|
url: string;
|