ns-rss-spider 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/dist/cjs/cli.d.ts +2 -0
- package/dist/cjs/cli.js +61 -0
- package/dist/cjs/index.d.ts +3 -0
- package/dist/cjs/index.js +35 -0
- package/dist/cjs/parse.d.ts +1 -0
- package/dist/cjs/parse.js +59 -0
- package/dist/cjs/parseContent/getArticleHtml.d.ts +2 -0
- package/dist/cjs/parseContent/getArticleHtml.js +49 -0
- package/dist/cjs/parseContent/index.d.ts +16 -0
- package/dist/cjs/parseContent/index.js +80 -0
- package/dist/cjs/parseContent/parseContent.d.ts +4 -0
- package/dist/cjs/parseContent/parseContent.js +108 -0
- package/dist/cjs/parseContent/stripeHtml.d.ts +2 -0
- package/dist/cjs/parseContent/stripeHtml.js +46 -0
- package/dist/cjs/parseContent/utils.d.ts +2 -0
- package/dist/cjs/parseContent/utils.js +39 -0
- package/dist/cjs/parsers/cnbeta.d.ts +2 -0
- package/dist/cjs/parsers/cnbeta.js +44 -0
- package/dist/cjs/parsers/ifanr.d.ts +2 -0
- package/dist/cjs/parsers/ifanr.js +36 -0
- package/dist/cjs/parsers/ithome.d.ts +2 -0
- package/dist/cjs/parsers/ithome.js +53 -0
- package/dist/cjs/parsers/theverge.d.ts +2 -0
- package/dist/cjs/parsers/theverge.js +31 -0
- package/dist/cjs/strategy.d.ts +2 -0
- package/dist/cjs/strategy.js +38 -0
- package/dist/cjs/types.d.ts +30 -0
- package/dist/cjs/types.js +17 -0
- package/dist/cjs/utils/getData.d.ts +1 -0
- package/dist/cjs/utils/getData.js +63 -0
- package/package.json +43 -0
package/README.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# rss-spider
|
|
2
|
+
|
|
3
|
+
[](https://npmjs.org/package/rss-spider)
|
|
4
|
+
[](https://npmjs.org/package/rss-spider)
|
|
5
|
+
|
|
6
|
+
## Install
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
$ yarn install
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
$ npm run dev
|
|
14
|
+
$ npm run build
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Options
|
|
18
|
+
|
|
19
|
+
TODO
|
|
20
|
+
|
|
21
|
+
## LICENSE
|
|
22
|
+
|
|
23
|
+
MIT
|
package/dist/cjs/cli.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __copyProps = (to, from, except, desc) => {
|
|
9
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
10
|
+
for (let key of __getOwnPropNames(from))
|
|
11
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
12
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
13
|
+
}
|
|
14
|
+
return to;
|
|
15
|
+
};
|
|
16
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
17
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
18
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
19
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
20
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
21
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
22
|
+
mod
|
|
23
|
+
));
|
|
24
|
+
|
|
25
|
+
// src/cli.ts
|
|
26
|
+
var import_promises = require("fs/promises");
|
|
27
|
+
var import_yargs_parser = __toESM(require("yargs-parser"));
|
|
28
|
+
var import_index = require("./index");
|
|
29
|
+
var import_zx = require("zx");
|
|
30
|
+
var import_getData = require("./utils/getData");
|
|
31
|
+
var argv = (0, import_yargs_parser.default)(process.argv);
|
|
32
|
+
if (argv.v || argv.version) {
|
|
33
|
+
const pkg = require("../../package.json");
|
|
34
|
+
console.log(pkg.version);
|
|
35
|
+
process.exit(0);
|
|
36
|
+
}
|
|
37
|
+
/**
 * CLI entry point.
 *
 * Reads the request object from `getData()`, dispatches on its `cmd` field
 * and emits the result: written to `./result.json` when the `DISP_ARGV`
 * environment variable is set, otherwise printed to stdout.
 *
 * @returns {Promise<void>} Settles once the result has been printed or the
 *   result file has been written; resolves immediately for unknown commands.
 */
async function main() {
  const request = (0, import_getData.getData)() || {};
  const { cmd, ...params } = request;

  // "parse" is the only supported command; anything else is reported and ignored.
  if (cmd !== "parse") {
    console.warn("未知命令", cmd);
    return;
  }

  const output = await (0, import_index.parseRss)(params.name, params.feed);

  if (!process.env.DISP_ARGV) {
    console.info("输出结果");
    console.log(output);
    return;
  }

  console.info("写入结果文件");
  return (0, import_promises.writeFile)("./result.json", JSON.stringify(output || {}));
}
|
|
57
|
+
main().catch((err) => {
|
|
58
|
+
console.error(import_zx.chalk.red(err.message));
|
|
59
|
+
console.error(err);
|
|
60
|
+
process.exit(1);
|
|
61
|
+
});
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/index.ts
|
|
20
|
+
var src_exports = {};
|
|
21
|
+
__export(src_exports, {
|
|
22
|
+
parseContent: () => import_parseContent.parseContent,
|
|
23
|
+
parseRss: () => import_parse.parseRss,
|
|
24
|
+
strategies: () => import_strategy.strategies
|
|
25
|
+
});
|
|
26
|
+
module.exports = __toCommonJS(src_exports);
|
|
27
|
+
var import_parse = require("./parse");
|
|
28
|
+
var import_strategy = require("./strategy");
|
|
29
|
+
var import_parseContent = require("./parseContent");
|
|
30
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
31
|
+
0 && (module.exports = {
|
|
32
|
+
parseContent,
|
|
33
|
+
parseRss,
|
|
34
|
+
strategies
|
|
35
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function parseRss(name: string, feed: string): Promise<(import("./types").SimpleContent | import("./types").RichContent)[]>;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __export = (target, all) => {
|
|
8
|
+
for (var name in all)
|
|
9
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
+
};
|
|
11
|
+
var __copyProps = (to, from, except, desc) => {
|
|
12
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
+
for (let key of __getOwnPropNames(from))
|
|
14
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
+
}
|
|
17
|
+
return to;
|
|
18
|
+
};
|
|
19
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
+
mod
|
|
26
|
+
));
|
|
27
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
+
|
|
29
|
+
// src/parse.ts
|
|
30
|
+
var parse_exports = {};
|
|
31
|
+
__export(parse_exports, {
|
|
32
|
+
parseRss: () => parseRss
|
|
33
|
+
});
|
|
34
|
+
module.exports = __toCommonJS(parse_exports);
|
|
35
|
+
var import_rss_parser = __toESM(require("rss-parser"));
|
|
36
|
+
var import_zx = require("zx");
|
|
37
|
+
var import_strategy = require("./strategy");
|
|
38
|
+
var import_parseContent = require("./parseContent");
|
|
39
|
+
/**
 * Fetch an RSS feed and parse each of its items into content objects.
 *
 * Items are processed one after another. An item whose parsing fails is
 * logged and left out of the result instead of aborting the whole run.
 *
 * @param {string} name Key used to look up the parse strategy in `strategies`.
 * @param {string} feed URL of the RSS feed.
 * @returns {Promise<Array>} The successfully parsed items, in feed order.
 * @throws {Error} When the feed contains no items, or when fetching the feed fails.
 */
async function parseRss(name, feed) {
  const parser = new import_rss_parser.default();
  console.log(import_zx.chalk.blue("正在拉取 rss 列表"));
  const result = await parser.parseURL(feed);
  if (!result.items.length) {
    throw new Error("rss no content");
  }
  // The strategy depends only on `name`, so resolve it once for all items.
  const strategy = import_strategy.strategies[name];
  const contents = [];
  for (const item of result.items) {
    console.log(import_zx.chalk.blue(`正在解析文章 【${item.title}】`));
    // Best effort: a failed article is reported and skipped.
    const content = await (0, import_parseContent.parseContent)(item, strategy).catch((e) => console.error(import_zx.chalk.red("文章解析失败"), e.message));
    if (content) {
      contents.push(content);
    }
  }
  return contents;
}
|
|
56
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
57
|
+
0 && (module.exports = {
|
|
58
|
+
parseRss
|
|
59
|
+
});
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __export = (target, all) => {
|
|
8
|
+
for (var name in all)
|
|
9
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
+
};
|
|
11
|
+
var __copyProps = (to, from, except, desc) => {
|
|
12
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
+
for (let key of __getOwnPropNames(from))
|
|
14
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
+
}
|
|
17
|
+
return to;
|
|
18
|
+
};
|
|
19
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
+
mod
|
|
26
|
+
));
|
|
27
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
+
|
|
29
|
+
// src/parseContent/getArticleHtml.ts
|
|
30
|
+
var getArticleHtml_exports = {};
|
|
31
|
+
__export(getArticleHtml_exports, {
|
|
32
|
+
getArticleHtml: () => getArticleHtml
|
|
33
|
+
});
|
|
34
|
+
module.exports = __toCommonJS(getArticleHtml_exports);
|
|
35
|
+
var import_axios = __toESM(require("axios"));
|
|
36
|
+
/**
 * Download the HTML of an article page.
 *
 * @param {string} url Address of the article.
 * @param {string} fetcher Fetch mechanism; `"playwright"` is not implemented
 *   yet, any other value performs a plain HTTP GET through axios.
 * @returns {Promise<string>} The response body, requested as text.
 * @throws {Error} `"todo"` when `fetcher` is `"playwright"`.
 */
var getArticleHtml = async (url, fetcher) => {
  // Guard clause: browser-based fetching is still a stub.
  if (fetcher === "playwright") {
    throw Error("todo");
  }
  const response = await import_axios.default.get(url, { responseType: "text" });
  return response.data;
};
|
|
46
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
47
|
+
0 && (module.exports = {
|
|
48
|
+
getArticleHtml
|
|
49
|
+
});
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { Item as RssItem } from "rss-parser";
|
|
2
|
+
import { Content, ParseStrategy } from "../types";
|
|
3
|
+
/**
|
|
4
|
+
* Parse the content of an article.
|
|
5
|
+
* 1. Obtain the HTML fragment (rss, http, playwright).
|
|
6
|
+
* 2. Process the HTML:
|
|
7
|
+
* strip unused styles.
|
|
8
|
+
* 3. Compute images, videos and parsed (todo) from the HTML.
|
|
9
|
+
* 4. Collect the custom extra parsed items (optional).
|
|
10
|
+
* 5. Return the result.
|
|
11
|
+
*
|
|
12
|
+
* @param item
|
|
13
|
+
* @param strategy
|
|
14
|
+
* @returns
|
|
15
|
+
*/
|
|
16
|
+
export declare function parseContent(item: RssItem, strategy?: ParseStrategy): Promise<Content>;
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __export = (target, all) => {
|
|
8
|
+
for (var name in all)
|
|
9
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
+
};
|
|
11
|
+
var __copyProps = (to, from, except, desc) => {
|
|
12
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
+
for (let key of __getOwnPropNames(from))
|
|
14
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
+
}
|
|
17
|
+
return to;
|
|
18
|
+
};
|
|
19
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
+
mod
|
|
26
|
+
));
|
|
27
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
+
|
|
29
|
+
// src/parseContent/index.ts
|
|
30
|
+
var parseContent_exports = {};
|
|
31
|
+
__export(parseContent_exports, {
|
|
32
|
+
parseContent: () => parseContent
|
|
33
|
+
});
|
|
34
|
+
module.exports = __toCommonJS(parseContent_exports);
|
|
35
|
+
var import_getArticleHtml = require("./getArticleHtml");
|
|
36
|
+
var cheerio = __toESM(require("cheerio"));
|
|
37
|
+
var import_stripeHtml = require("./stripeHtml");
|
|
38
|
+
var import_parseContent = require("./parseContent");
|
|
39
|
+
var import_zx = require("zx");
|
|
40
|
+
/**
 * Parse a single RSS item into a content object.
 *
 * Steps:
 *  1. Obtain the HTML: from the item's own `content` when the strategy has no
 *     `fetcher`, otherwise by downloading `item.link` and letting the strategy
 *     pick the article element (falling back to the document root).
 *  2. Pre-process the HTML with `stripeHtml`.
 *  3. Without `strategy.parse`, return a "simple" content object.
 *  4. Otherwise build the rich content and merge in the optional
 *     `strategy.getExtraItems` result.
 *
 * @param item RSS item (reads `content`, `link`, `title`, `pubDate`).
 * @param strategy Optional parse strategy for the feed.
 * @returns {Promise<object>} A "simple" or rich content object.
 */
async function parseContent(item, strategy) {
  let element;
  let $;
  if (!strategy || !strategy.fetcher) {
    $ = cheerio.load(item.content || "", {}, false);
    element = $.root();
  } else {
    console.log(import_zx.chalk.blue("正在拉取文章内容"), item.link);
    const article = await (0, import_getArticleHtml.getArticleHtml)(item.link, strategy.fetcher);
    $ = cheerio.load(article);
    const picker = strategy.getContentHtmlFromArticle;
    element = (picker == null ? void 0 : picker.call(strategy, $)) || $.root();
  }
  console.log(import_zx.chalk.blue("正在预处理 html"));
  (0, import_stripeHtml.stripeHtml)($, element);
  if (!strategy || !strategy.parse) {
    const html = element.html();
    return {
      type: "simple",
      title: item.title,
      description: html == null ? "" : html.trim(),
      pubDate: new Date(item.pubDate)
    };
  }
  console.log(import_zx.chalk.blue("正在解析文章内容"));
  const rich = await (0, import_parseContent.parseContent)($, element, item, strategy);
  let extra = {};
  if (strategy.getExtraItems) {
    console.log(import_zx.chalk.blue("正在执行 getExtraItems"));
    // Extra items are best effort. try/await (instead of chaining `.catch` on
    // the return value) also covers a hook that throws synchronously or
    // returns a plain object, so the parsed article is never lost to it.
    try {
      extra = (await strategy.getExtraItems($, rich, item)) || {};
    } catch (e) {
      console.error(import_zx.chalk.red("getExtraItems 识别"), e);
    }
  }
  return {
    ...rich,
    ...extra
  };
}
|
|
77
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
78
|
+
0 && (module.exports = {
|
|
79
|
+
parseContent
|
|
80
|
+
});
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { Item as RssItem } from "rss-parser";
|
|
2
|
+
import type { ParseStrategy, RichContent } from "../types";
|
|
3
|
+
import { AnyNode, Cheerio, CheerioAPI } from "cheerio";
|
|
4
|
+
export declare function parseContent($: CheerioAPI, $element: Cheerio<AnyNode>, item: RssItem, strategy?: ParseStrategy): Promise<RichContent>;
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __export = (target, all) => {
|
|
8
|
+
for (var name in all)
|
|
9
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
+
};
|
|
11
|
+
var __copyProps = (to, from, except, desc) => {
|
|
12
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
+
for (let key of __getOwnPropNames(from))
|
|
14
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
+
}
|
|
17
|
+
return to;
|
|
18
|
+
};
|
|
19
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
+
mod
|
|
26
|
+
));
|
|
27
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
+
|
|
29
|
+
// src/parseContent/parseContent.ts
|
|
30
|
+
var parseContent_exports = {};
|
|
31
|
+
__export(parseContent_exports, {
|
|
32
|
+
parseContent: () => parseContent
|
|
33
|
+
});
|
|
34
|
+
module.exports = __toCommonJS(parseContent_exports);
|
|
35
|
+
var import_utils = require("./utils");
|
|
36
|
+
var import_probe_image_size = __toESM(require("probe-image-size"));
|
|
37
|
+
var import_zx = require("zx");
|
|
38
|
+
var import_html_entities = require("html-entities");
|
|
39
|
+
/**
 * Build the rich content object for an article element.
 *
 * Collects every `<img>` with a `src` under `$element`, probes each image for
 * its dimensions and mime type, then rewrites the `<img>` tags so that `src`
 * becomes `data-src` and known dimensions are exposed as `data-w` / `data-h`.
 *
 * @param $ Cheerio API bound to the document that owns `$element`.
 * @param $element Cheerio selection holding the article content.
 * @param item RSS item (reads `title`, `contentSnippet`, `pubDate`).
 * @param strategy Parse strategy; currently unused by this function.
 * @returns {Promise<object>} Rich content: `type`, `title`, `description`,
 *   `content`, `pubDate` and `images`.
 */
async function parseContent($, $element, item, strategy) {
  const srcs = [];
  (0, import_utils.walk_the_DOM)($, $element, (node) => {
    if (!node.is("img")) {
      return;
    }
    const src = node.attr("src");
    if (src) {
      srcs.push({ src, title: node.attr("title") || void 0 });
    }
  });

  const images = [];
  for (const entry of srcs) {
    console.log(import_zx.chalk.blue("正在解析图片", entry.src));
    let probed = null;
    try {
      // NOTE(review): `rejectUnauthorized: false` presumably disables TLS
      // certificate verification for the probe request — confirm it is intended.
      probed = await (0, import_probe_image_size.default)(entry.src, {
        rejectUnauthorized: false,
        open_timeout: 1e3 * 5
      });
    } catch (e) {
      // Probing is best effort; guard against a rejection without a message.
      console.error("解析图片失败", e == null ? e : e.message);
    }
    if (!probed) {
      // Keep the image in the list even when its metadata is unavailable.
      images.push({
        url: entry.src,
        title: entry.title
      });
      continue;
    }
    images.push({
      url: entry.src,
      title: entry.title,
      width: probed.width,
      height: probed.height,
      type: probed.mime,
      // Units are only recorded when they differ from the "px" default.
      widthUnit: probed.wUnits !== "px" ? probed.wUnits : void 0,
      heightUnit: probed.hUnits !== "px" ? probed.hUnits : void 0,
      // Only recorded when the probe reports a URL different from the original.
      realUrl: probed.url !== entry.src ? probed.url : void 0
    });
  }

  // Index metadata by URL once; the first entry wins, like Array#find would.
  const metaByUrl = new Map();
  for (const image of images) {
    if (!metaByUrl.has(image.url)) {
      metaByUrl.set(image.url, image);
    }
  }

  $($element).find("img").each((_, img) => {
    const $img = $(img);
    const src = $img.attr("src");
    if (!src) {
      return;
    }
    $img.removeAttr("src");
    $img.attr("data-src", src);
    const meta = metaByUrl.get(src);
    if (meta && meta.width) {
      $img.attr("data-w", `${meta.width}`);
    }
    if (meta && meta.height) {
      $img.attr("data-h", `${meta.height}`);
    }
  });

  // html() can return null (e.g. for an empty selection); treat it as empty content.
  const html = $element.html();
  const snippet = item.contentSnippet;
  return {
    type: "rich",
    title: item.title,
    description: snippet == null ? "" : snippet.trim() || "",
    // NOTE(review): decoding entities in the serialized HTML turns escaped
    // markup such as "&lt;" back into "<" — confirm consumers expect that.
    content: (0, import_html_entities.decode)(html == null ? "" : html.trim()),
    pubDate: new Date(item.pubDate),
    images
  };
}
|
|
105
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
106
|
+
0 && (module.exports = {
|
|
107
|
+
parseContent
|
|
108
|
+
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/stripeHtml.ts
|
|
20
|
+
var stripeHtml_exports = {};
|
|
21
|
+
__export(stripeHtml_exports, {
|
|
22
|
+
stripeHtml: () => stripeHtml
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(stripeHtml_exports);
|
|
25
|
+
var import_utils = require("./utils");
|
|
26
|
+
/**
 * Clean an article element in place.
 *
 * Drops every `<script>` descendant, then walks the element tree removing the
 * `id`, `style`, `class`, `width` and `height` attributes from each visited
 * node, plus `target` from anchors.
 *
 * @param $ Cheerio API bound to the document that owns `element`.
 * @param element Cheerio selection to clean.
 */
function stripeHtml($, element) {
  const strippedAttrs = ["id", "style", "class", "width", "height"];

  $(element).find("script").remove();

  (0, import_utils.walk_the_DOM)($, element, (el) => {
    strippedAttrs.forEach((attr) => {
      $(el).removeAttr(attr);
    });
    if (!$(el).is("a")) {
      return;
    }
    $(el).removeAttr("target");
  });
}
|
|
43
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
44
|
+
0 && (module.exports = {
|
|
45
|
+
stripeHtml
|
|
46
|
+
});
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/utils.ts
|
|
20
|
+
var utils_exports = {};
|
|
21
|
+
__export(utils_exports, {
|
|
22
|
+
walk_the_DOM: () => walk_the_DOM
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(utils_exports);
|
|
25
|
+
/**
 * Depth-first, pre-order traversal over a Cheerio selection.
 *
 * Calls `func` with the selection itself, then recurses into each of its
 * children individually. An empty selection is ignored.
 *
 * @param $ Cheerio API used to wrap child nodes.
 * @param n Selection to start from.
 * @param func Callback invoked with every visited (wrapped) node.
 */
function walk_the_DOM($, n, func) {
  if (n.length === 0) {
    return;
  }
  func(n);
  $(n.children()).each((_, child) => {
    walk_the_DOM($, $(child), func);
  });
}
|
|
36
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
37
|
+
0 && (module.exports = {
|
|
38
|
+
walk_the_DOM
|
|
39
|
+
});
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parsers/cnbeta.ts
|
|
20
|
+
var cnbeta_exports = {};
|
|
21
|
+
__export(cnbeta_exports, {
|
|
22
|
+
cnbeta: () => cnbeta
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(cnbeta_exports);
|
|
25
|
+
/**
 * Parse strategy for cnBeta articles: fetched over HTTP and parsed into rich
 * content.
 */
var cnbeta = {
  parse: true,
  fetcher: "http",
  /**
   * Select the article body and drop its `.tac` descendants.
   *
   * `remove(selector)` only filters the matched set itself, so the nested
   * blocks have to be located with `find` before being removed.
   */
  getContentHtmlFromArticle: ($) => {
    const el = $(".article-content");
    el.find(".tac").remove();
    return el;
  },
  /**
   * Strip the trailing "read more" marker from the description and read the
   * article source (lower-cased) from the page.
   */
  getExtraItems: async ($, rich) => {
    return {
      description: rich.description.replace("\n阅读全文", ""),
      pageFrom: $(".cnbeta-article .title .source a").text().toLowerCase()
    };
  }
};
|
|
41
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
42
|
+
0 && (module.exports = {
|
|
43
|
+
cnbeta
|
|
44
|
+
});
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parsers/ifanr.ts
|
|
20
|
+
var ifanr_exports = {};
|
|
21
|
+
__export(ifanr_exports, {
|
|
22
|
+
ifanr: () => ifanr
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(ifanr_exports);
|
|
25
|
+
/**
 * Parse strategy for ifanr articles: fetched over HTTP, parsed into rich
 * content, with the body taken from the `<article>` inside
 * `#article-content-wrapper`.
 */
var ifanr = {
  parse: true,
  fetcher: "http",
  getContentHtmlFromArticle: ($) => $("#article-content-wrapper article")
};
|
|
33
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
34
|
+
0 && (module.exports = {
|
|
35
|
+
ifanr
|
|
36
|
+
});
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parsers/ithome.ts
|
|
20
|
+
var ithome_exports = {};
|
|
21
|
+
__export(ithome_exports, {
|
|
22
|
+
ithome: () => ithome
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(ithome_exports);
|
|
25
|
+
var import_utils = require("../parseContent/utils");
|
|
26
|
+
/**
 * Parse strategy for ITHome articles: fetched over HTTP and parsed into rich
 * content.
 */
var ithome = {
  parse: true,
  fetcher: "http",
  /**
   * Select `#paragraph`, promote each image's `data-original` value to `src`,
   * remove the listed unwanted blocks and strip `data-vmark` from every
   * visited node.
   */
  getContentHtmlFromArticle: ($) => {
    const el = $("#paragraph");

    $(el).find("img").each((_, img) => {
      const original = $(img).attr("data-original");
      if (!original) {
        return;
      }
      $(img).attr("src", original);
      $(img).removeAttr("data-original");
    });

    for (const sel of [".dy-live-bar", ".tougao-user", ".ad-tips", "dir"]) {
      $(el).find(sel).remove();
    }

    (0, import_utils.walk_the_DOM)($, $(el), (c) => {
      $(c).removeAttr("data-vmark");
    });

    return $(el);
  },
  /** No extra fields are extracted for this site. */
  getExtraItems: async ($, rich, item) => ({})
};
|
|
50
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
51
|
+
0 && (module.exports = {
|
|
52
|
+
ithome
|
|
53
|
+
});
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parsers/theverge.ts
|
|
20
|
+
var theverge_exports = {};
|
|
21
|
+
__export(theverge_exports, {
|
|
22
|
+
theverge: () => theverge
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(theverge_exports);
|
|
25
|
+
/**
 * Parse strategy for theverge: only sets `parse: true` and defines no
 * fetcher or hooks.
 */
var theverge = { parse: true };
|
|
28
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
29
|
+
0 && (module.exports = {
|
|
30
|
+
theverge
|
|
31
|
+
});
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// Build-generated CommonJS interop helpers for this module.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines one enumerable getter on `target` for every enumerable key of `all`;
// the values of `all` are used as the getter functions.
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

// Copies the own properties of `from` onto `to` as forwarding getters.
// Keys already present on `to`, and the key equal to `except`, are skipped.
// Enumerability follows the source descriptor. Always returns `to`.
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

// Wraps an exports object in a fresh object flagged with a non-enumerable
// `__esModule: true`.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
18
|
+
|
|
19
|
+
// src/strategy.ts
|
|
20
|
+
var strategy_exports = {};
|
|
21
|
+
__export(strategy_exports, {
|
|
22
|
+
strategies: () => strategies
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(strategy_exports);
|
|
25
|
+
var import_cnbeta = require("./parsers/cnbeta");
|
|
26
|
+
var import_ifanr = require("./parsers/ifanr");
|
|
27
|
+
var import_theverge = require("./parsers/theverge");
|
|
28
|
+
var import_ithome = require("./parsers/ithome");
|
|
29
|
+
/**
 * Registry of the bundled parse strategies, keyed by parser name.
 * Each value is the strategy object exported by the matching `./parsers/*` module.
 */
var strategies = {
  cnbeta: import_cnbeta.cnbeta,
  ifanr: import_ifanr.ifanr,
  theverge: import_theverge.theverge,
  ithome: import_ithome.ithome
};
|
|
35
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
36
|
+
0 && (module.exports = {
|
|
37
|
+
strategies
|
|
38
|
+
});
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { AnyNode, Cheerio, CheerioAPI } from 'cheerio';
|
|
2
|
+
import { Item } from 'rss-parser';
|
|
3
|
+
/**
 * Shape of a parser strategy object (see the modules under `parsers/`).
 */
export interface ParseStrategy {
    /** Required boolean flag; its interpretation is up to the consumer of the strategy. */
    parse: boolean;
    /** Optional fetcher selection: either `'http'` or `'playwright'`. */
    fetcher?: 'http' | 'playwright';
    /**
     * Optional hook that receives the loaded document and returns the
     * selection holding the article content.
     */
    getContentHtmlFromArticle?: (article: CheerioAPI) => Cheerio<AnyNode>;
    /**
     * Optional async hook that receives the loaded document, the rich content
     * built so far and the feed item, and resolves to extra key/value pairs.
     */
    getExtraItems?: (html: CheerioAPI, current: RichContent, item: Item) => Promise<Record<string, any>>;
}
|
|
9
|
+
/**
 * Content variant tagged `type: 'simple'`: title, description and
 * publication date only.
 */
export interface SimpleContent {
    /** Discriminant of the `Content` union. */
    type: 'simple';
    title: string;
    description: string;
    pubDate: Date;
}
|
|
15
|
+
/**
 * Image entry stored in `RichContent.images`. Only `url` is required;
 * every other field is optional metadata.
 */
export interface RichContentImage {
    url: string;
    type?: string;
    width?: number;
    height?: number;
    realUrl?: string;
    /** Unit string accompanying `width`. */
    widthUnit?: string;
    /** Unit string accompanying `height`. */
    heightUnit?: string;
    title?: string;
}
|
|
25
|
+
/**
 * Content variant tagged `type: 'rich'`: every `SimpleContent` field except
 * `type`, plus the content string and its image list.
 */
export interface RichContent extends Omit<SimpleContent, 'type'> {
    /** Discriminant of the `Content` union. */
    type: 'rich';
    content: string;
    images: RichContentImage[];
}
|
|
30
|
+
/** Union of the two content variants; narrow it via the `type` field (`'simple'` | `'rich'`). */
export type Content = SimpleContent | RichContent;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// Build-generated CommonJS interop helpers for this module.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Copies the own properties of `from` onto `to` as forwarding getters.
// Keys already present on `to`, and the key equal to `except`, are skipped.
// Enumerability follows the source descriptor. Always returns `to`.
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

// Wraps an exports object in a fresh object flagged with a non-enumerable
// `__esModule: true`.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
14
|
+
|
|
15
|
+
// src/types.ts
|
|
16
|
+
var types_exports = {};
|
|
17
|
+
module.exports = __toCommonJS(types_exports);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Reads the `--data` option (from `str`, else the `DISP_ARGV` environment
 * variable, else `process.argv`), base64-decodes and URI-decodes it, and
 * returns the JSON-parsed result. Returns `null` when the option is missing,
 * is not a string, or cannot be decoded/parsed.
 */
export declare const getData: (str?: string) => any;
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Build-generated CommonJS/ESM interop helpers for this module.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines one enumerable getter on `target` for every enumerable key of `all`;
// the values of `all` are used as the getter functions.
var __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

// Copies the own properties of `from` onto `to` as forwarding getters.
// Keys already present on `to`, and the key equal to `except`, are skipped.
// Enumerability follows the source descriptor. Always returns `to`.
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

// Builds a namespace-like object for a required module. The result inherits
// from the module's prototype and forwards its own properties.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

// Wraps an exports object in a fresh object flagged with a non-enumerable
// `__esModule: true`.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
29
|
+
|
|
30
|
+
// src/utils/getData.ts
|
|
31
|
+
var getData_exports = {};
|
|
32
|
+
__export(getData_exports, {
|
|
33
|
+
getData: () => getData
|
|
34
|
+
});
|
|
35
|
+
module.exports = __toCommonJS(getData_exports);
|
|
36
|
+
var import_yargs_parser = __toESM(require("yargs-parser"));
|
|
37
|
+
/**
 * Extracts and decodes the JSON payload carried by the `--data` option.
 *
 * Input precedence: the `str` argument, then the `DISP_ARGV` environment
 * variable (wrapped as `--data=<value>`), then `process.argv`.
 * The option value must be a string holding base64 of a URI-encoded JSON text.
 *
 * @param {string} [str] - Argument string handed to the option parser.
 * @returns {*} The parsed JSON value, or `null` when `--data` is absent, is
 *   not a string, or fails to decode/parse.
 */
var getData = function(str) {
  // Choose what gets handed to the option parser.
  const pickParserInput = () => {
    if (str) {
      return str;
    }
    const fromEnv = process.env.DISP_ARGV;
    if (fromEnv) {
      return `--data=${fromEnv}`;
    }
    return process.argv ? process.argv : "";
  };

  const parsed = (0, import_yargs_parser.default)(pickParserInput());
  const payload = parsed.data;
  if (!payload || typeof payload !== "string") {
    return null;
  }

  try {
    const json = decodeURIComponent(Buffer.from(payload, "base64").toString());
    return JSON.parse(json);
  } catch (err) {
    // Deliberate best effort: a malformed payload is reported as "no data".
    return null;
  }
};
|
|
60
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
61
|
+
0 && (module.exports = {
|
|
62
|
+
getData
|
|
63
|
+
});
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
  "name": "ns-rss-spider",
  "version": "0.0.2",
  "description": "",
  "main": "dist/cjs/index.js",
  "types": "dist/cjs/index.d.ts",
  "scripts": {
    "dev": "father dev",
    "test": "vitest run",
    "build": "father build",
    "build:deps": "father prebundle",
    "check": "father doctor",
    "prepublishOnly": "father doctor && npm run build",
    "pub": "npm run check && npm version patch && npm run build && npm publish --registry=https://registry.npmjs.org && git push origin master --tags"
  },
  "bin": {
    "ns-rss-spider": "./dist/cjs/cli.js"
  },
  "keywords": [],
  "authors": [],
  "license": "MIT",
  "files": [
    "dist",
    "compiled"
  ],
  "publishConfig": {
    "access": "public"
  },
  "devDependencies": {
    "@types/probe-image-size": "^7.2.4",
    "father": "^4.4.0",
    "vitest": "^1.2.2"
  },
  "dependencies": {
    "axios": "^1.6.7",
    "cheerio": "^1.0.0-rc.12",
    "html-entities": "^2.4.0",
    "probe-image-size": "^7.2.3",
    "rss-parser": "^3.13.0",
    "yargs-parser": "^21.1.1",
    "zx": "4.x"
  }
}
|