ns-rss-spider 1.1.5 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/parseContent/index.js +36 -5
- package/dist/cjs/parseContent/newsletter/parseHtml.d.ts +4 -0
- package/dist/cjs/parseContent/newsletter/parseHtml.js +54 -0
- package/dist/cjs/parseContent/newsletter/parseNewsLetter.d.ts +4 -0
- package/dist/cjs/parseContent/newsletter/parseNewsLetter.js +50 -0
- package/dist/cjs/parseContent/newsletter/parseNoteItem.d.ts +4 -0
- package/dist/cjs/parseContent/newsletter/parseNoteItem.js +59 -0
- package/dist/cjs/parseContent/newsletter/plainToGroup.d.ts +8 -0
- package/dist/cjs/parseContent/newsletter/plainToGroup.js +73 -0
- package/dist/cjs/parseContent/newsletter/tpl.d.ts +11 -0
- package/dist/cjs/parseContent/newsletter/tpl.js +86 -0
- package/dist/cjs/parseContent/newsletter/types.d.ts +46 -0
- package/dist/cjs/parseContent/newsletter/types.js +17 -0
- package/dist/cjs/parseContent/parseContent.d.ts +2 -2
- package/dist/cjs/parseContent/parseContent.js +3 -6
- package/dist/cjs/parseContent/stripeHtml.d.ts +2 -1
- package/dist/cjs/parseContent/stripeHtml.js +6 -1
- package/dist/cjs/parsers/avanderlee.js +10 -1
- package/dist/cjs/parsers/cnbeta.js +1 -1
- package/dist/cjs/parsers/cssweekly.d.ts +4 -0
- package/dist/cjs/parsers/cssweekly.js +51 -0
- package/dist/cjs/parsers/iosdevweekly.d.ts +4 -0
- package/dist/cjs/parsers/iosdevweekly.js +56 -0
- package/dist/cjs/parsers/javascriptweekly.d.ts +8 -0
- package/dist/cjs/parsers/javascriptweekly.js +124 -0
- package/dist/cjs/strategy.js +10 -1
- package/dist/cjs/types.d.ts +18 -3
- package/dist/cjs/upload/index.d.ts +1 -1
- package/dist/cjs/utils/arraySplit.d.ts +1 -0
- package/dist/cjs/utils/arraySplit.js +42 -0
- package/package.json +1 -1
|
@@ -49,19 +49,50 @@ async function parseContent(item, strategy) {
|
|
|
49
49
|
console.log(import_zx.chalk.green("正在拉取文章内容"), item.link);
|
|
50
50
|
const article = await (0, import_getArticleHtml.getArticleHtml)(item.link, strategy.fetcher);
|
|
51
51
|
$ = cheerio.load(article);
|
|
52
|
-
element = ((_a = strategy.getContentElementFromArticle) == null ? void 0 : _a.call(strategy, $)) || $.root();
|
|
52
|
+
element = ((_a = strategy.getContentElementFromArticle) == null ? void 0 : _a.call(strategy, $)) || $.root().find("body");
|
|
53
|
+
}
|
|
54
|
+
let newsletter;
|
|
55
|
+
if ((strategy == null ? void 0 : strategy.articleType) === "newsletter") {
|
|
56
|
+
console.log("解析 newsletter");
|
|
57
|
+
const ns = await strategy.run($, element).catch((e) => {
|
|
58
|
+
console.error(import_zx.chalk.red("parse topic error"));
|
|
59
|
+
console.error(e);
|
|
60
|
+
return void 0;
|
|
61
|
+
});
|
|
62
|
+
if (!ns || !ns.topics.length) {
|
|
63
|
+
console.error(import_zx.chalk.red("未解析到 topics"));
|
|
64
|
+
console.log(ns);
|
|
65
|
+
} else {
|
|
66
|
+
newsletter = ns;
|
|
67
|
+
}
|
|
53
68
|
}
|
|
54
69
|
console.log(import_zx.chalk.green("正在预处理 html"));
|
|
55
|
-
(0, import_stripeHtml.stripeHtml)($, element);
|
|
56
|
-
|
|
70
|
+
(0, import_stripeHtml.stripeHtml)($, element, strategy);
|
|
71
|
+
const base = {
|
|
72
|
+
...(0, import_getBasicFromItem.getBasicFromItem)(item),
|
|
73
|
+
content: ((_b = element.html()) == null ? void 0 : _b.trim()) || ""
|
|
74
|
+
};
|
|
75
|
+
if (!(strategy == null ? void 0 : strategy.parse) || strategy.articleType === "newsletter" && !newsletter) {
|
|
57
76
|
return {
|
|
58
77
|
type: "simple",
|
|
59
|
-
...
|
|
60
|
-
|
|
78
|
+
...base
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
if (strategy.articleType === "newsletter") {
|
|
82
|
+
return {
|
|
83
|
+
type: "newsletter",
|
|
84
|
+
...base,
|
|
85
|
+
newsletter
|
|
61
86
|
};
|
|
62
87
|
}
|
|
63
88
|
console.log(import_zx.chalk.green("正在解析文章内容"));
|
|
64
89
|
const rich = await (0, import_parseContent.parseContent)($, element, item, strategy);
|
|
90
|
+
if (!rich) {
|
|
91
|
+
return {
|
|
92
|
+
type: "simple",
|
|
93
|
+
...base
|
|
94
|
+
};
|
|
95
|
+
}
|
|
65
96
|
let extra = {};
|
|
66
97
|
if (rich.type === "rich") {
|
|
67
98
|
if (strategy.getExtraItems) {
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/newsletter/parseHtml.ts
|
|
20
|
+
var parseHtml_exports = {};
|
|
21
|
+
__export(parseHtml_exports, {
|
|
22
|
+
parseHtml: () => parseHtml
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(parseHtml_exports);
|
|
25
|
+
var import_html_entities = require("html-entities");
|
|
26
|
+
var parseHtml = ($, $el, options = {}) => {
|
|
27
|
+
const { removeLink } = options;
|
|
28
|
+
$el.find("*").each(function() {
|
|
29
|
+
const tag = this.tagName.toLowerCase();
|
|
30
|
+
if (tag === "a") {
|
|
31
|
+
if (removeLink) {
|
|
32
|
+
$(this).replaceWith($(`<span class="ns-desc-link" data-href="${this.attribs.href}">${$(this).text()}</span>`));
|
|
33
|
+
} else {
|
|
34
|
+
this.attribs = {
|
|
35
|
+
href: this.attribs.href
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
} else if (tag === "image") {
|
|
39
|
+
this.attribs = {
|
|
40
|
+
src: this.attribs.src
|
|
41
|
+
};
|
|
42
|
+
} else {
|
|
43
|
+
this.attribs = {};
|
|
44
|
+
}
|
|
45
|
+
if (["div", "span", "p", "ol", "ul", "blockquote", "a", "hr"].includes(tag) && $(this).children().length === 0 && $(this).text().trim() === "") {
|
|
46
|
+
$(this).remove();
|
|
47
|
+
}
|
|
48
|
+
});
|
|
49
|
+
return (0, import_html_entities.decode)($el.html().trim());
|
|
50
|
+
};
|
|
51
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
52
|
+
0 && (module.exports = {
|
|
53
|
+
parseHtml
|
|
54
|
+
});
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/newsletter/parseNewsLetter.ts
|
|
20
|
+
var parseNewsLetter_exports = {};
|
|
21
|
+
__export(parseNewsLetter_exports, {
|
|
22
|
+
parseNewsLetter: () => parseNewsLetter
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(parseNewsLetter_exports);
|
|
25
|
+
var import_parseHtml = require("./parseHtml");
|
|
26
|
+
function parseNewsLetter(tpl, $, $root) {
|
|
27
|
+
let rst = {
|
|
28
|
+
name: "",
|
|
29
|
+
topics: []
|
|
30
|
+
};
|
|
31
|
+
if (tpl.description) {
|
|
32
|
+
const $desc = $root.find(tpl.description);
|
|
33
|
+
if ($desc.html()) {
|
|
34
|
+
const desc = (0, import_parseHtml.parseHtml)($, $root.find(tpl.description), {
|
|
35
|
+
removeLink: tpl.descriptionRemoveLink
|
|
36
|
+
});
|
|
37
|
+
if (desc) {
|
|
38
|
+
rst.description = desc;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
if (tpl.image) {
|
|
43
|
+
rst.image = $root.find(tpl.image).first().attr("src");
|
|
44
|
+
}
|
|
45
|
+
return rst;
|
|
46
|
+
}
|
|
47
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
48
|
+
0 && (module.exports = {
|
|
49
|
+
parseNewsLetter
|
|
50
|
+
});
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { AnyNode, Cheerio, CheerioAPI } from "cheerio";
|
|
2
|
+
import { ParseTpl, ParseTplTopicCommon } from "./types";
|
|
3
|
+
import { SnapshotNote } from './types';
|
|
4
|
+
export declare function parseNoteItem($: CheerioAPI, $el: Cheerio<AnyNode>, tpl: ParseTplTopicCommon, transform?: ParseTpl["transform"]): SnapshotNote | undefined;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/newsletter/parseNoteItem.ts
|
|
20
|
+
var parseNoteItem_exports = {};
|
|
21
|
+
__export(parseNoteItem_exports, {
|
|
22
|
+
parseNoteItem: () => parseNoteItem
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(parseNoteItem_exports);
|
|
25
|
+
var import_parseHtml = require("./parseHtml");
|
|
26
|
+
function parseNoteItem($, $el, tpl, transform) {
|
|
27
|
+
const $link = $el.find(tpl.noteLink).first();
|
|
28
|
+
const url = $link.attr("href");
|
|
29
|
+
if (!url) {
|
|
30
|
+
return void 0;
|
|
31
|
+
}
|
|
32
|
+
const $tag = $el.find(tpl.noteTag).first();
|
|
33
|
+
const tag = $tag.text().trim();
|
|
34
|
+
const $desc = $el.find(tpl.noteBody).first();
|
|
35
|
+
let title = tpl.noteNoTitle ? "" : $link.text().trim();
|
|
36
|
+
title = (transform == null ? void 0 : transform("noteTitle", title)) || title;
|
|
37
|
+
let descriptionHtml;
|
|
38
|
+
if (tpl.noteRichDesc) {
|
|
39
|
+
descriptionHtml = (0, import_parseHtml.parseHtml)($, $desc, {
|
|
40
|
+
removeLink: tpl.noteRichDescRemoveLink
|
|
41
|
+
});
|
|
42
|
+
}
|
|
43
|
+
let description = $desc.text().trim() || "";
|
|
44
|
+
description = (transform == null ? void 0 : transform("noteDesc", description)) || description;
|
|
45
|
+
const rst = {
|
|
46
|
+
title: tpl.noteNoTitle ? "" : $link.text().trim(),
|
|
47
|
+
url,
|
|
48
|
+
description,
|
|
49
|
+
tag
|
|
50
|
+
};
|
|
51
|
+
if (descriptionHtml) {
|
|
52
|
+
rst.descriptionHtml = descriptionHtml;
|
|
53
|
+
}
|
|
54
|
+
return rst;
|
|
55
|
+
}
|
|
56
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
57
|
+
0 && (module.exports = {
|
|
58
|
+
parseNoteItem
|
|
59
|
+
});
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { AnyNode, Cheerio, CheerioAPI } from "cheerio";
|
|
2
|
+
export declare const emptyGroup: string;
|
|
3
|
+
export declare const emptyNote: string;
|
|
4
|
+
export declare function plainToGroup({ root, isTopicTitle, isTitle, }: {
|
|
5
|
+
root: Cheerio<AnyNode>;
|
|
6
|
+
isTopicTitle: (node: Cheerio<AnyNode>) => boolean;
|
|
7
|
+
isTitle: (node: Cheerio<AnyNode>) => boolean;
|
|
8
|
+
}): CheerioAPI;
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/newsletter/plainToGroup.ts
|
|
20
|
+
var plainToGroup_exports = {};
|
|
21
|
+
__export(plainToGroup_exports, {
|
|
22
|
+
emptyGroup: () => emptyGroup,
|
|
23
|
+
emptyNote: () => emptyNote,
|
|
24
|
+
plainToGroup: () => plainToGroup
|
|
25
|
+
});
|
|
26
|
+
module.exports = __toCommonJS(plainToGroup_exports);
|
|
27
|
+
var import_cheerio = require("cheerio");
|
|
28
|
+
var emptyGroup = `
|
|
29
|
+
<div class='ns-topic'>
|
|
30
|
+
<div class='ns-topic-title'></div>
|
|
31
|
+
<div class='ns-topic-content'></div>
|
|
32
|
+
</div>
|
|
33
|
+
`.trim();
|
|
34
|
+
var emptyNote = `
|
|
35
|
+
<div class='ns-note'>
|
|
36
|
+
<div class='ns-note-title'></div>
|
|
37
|
+
<div class='ns-note-content'></div>
|
|
38
|
+
</div>
|
|
39
|
+
`.trim();
|
|
40
|
+
function plainToGroup({
|
|
41
|
+
root,
|
|
42
|
+
isTopicTitle,
|
|
43
|
+
isTitle
|
|
44
|
+
}) {
|
|
45
|
+
const $ = (0, import_cheerio.load)("<body></body>");
|
|
46
|
+
let currentTopic = $(emptyGroup);
|
|
47
|
+
let currentNote = null;
|
|
48
|
+
root.children().each((_i, node) => {
|
|
49
|
+
const $node = $(node);
|
|
50
|
+
if (isTopicTitle($node)) {
|
|
51
|
+
currentTopic = $(emptyGroup);
|
|
52
|
+
currentTopic.find(".ns-topic-title").append($node.clone());
|
|
53
|
+
$("body").append(currentTopic);
|
|
54
|
+
} else if (isTitle($node)) {
|
|
55
|
+
currentNote = $(emptyNote);
|
|
56
|
+
currentNote.find(".ns-note-title").append($node.clone());
|
|
57
|
+
currentTopic.find(".ns-topic-content").append(currentNote);
|
|
58
|
+
} else {
|
|
59
|
+
if (currentNote) {
|
|
60
|
+
currentNote.find(".ns-note-content").append($node.clone());
|
|
61
|
+
} else {
|
|
62
|
+
console.error("invalid currentNote");
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
});
|
|
66
|
+
return $;
|
|
67
|
+
}
|
|
68
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
69
|
+
0 && (module.exports = {
|
|
70
|
+
emptyGroup,
|
|
71
|
+
emptyNote,
|
|
72
|
+
plainToGroup
|
|
73
|
+
});
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { AnyNode, Cheerio, CheerioAPI } from "cheerio";
|
|
2
|
+
import type { SnapshotResult } from "./types";
|
|
3
|
+
import { ParseTpl } from "./types";
|
|
4
|
+
/**
|
|
5
|
+
* Parsing approach when linear topics and group topics coexist
|
|
6
|
+
*
|
|
7
|
+
* 1. Use the selectors to pick out the sequence linearTitle, linearItem, group, linearTitle, linearItem, group, ...
|
|
8
|
+
* 2. Each linearTitle or group that is encountered starts a new group
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
export declare const parse: (tpl: ParseTpl, $: CheerioAPI, el: Cheerio<AnyNode>) => SnapshotResult;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/newsletter/tpl.ts
|
|
20
|
+
var tpl_exports = {};
|
|
21
|
+
__export(tpl_exports, {
|
|
22
|
+
parse: () => parse
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(tpl_exports);
|
|
25
|
+
var import_parseNewsLetter = require("./parseNewsLetter");
|
|
26
|
+
var import_parseNoteItem = require("./parseNoteItem");
|
|
27
|
+
var parse = (tpl, $, el) => {
|
|
28
|
+
let rst = (0, import_parseNewsLetter.parseNewsLetter)(tpl, $, $(el));
|
|
29
|
+
let selectors = [];
|
|
30
|
+
if (tpl.linearTopic) {
|
|
31
|
+
selectors = selectors.concat([tpl.linearTopic.title, tpl.linearTopic.item]);
|
|
32
|
+
}
|
|
33
|
+
if (tpl.groupTopic) {
|
|
34
|
+
selectors.push(`${tpl.groupTopic.group}`.trim());
|
|
35
|
+
}
|
|
36
|
+
const topicFlags = $(el).find(selectors.join(","));
|
|
37
|
+
let t = [];
|
|
38
|
+
topicFlags.each((_, el2) => {
|
|
39
|
+
var _a, _b, _c, _d;
|
|
40
|
+
if (el2.nodeType !== 1) {
|
|
41
|
+
throw Error("invalid title or item: nodeType !== 1");
|
|
42
|
+
}
|
|
43
|
+
const $el = $(el2);
|
|
44
|
+
const isLinearTitle = (_a = tpl.linearTopic) == null ? void 0 : _a.matchTitle($el);
|
|
45
|
+
const isLinearItem = (_b = tpl.linearTopic) == null ? void 0 : _b.matchItem($el);
|
|
46
|
+
if (isLinearTitle) {
|
|
47
|
+
let title = $el.text().trim();
|
|
48
|
+
title = ((_c = tpl.transform) == null ? void 0 : _c.call(tpl, "noteTitle", title)) || title;
|
|
49
|
+
t.push({
|
|
50
|
+
title,
|
|
51
|
+
notes: []
|
|
52
|
+
});
|
|
53
|
+
} else if (isLinearItem) {
|
|
54
|
+
if (!t.length) {
|
|
55
|
+
t.push({
|
|
56
|
+
title: "",
|
|
57
|
+
notes: []
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
const item = (0, import_parseNoteItem.parseNoteItem)($, $el, tpl.linearTopic, tpl.transform);
|
|
61
|
+
if (item) {
|
|
62
|
+
t[t.length - 1].notes.push(item);
|
|
63
|
+
}
|
|
64
|
+
} else {
|
|
65
|
+
const $group = $el;
|
|
66
|
+
const gTpl = tpl.groupTopic;
|
|
67
|
+
let title = $group.find(gTpl.title).first().text().trim();
|
|
68
|
+
title = ((_d = tpl.transform) == null ? void 0 : _d.call(tpl, "noteTitle", title)) || title;
|
|
69
|
+
const notes = Array.from($group.find(gTpl.item)).map((item) => {
|
|
70
|
+
const $item = $(item);
|
|
71
|
+
return (0, import_parseNoteItem.parseNoteItem)($, $item, gTpl, tpl.transform);
|
|
72
|
+
}).filter((n) => !!n);
|
|
73
|
+
t.push({
|
|
74
|
+
title,
|
|
75
|
+
notes
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
});
|
|
79
|
+
t = t.filter((t2) => t2.notes.length > 0);
|
|
80
|
+
rst.topics = t;
|
|
81
|
+
return rst;
|
|
82
|
+
};
|
|
83
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
84
|
+
0 && (module.exports = {
|
|
85
|
+
parse
|
|
86
|
+
});
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { AnyNode, Cheerio } from "cheerio";
|
|
2
|
+
export type Selector = string;
|
|
3
|
+
export interface ParseTplTopicCommon {
|
|
4
|
+
root?: Selector;
|
|
5
|
+
title: Selector;
|
|
6
|
+
item: Selector;
|
|
7
|
+
noteLink: Selector;
|
|
8
|
+
noteBody?: Selector;
|
|
9
|
+
noteFrom?: Selector;
|
|
10
|
+
noteNoTitle?: boolean;
|
|
11
|
+
noteTag?: Selector;
|
|
12
|
+
noteRichDesc?: boolean;
|
|
13
|
+
noteRichDescRemoveLink?: boolean;
|
|
14
|
+
}
|
|
15
|
+
export interface ParseTpl {
|
|
16
|
+
description?: Selector;
|
|
17
|
+
descriptionRemoveLink?: boolean;
|
|
18
|
+
image?: Selector;
|
|
19
|
+
transform?: (type: "date" | "noteTitle" | "noteDesc", content: string) => string | undefined;
|
|
20
|
+
linearTopic?: ParseTplTopicCommon & {
|
|
21
|
+
matchTitle: (el: Cheerio<AnyNode>) => boolean;
|
|
22
|
+
matchItem: (el: Cheerio<AnyNode>) => boolean;
|
|
23
|
+
};
|
|
24
|
+
groupTopic?: {
|
|
25
|
+
group: Selector;
|
|
26
|
+
} & ParseTplTopicCommon;
|
|
27
|
+
}
|
|
28
|
+
export interface SnapshotResult {
|
|
29
|
+
name: string;
|
|
30
|
+
description?: string;
|
|
31
|
+
image?: string;
|
|
32
|
+
topics: SnapshotTopic[];
|
|
33
|
+
}
|
|
34
|
+
export interface SnapshotTopic {
|
|
35
|
+
title: string;
|
|
36
|
+
description?: string;
|
|
37
|
+
image?: string;
|
|
38
|
+
notes: SnapshotNote[];
|
|
39
|
+
}
|
|
40
|
+
export interface SnapshotNote {
|
|
41
|
+
url: string;
|
|
42
|
+
title: string;
|
|
43
|
+
description: string;
|
|
44
|
+
descriptionHtml?: string;
|
|
45
|
+
tag?: string;
|
|
46
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __copyProps = (to, from, except, desc) => {
|
|
6
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
7
|
+
for (let key of __getOwnPropNames(from))
|
|
8
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
9
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
10
|
+
}
|
|
11
|
+
return to;
|
|
12
|
+
};
|
|
13
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
14
|
+
|
|
15
|
+
// src/parseContent/newsletter/types.ts
|
|
16
|
+
var types_exports = {};
|
|
17
|
+
module.exports = __toCommonJS(types_exports);
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type { Article,
|
|
1
|
+
import type { Article, ParseStrategyArticle, RssItem } from "../types";
|
|
2
2
|
import { AnyNode, Cheerio, CheerioAPI } from "cheerio";
|
|
3
|
-
export declare function parseContent($: CheerioAPI, $element: Cheerio<AnyNode>, item: RssItem, strategy?:
|
|
3
|
+
export declare function parseContent($: CheerioAPI, $element: Cheerio<AnyNode>, item: RssItem, strategy?: ParseStrategyArticle): Promise<Article | undefined>;
|
|
@@ -47,7 +47,7 @@ async function parseContent($, $element, item, strategy) {
|
|
|
47
47
|
if (node.is("img")) {
|
|
48
48
|
const src = node.attr("src");
|
|
49
49
|
const title = node.attr("title") || void 0;
|
|
50
|
-
if (src) {
|
|
50
|
+
if (src && !src.startsWith("data:")) {
|
|
51
51
|
srcs.push({
|
|
52
52
|
src,
|
|
53
53
|
title
|
|
@@ -55,6 +55,7 @@ async function parseContent($, $element, item, strategy) {
|
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
57
|
});
|
|
58
|
+
console.log("开始解析图片,数目:", srcs.length);
|
|
58
59
|
for (let item2 of srcs) {
|
|
59
60
|
if (strategy == null ? void 0 : strategy.ignoreProbeImage) {
|
|
60
61
|
images.push({
|
|
@@ -109,11 +110,7 @@ async function parseContent($, $element, item, strategy) {
|
|
|
109
110
|
if (!content) {
|
|
110
111
|
console.error(import_zx.chalk.red("解析 html 内容出错, html 如下"));
|
|
111
112
|
console.log($.html());
|
|
112
|
-
return
|
|
113
|
-
type: "simple",
|
|
114
|
-
content: "",
|
|
115
|
-
...(0, import_getBasicFromItem.getBasicFromItem)(item)
|
|
116
|
-
};
|
|
113
|
+
return void 0;
|
|
117
114
|
}
|
|
118
115
|
return {
|
|
119
116
|
type: "rich",
|
|
@@ -1,2 +1,3 @@
|
|
|
1
1
|
import type { Cheerio, AnyNode, CheerioAPI } from "cheerio";
|
|
2
|
-
|
|
2
|
+
import type { ParseStrategy } from "../types";
|
|
3
|
+
export declare function stripeHtml($: CheerioAPI, element: Cheerio<AnyNode>, strategy?: ParseStrategy): void;
|
|
@@ -23,7 +23,7 @@ __export(stripeHtml_exports, {
|
|
|
23
23
|
});
|
|
24
24
|
module.exports = __toCommonJS(stripeHtml_exports);
|
|
25
25
|
var import_utils = require("./utils");
|
|
26
|
-
function stripeHtml($, element) {
|
|
26
|
+
function stripeHtml($, element, strategy) {
|
|
27
27
|
$(element).find("script").remove();
|
|
28
28
|
(0, import_utils.walk_the_DOM)($, element, (el) => {
|
|
29
29
|
for (let attr of [
|
|
@@ -39,6 +39,11 @@ function stripeHtml($, element) {
|
|
|
39
39
|
$(el).removeAttr("target");
|
|
40
40
|
}
|
|
41
41
|
if ($(el).is("img")) {
|
|
42
|
+
if (strategy == null ? void 0 : strategy.imageSrcAttr) {
|
|
43
|
+
const src2 = $(el).attr(strategy.imageSrcAttr);
|
|
44
|
+
$(el).attr("src", src2);
|
|
45
|
+
;
|
|
46
|
+
}
|
|
42
47
|
const src = $(el).attr("src");
|
|
43
48
|
if (src == null ? void 0 : src.startsWith("//")) {
|
|
44
49
|
console.log("img `//` 开头域名添加 https", src);
|
|
@@ -23,7 +23,16 @@ __export(avanderlee_exports, {
|
|
|
23
23
|
});
|
|
24
24
|
module.exports = __toCommonJS(avanderlee_exports);
|
|
25
25
|
var avanderlee = {
|
|
26
|
-
parse:
|
|
26
|
+
parse: true,
|
|
27
|
+
fetcher: "http",
|
|
28
|
+
imageSrcAttr: "data-lazy-src",
|
|
29
|
+
getContentElementFromArticle: ($) => {
|
|
30
|
+
const el = $("#content > article > section.post-content");
|
|
31
|
+
for (let sel of [".gotngnp", ".article-inline-newsletter-container", "noscript", 'a > svg[aria-hidden="true"]']) {
|
|
32
|
+
$(el).find(sel).remove();
|
|
33
|
+
}
|
|
34
|
+
return el;
|
|
35
|
+
}
|
|
27
36
|
};
|
|
28
37
|
// Annotate the CommonJS export names for ESM import in node:
|
|
29
38
|
0 && (module.exports = {
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parsers/cssweekly.ts
|
|
20
|
+
var cssweekly_exports = {};
|
|
21
|
+
__export(cssweekly_exports, {
|
|
22
|
+
cssweekly: () => cssweekly,
|
|
23
|
+
tpl: () => tpl
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(cssweekly_exports);
|
|
26
|
+
var import_tpl = require("../parseContent/newsletter/tpl");
|
|
27
|
+
var tpl = {
|
|
28
|
+
description: ".newsletter-intro",
|
|
29
|
+
groupTopic: {
|
|
30
|
+
group: "section.newsletter-section",
|
|
31
|
+
title: "h2.section-title",
|
|
32
|
+
item: "article.newsletter-article",
|
|
33
|
+
noteLink: ".newsletter-header h2.article-title a",
|
|
34
|
+
noteBody: "p",
|
|
35
|
+
noteRichDesc: true
|
|
36
|
+
}
|
|
37
|
+
};
|
|
38
|
+
var cssweekly = {
|
|
39
|
+
parse: true,
|
|
40
|
+
fetcher: "http",
|
|
41
|
+
articleType: "newsletter",
|
|
42
|
+
run: async ($, el) => {
|
|
43
|
+
const rst = await (0, import_tpl.parse)(tpl, $, el);
|
|
44
|
+
return rst;
|
|
45
|
+
}
|
|
46
|
+
};
|
|
47
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
48
|
+
0 && (module.exports = {
|
|
49
|
+
cssweekly,
|
|
50
|
+
tpl
|
|
51
|
+
});
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parsers/iosdevweekly.ts
|
|
20
|
+
var iosdevweekly_exports = {};
|
|
21
|
+
__export(iosdevweekly_exports, {
|
|
22
|
+
iosdevweekly: () => iosdevweekly,
|
|
23
|
+
tpl: () => tpl
|
|
24
|
+
});
|
|
25
|
+
module.exports = __toCommonJS(iosdevweekly_exports);
|
|
26
|
+
var import_tpl = require("../parseContent/newsletter/tpl");
|
|
27
|
+
var tpl = {
|
|
28
|
+
description: ".category.cc-comment #comment > .item--issue",
|
|
29
|
+
descriptionRemoveLink: true,
|
|
30
|
+
groupTopic: {
|
|
31
|
+
group: ".issue__body > .category",
|
|
32
|
+
title: "h2.category__title",
|
|
33
|
+
item: ".item.item--issue",
|
|
34
|
+
noteLink: "h3.item__title > a",
|
|
35
|
+
noteBody: "p",
|
|
36
|
+
noteRichDesc: true,
|
|
37
|
+
noteRichDescRemoveLink: true
|
|
38
|
+
}
|
|
39
|
+
};
|
|
40
|
+
var iosdevweekly = {
|
|
41
|
+
parse: true,
|
|
42
|
+
fetcher: "http",
|
|
43
|
+
articleType: "newsletter",
|
|
44
|
+
getContentElementFromArticle($) {
|
|
45
|
+
return $("article.issue");
|
|
46
|
+
},
|
|
47
|
+
run: async ($, el) => {
|
|
48
|
+
const rst = await (0, import_tpl.parse)(tpl, $, el);
|
|
49
|
+
return rst;
|
|
50
|
+
}
|
|
51
|
+
};
|
|
52
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
53
|
+
0 && (module.exports = {
|
|
54
|
+
iosdevweekly,
|
|
55
|
+
tpl
|
|
56
|
+
});
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { ParseTpl } from "../parseContent/newsletter/types";
|
|
2
|
+
import { ParseStrategy } from "../types";
|
|
3
|
+
import { CheerioAPI } from "cheerio";
|
|
4
|
+
export declare const tpl: ParseTpl;
|
|
5
|
+
export declare function transform($: CheerioAPI): CheerioAPI;
|
|
6
|
+
export declare function transformFromInDesc($: CheerioAPI): CheerioAPI;
|
|
7
|
+
export declare function transformExceptionGroup($: CheerioAPI): CheerioAPI;
|
|
8
|
+
export declare const javascriptweekly: ParseStrategy;
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parsers/javascriptweekly.ts
|
|
20
|
+
var javascriptweekly_exports = {};
|
|
21
|
+
__export(javascriptweekly_exports, {
|
|
22
|
+
javascriptweekly: () => javascriptweekly,
|
|
23
|
+
tpl: () => tpl,
|
|
24
|
+
transform: () => transform,
|
|
25
|
+
transformExceptionGroup: () => transformExceptionGroup,
|
|
26
|
+
transformFromInDesc: () => transformFromInDesc
|
|
27
|
+
});
|
|
28
|
+
module.exports = __toCommonJS(javascriptweekly_exports);
|
|
29
|
+
var import_tpl = require("../parseContent/newsletter/tpl");
|
|
30
|
+
var import_arraySplit = require("../utils/arraySplit");
|
|
31
|
+
var r = "#content";
|
|
32
|
+
var tpl = {
|
|
33
|
+
image: `${r} .el-fullwidthimage img`,
|
|
34
|
+
transform: (type, desc) => {
|
|
35
|
+
var _a;
|
|
36
|
+
if (type === "noteDesc") {
|
|
37
|
+
let d = desc.trim();
|
|
38
|
+
if (d.startsWith("—")) {
|
|
39
|
+
d = d.substring(1).trim();
|
|
40
|
+
}
|
|
41
|
+
return d;
|
|
42
|
+
} else if (type === "noteTitle" && desc.endsWith(":")) {
|
|
43
|
+
return desc.substring(0, desc.length - 1);
|
|
44
|
+
} else if (type === "date") {
|
|
45
|
+
return ((_a = desc.split("—")[1]) == null ? void 0 : _a.trim()) || "";
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
linearTopic: {
|
|
49
|
+
title: `${r} > .el-heading`,
|
|
50
|
+
matchTitle: (el) => el.hasClass("el-heading"),
|
|
51
|
+
item: `${r} > .el-item, ${r} > .miniitem.item`,
|
|
52
|
+
matchItem: (el) => ["el-item", "miniitem"].some((cls) => el.hasClass(cls)),
|
|
53
|
+
noteLink: ".desc a",
|
|
54
|
+
noteBody: ".desc",
|
|
55
|
+
noteFrom: ".name",
|
|
56
|
+
noteTag: ".tag-sponsor",
|
|
57
|
+
noteRichDesc: true,
|
|
58
|
+
noteRichDescRemoveLink: true
|
|
59
|
+
},
|
|
60
|
+
groupTopic: {
|
|
61
|
+
group: `${r} .content.el-md, ${r} > .el-subtable table.content.el-content.briefs`,
|
|
62
|
+
title: "tbody > tr > td > p",
|
|
63
|
+
item: "tbody > tr > td > ul > li",
|
|
64
|
+
noteLink: "a",
|
|
65
|
+
noteBody: "*",
|
|
66
|
+
noteTag: ".tag-sponsor",
|
|
67
|
+
noteRichDesc: true,
|
|
68
|
+
noteRichDescRemoveLink: true
|
|
69
|
+
//noteDescIncludeTitle: true,
|
|
70
|
+
}
|
|
71
|
+
};
|
|
72
|
+
/**
 * Runs the document pre-processing passes in order, first
 * `transformExceptionGroup`, then `transformFromInDesc`, mutating the
 * document behind `$` in place.
 *
 * @param $ Cheerio API bound to the newsletter document.
 * @returns the same `$` that was passed in.
 */
function transform($) {
  const passes = [transformExceptionGroup, transformFromInDesc];
  for (const pass of passes) {
    pass($);
  }
  return $;
}
|
|
77
|
+
/**
 * For every linear-topic item that carries the `miniitem` class, removes the
 * last `.name` element found under `tbody > tr > td > p.desc`.
 * Items without the `miniitem` class are left untouched.
 *
 * @param $ Cheerio API bound to the newsletter document (mutated in place).
 * @returns the same `$` that was passed in.
 */
function transformFromInDesc($) {
  $(tpl.linearTopic.item).each((_index, node) => {
    const entry = $(node);
    if (!entry.hasClass("miniitem")) {
      return;
    }
    entry.find("tbody > tr > td > p.desc > .name").last().remove();
  });
  return $;
}
|
|
87
|
+
/**
 * Normalises group tables whose cell children are two `<p>` elements (their
 * joined tag names equal "pp") into the `<p>` + `<ul><li>` shape that
 * `tpl.groupTopic.item` selects.
 *
 * The last `<p>` is replaced by a new `<ul>`. Its former child nodes are split
 * at `<br>` elements, and each run of nodes becomes one `<li><p>…</p></li>`
 * appended to that list. Groups with any other child layout are left as-is.
 *
 * @param $ Cheerio API bound to the newsletter document (mutated in place).
 * @returns the same `$` that was passed in.
 */
function transformExceptionGroup($) {
  const isLineBreak = (node) => node.nodeType === 1 && node.tagName === "br";
  $(tpl.groupTopic.group).each((_index, groupNode) => {
    const cells = $(groupNode).find("tbody > tr > td");
    const childTags = Array.from(cells.children()).map((child) => child.tagName.toLowerCase());
    if (childTags.join("") !== "pp") {
      return;
    }
    const notes = cells.children().last();
    // Capture the child nodes before the paragraph is swapped out and emptied.
    const segments = (0, import_arraySplit.arraySplit)(Array.from(notes.contents()), isLineBreak);
    notes.replaceWith($("<ul></ul>")).html("");
    // After the replacement the freshly inserted <ul> is the last child.
    const list = cells.children().last();
    for (const segment of segments) {
      const paragraph = $("<p></p>");
      for (const node of segment) {
        paragraph.append(node);
      }
      list.append($("<li></li>").append(paragraph));
    }
  });
  return $;
}
|
|
108
|
+
/**
 * Parse strategy for this newsletter layout: content is parsed, fetched over
 * plain HTTP, and treated as a `newsletter` article.
 */
var javascriptweekly = {
  parse: true,
  fetcher: "http",
  articleType: "newsletter",
  /**
   * Pre-processes the document with `transform`, then delegates to the
   * newsletter template parser with `tpl`.
   *
   * @param $  Cheerio API bound to the fetched document.
   * @param el node passed through unchanged to the template parser.
   * @returns whatever the template parser resolves to.
   */
  run: async ($, el) => {
    const prepared = transform($);
    return await (0, import_tpl.parse)(tpl, prepared, el);
  }
};
|
|
117
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
118
|
+
0 && (module.exports = {
|
|
119
|
+
javascriptweekly,
|
|
120
|
+
tpl,
|
|
121
|
+
transform,
|
|
122
|
+
transformExceptionGroup,
|
|
123
|
+
transformFromInDesc
|
|
124
|
+
});
|
package/dist/cjs/strategy.js
CHANGED
|
@@ -33,6 +33,9 @@ var import_tmtpost = require("./parsers/tmtpost");
|
|
|
33
33
|
var import_leiphone = require("./parsers/leiphone");
|
|
34
34
|
var import_oschina = require("./parsers/oschina");
|
|
35
35
|
var import_avanderlee = require("./parsers/avanderlee");
|
|
36
|
+
var import_iosdevweekly = require("./parsers/iosdevweekly");
|
|
37
|
+
var import_cssweekly = require("./parsers/cssweekly");
|
|
38
|
+
var import_javascriptweekly = require("./parsers/javascriptweekly");
|
|
36
39
|
var strategies = {
|
|
37
40
|
cnbeta: import_cnbeta.cnbeta,
|
|
38
41
|
ifanr: import_ifanr.ifanr,
|
|
@@ -44,7 +47,13 @@ var strategies = {
|
|
|
44
47
|
tmtpost: import_tmtpost.tmtpost,
|
|
45
48
|
leiphone: import_leiphone.leiphone,
|
|
46
49
|
oschina: import_oschina.oschina,
|
|
47
|
-
avanderlee: import_avanderlee.avanderlee
|
|
50
|
+
avanderlee: import_avanderlee.avanderlee,
|
|
51
|
+
iosdevweekly: import_iosdevweekly.iosdevweekly,
|
|
52
|
+
cssweekly: import_cssweekly.cssweekly,
|
|
53
|
+
javascriptweekly: import_javascriptweekly.javascriptweekly,
|
|
54
|
+
nodeweekly: import_javascriptweekly.javascriptweekly,
|
|
55
|
+
reactstatus: import_javascriptweekly.javascriptweekly,
|
|
56
|
+
frontendfocus: import_javascriptweekly.javascriptweekly
|
|
48
57
|
};
|
|
49
58
|
// Annotate the CommonJS export names for ESM import in node:
|
|
50
59
|
0 && (module.exports = {
|
package/dist/cjs/types.d.ts
CHANGED
|
@@ -1,18 +1,28 @@
|
|
|
1
1
|
import type { AnyNode, Cheerio, CheerioAPI } from 'cheerio';
|
|
2
2
|
import { Item } from 'rss-parser';
|
|
3
|
+
import { SnapshotResult } from './parseContent/newsletter/types';
|
|
3
4
|
export type RssItem = Item & {
|
|
4
5
|
source?: string;
|
|
5
6
|
author?: string;
|
|
6
7
|
};
|
|
7
|
-
|
|
8
|
+
interface ParseStrategyBase {
|
|
8
9
|
parse: boolean;
|
|
9
10
|
fetcher?: 'http' | 'playwright';
|
|
10
11
|
getContentElementFromArticle?: (aritcle: CheerioAPI) => Cheerio<AnyNode>;
|
|
12
|
+
imageSrcAttr?: string;
|
|
13
|
+
getThumbs?: (items: Item[]) => Promise<Record<string, string> | undefined>;
|
|
14
|
+
}
|
|
15
|
+
export interface ParseStrategyArticle extends ParseStrategyBase {
|
|
16
|
+
articleType?: 'article';
|
|
11
17
|
getContentFromHtml?: ($: CheerioAPI, node: Cheerio<AnyNode>) => string;
|
|
12
18
|
getExtraItems?: ($: CheerioAPI, current: RichArticle, item: Item) => Promise<Record<string, any>>;
|
|
13
19
|
ignoreProbeImage?: boolean;
|
|
14
|
-
getThumbs?: (items: Item[]) => Promise<Record<string, string> | undefined>;
|
|
15
20
|
}
|
|
21
|
+
export interface ParseStrategyMewsLetter extends ParseStrategyBase {
|
|
22
|
+
articleType: 'newsletter';
|
|
23
|
+
run: ($: CheerioAPI, node: Cheerio<AnyNode>) => Promise<SnapshotResult>;
|
|
24
|
+
}
|
|
25
|
+
export type ParseStrategy = ParseStrategyArticle | ParseStrategyMewsLetter;
|
|
16
26
|
export interface SimpleArticle {
|
|
17
27
|
type: 'simple';
|
|
18
28
|
guid: string;
|
|
@@ -42,4 +52,9 @@ export interface RichArticle extends Omit<SimpleArticle, 'type'> {
|
|
|
42
52
|
ssrContent: string;
|
|
43
53
|
images: RichArticleImage[];
|
|
44
54
|
}
|
|
45
|
-
export
|
|
55
|
+
export interface NewsletterArticle extends Omit<SimpleArticle, 'type'> {
|
|
56
|
+
type: 'newsletter';
|
|
57
|
+
newsletter: SnapshotResult;
|
|
58
|
+
}
|
|
59
|
+
export type Article = SimpleArticle | RichArticle | NewsletterArticle;
|
|
60
|
+
export {};
|
|
@@ -12,7 +12,7 @@ export declare function uploadContent({ app, getUploadApi, items, }: {
|
|
|
12
12
|
items: Article[];
|
|
13
13
|
} & ServerInfo): Promise<Article[] | {
|
|
14
14
|
storePath: any;
|
|
15
|
-
type: "simple" | "rich";
|
|
15
|
+
type: "newsletter" | "simple" | "rich";
|
|
16
16
|
guid: string;
|
|
17
17
|
title: string;
|
|
18
18
|
link: string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const arraySplit: <T>(arr: T[], fn: (item: T) => boolean) => T[][];
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/utils/arraySplit.ts
|
|
20
|
+
var arraySplit_exports = {};
|
|
21
|
+
__export(arraySplit_exports, {
|
|
22
|
+
arraySplit: () => arraySplit
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(arraySplit_exports);
|
|
25
|
+
var arraySplit = (arr, fn) => {
|
|
26
|
+
const rst = [];
|
|
27
|
+
let tmp = [];
|
|
28
|
+
arr.forEach((el, i) => {
|
|
29
|
+
if (fn(el)) {
|
|
30
|
+
rst.push(tmp);
|
|
31
|
+
tmp = [];
|
|
32
|
+
} else {
|
|
33
|
+
tmp.push(el);
|
|
34
|
+
}
|
|
35
|
+
});
|
|
36
|
+
rst.push(tmp);
|
|
37
|
+
return rst;
|
|
38
|
+
};
|
|
39
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
40
|
+
0 && (module.exports = {
|
|
41
|
+
arraySplit
|
|
42
|
+
});
|