ns-rss-spider 0.0.15 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/parse.d.ts +3 -0
- package/dist/cjs/parseContent/generateSsrContent.d.ts +8 -0
- package/dist/cjs/parseContent/generateSsrContent.js +48 -0
- package/dist/cjs/parseContent/getBasicFromItem.d.ts +2 -0
- package/dist/cjs/parseContent/getBasicFromItem.js +41 -0
- package/dist/cjs/parseContent/index.d.ts +1 -2
- package/dist/cjs/parseContent/index.js +3 -5
- package/dist/cjs/parseContent/parseContent.d.ts +1 -2
- package/dist/cjs/parseContent/parseContent.js +5 -7
- package/dist/cjs/types.d.ts +9 -1
- package/dist/cjs/upload/index.d.ts +3 -0
- package/package.json +1 -1
package/dist/cjs/parse.d.ts
CHANGED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/generateSsrContent.ts
|
|
20
|
+
var generateSsrContent_exports = {};
|
|
21
|
+
__export(generateSsrContent_exports, {
|
|
22
|
+
generateSsrContent: () => generateSsrContent
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(generateSsrContent_exports);
|
|
25
|
+
var import_cheerio = require("cheerio");
|
|
26
|
+
var import_html_entities = require("html-entities");
|
|
27
|
+
/**
 * Produces a copy of an HTML fragment in which every `<img>` that has a `src`
 * attribute is replaced by a `<span data-img-placeholder>` element.
 *
 * Each placeholder carries the original URL in `data-src`, the known image
 * dimensions in `data-w` / `data-h` (empty strings when unknown), and an inline
 * style that sizes it to those dimensions (falling back to 1px per side).
 *
 * @param {string} content - HTML markup to transform.
 * @param {Array<{url: string, width?: number, height?: number}>} images -
 *   Image metadata matched against each `<img src>` by exact `url` equality.
 *   A null/undefined list is treated as empty.
 * @returns {string} The transformed markup, entity-decoded and trimmed.
 */
function generateSsrContent(content, images) {
  // Passing `false` as the third argument makes cheerio parse in fragment mode
  // (see the cheerio `load` documentation).
  const $ = (0, import_cheerio.load)(content, null, false);

  // Index the metadata once instead of scanning the list for every <img>.
  // The first entry per URL wins, which mirrors Array.prototype.find.
  const metaByUrl = new Map();
  for (const image of images || []) {
    if (image && !metaByUrl.has(image.url)) {
      metaByUrl.set(image.url, image);
    }
  }

  $("img").each((_, img) => {
    const src = $(img).attr("src");
    if (!src) {
      // Images without a source are left in place untouched.
      return;
    }
    const meta = metaByUrl.get(src);
    const width = meta ? meta.width : void 0;
    const height = meta ? meta.height : void 0;
    const $placeholder = $(`<span data-img-placeholder></span>`);
    $placeholder.attr({
      "data-w": width ? `${width}` : "",
      "data-h": height ? `${height}` : "",
      "data-src": src
    });
    $placeholder.attr("style", `display: inline-block;width: ${width || 1}px; height: ${height || 1}px`);
    $(img).replaceWith($placeholder);
  });

  // NOTE(review): decoding runs over the whole serialized markup, so escaped
  // entities inside text or attribute values are unescaped too — confirm that
  // consumers of this string expect that.
  return (0, import_html_entities.decode)($.html()).trim();
}
|
|
45
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
46
|
+
0 && (module.exports = {
|
|
47
|
+
generateSsrContent
|
|
48
|
+
});
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parseContent/getBasicFromItem.ts
|
|
20
|
+
var getBasicFromItem_exports = {};
|
|
21
|
+
__export(getBasicFromItem_exports, {
|
|
22
|
+
getBasicFromItem: () => getBasicFromItem
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(getBasicFromItem_exports);
|
|
25
|
+
/**
 * Maps a parsed feed item onto the basic article fields.
 *
 * @param {object} item - Feed item; the fields read are `guid`, `title`,
 *   `link`, `summary`, `contentSnippet`, `pubDate`, `isoDate`, `author`,
 *   `creator`, `source` and `categories`.
 * @returns {{guid: *, title: *, link: *, description: string, pubDate: Date,
 *   author: string, source: string, categories: string}}
 *   `description` prefers `summary` over the trimmed `contentSnippet`;
 *   `author` prefers `author` over `creator`; `categories` is the list joined
 *   with commas. Missing text fields become empty strings.
 */
function getBasicFromItem(item) {
  const snippet = item.contentSnippet;
  const categoryList = item.categories;
  return {
    guid: item.guid,
    title: item.title,
    link: item.link,
    description: item.summary || (snippet == null ? void 0 : snippet.trim()) || "",
    // Without a pubDate, `new Date(undefined)` is an Invalid Date. Fall back to
    // `isoDate` when the item carries one (presumably set by the feed parser —
    // verify against the parser in use). If both are absent the result is
    // still an Invalid Date, as before.
    pubDate: new Date(item.pubDate || item.isoDate),
    author: item.author || item.creator || "",
    source: item.source || "",
    categories: (categoryList == null ? void 0 : categoryList.join(",")) || ""
  };
}
|
|
38
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
39
|
+
0 && (module.exports = {
|
|
40
|
+
getBasicFromItem
|
|
41
|
+
});
|
|
@@ -37,6 +37,7 @@ var cheerio = __toESM(require("cheerio"));
|
|
|
37
37
|
var import_stripeHtml = require("./stripeHtml");
|
|
38
38
|
var import_parseContent = require("./parseContent");
|
|
39
39
|
var import_zx = require("zx");
|
|
40
|
+
var import_getBasicFromItem = require("./getBasicFromItem");
|
|
40
41
|
async function parseContent(item, strategy) {
|
|
41
42
|
var _a, _b;
|
|
42
43
|
let element;
|
|
@@ -55,11 +56,8 @@ async function parseContent(item, strategy) {
|
|
|
55
56
|
if (!(strategy == null ? void 0 : strategy.parse)) {
|
|
56
57
|
return {
|
|
57
58
|
type: "simple",
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
link: item.link,
|
|
61
|
-
description: ((_b = element.html()) == null ? void 0 : _b.trim()) || "",
|
|
62
|
-
pubDate: new Date(item.pubDate)
|
|
59
|
+
...(0, import_getBasicFromItem.getBasicFromItem)(item),
|
|
60
|
+
content: ((_b = element.html()) == null ? void 0 : _b.trim()) || ""
|
|
63
61
|
};
|
|
64
62
|
}
|
|
65
63
|
console.log(import_zx.chalk.green("正在解析文章内容"));
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import type { ParseStrategy, RichArticle } from "../types";
|
|
1
|
+
import type { ParseStrategy, RichArticle, RssItem } from "../types";
|
|
3
2
|
import { AnyNode, Cheerio, CheerioAPI } from "cheerio";
|
|
4
3
|
export declare function parseContent($: CheerioAPI, $element: Cheerio<AnyNode>, item: RssItem, strategy?: ParseStrategy): Promise<RichArticle>;
|
|
@@ -36,8 +36,9 @@ var import_utils = require("./utils");
|
|
|
36
36
|
var import_probe_image_size = __toESM(require("probe-image-size"));
|
|
37
37
|
var import_zx = require("zx");
|
|
38
38
|
var import_html_entities = require("html-entities");
|
|
39
|
+
var import_generateSsrContent = require("./generateSsrContent");
|
|
40
|
+
var import_getBasicFromItem = require("./getBasicFromItem");
|
|
39
41
|
async function parseContent($, $element, item, strategy) {
|
|
40
|
-
var _a;
|
|
41
42
|
const srcs = [];
|
|
42
43
|
const images = [];
|
|
43
44
|
(0, import_utils.walk_the_DOM)($, $element, (node) => {
|
|
@@ -82,7 +83,6 @@ async function parseContent($, $element, item, strategy) {
|
|
|
82
83
|
$($element).find("img").each((_, img) => {
|
|
83
84
|
const src = $(img).attr("src");
|
|
84
85
|
if (src) {
|
|
85
|
-
$(img).removeAttr("src");
|
|
86
86
|
$(img).attr("data-src", src);
|
|
87
87
|
const meta = images.find((i) => i.url === src);
|
|
88
88
|
if (meta == null ? void 0 : meta.width) {
|
|
@@ -93,14 +93,12 @@ async function parseContent($, $element, item, strategy) {
|
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
95
|
});
|
|
96
|
+
const content = (0, import_html_entities.decode)($element.html().trim());
|
|
96
97
|
return {
|
|
97
98
|
type: "rich",
|
|
98
|
-
|
|
99
|
-
title: item.title,
|
|
100
|
-
link: item.link,
|
|
101
|
-
description: ((_a = item.contentSnippet) == null ? void 0 : _a.trim()) || "",
|
|
99
|
+
...(0, import_getBasicFromItem.getBasicFromItem)(item),
|
|
102
100
|
content: (0, import_html_entities.decode)($element.html().trim()),
|
|
103
|
-
|
|
101
|
+
ssrContent: (0, import_generateSsrContent.generateSsrContent)(content, images),
|
|
104
102
|
images
|
|
105
103
|
};
|
|
106
104
|
}
|
package/dist/cjs/types.d.ts
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import type { AnyNode, Cheerio, CheerioAPI } from 'cheerio';
|
|
2
2
|
import { Item } from 'rss-parser';
|
|
3
|
+
export type RssItem = Item & {
|
|
4
|
+
source?: string;
|
|
5
|
+
author?: string;
|
|
6
|
+
};
|
|
3
7
|
export interface ParseStrategy {
|
|
4
8
|
parse: boolean;
|
|
5
9
|
fetcher?: 'http' | 'playwright';
|
|
@@ -12,7 +16,11 @@ export interface SimpleArticle {
|
|
|
12
16
|
title: string;
|
|
13
17
|
link: string;
|
|
14
18
|
description: string;
|
|
19
|
+
content: string;
|
|
15
20
|
pubDate: Date;
|
|
21
|
+
author?: string;
|
|
22
|
+
source?: string;
|
|
23
|
+
categories?: string;
|
|
16
24
|
}
|
|
17
25
|
export interface RichArticleImage {
|
|
18
26
|
url: string;
|
|
@@ -26,7 +34,7 @@ export interface RichArticleImage {
|
|
|
26
34
|
}
|
|
27
35
|
export interface RichArticle extends Omit<SimpleArticle, 'type'> {
|
|
28
36
|
type: 'rich';
|
|
29
|
-
|
|
37
|
+
ssrContent: string;
|
|
30
38
|
images: RichArticleImage[];
|
|
31
39
|
}
|
|
32
40
|
export type Article = SimpleArticle | RichArticle;
|