ns-rss-spider 1.0.14 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/parseContent/parseContent.js +3 -1
- package/dist/cjs/parsers/leiphone.d.ts +2 -0
- package/dist/cjs/parsers/leiphone.js +81 -0
- package/dist/cjs/parsers/oschina.d.ts +2 -0
- package/dist/cjs/parsers/oschina.js +37 -0
- package/dist/cjs/strategy.js +5 -1
- package/dist/cjs/types.d.ts +1 -0
- package/dist/cjs/upload/index.d.ts +1 -0
- package/dist/cjs/utils/getArticleImage.d.ts +11 -0
- package/dist/cjs/utils/getArticleImage.js +37 -0
- package/package.json +1 -1
|
@@ -38,6 +38,7 @@ var import_zx = require("zx");
|
|
|
38
38
|
var import_html_entities = require("html-entities");
|
|
39
39
|
var import_generateSsrContent = require("./generateSsrContent");
|
|
40
40
|
var import_getBasicFromItem = require("./getBasicFromItem");
|
|
41
|
+
var import_getArticleImage = require("../utils/getArticleImage");
|
|
41
42
|
async function parseContent($, $element, item, strategy) {
|
|
42
43
|
var _a;
|
|
43
44
|
const srcs = [];
|
|
@@ -117,7 +118,8 @@ async function parseContent($, $element, item, strategy) {
|
|
|
117
118
|
...(0, import_getBasicFromItem.getBasicFromItem)(item),
|
|
118
119
|
content,
|
|
119
120
|
ssrContent: (0, import_generateSsrContent.generateSsrContent)(content, images),
|
|
120
|
-
images
|
|
121
|
+
images,
|
|
122
|
+
image: (0, import_getArticleImage.getImage)(images)
|
|
121
123
|
};
|
|
122
124
|
}
|
|
123
125
|
// Annotate the CommonJS export names for ESM import in node:
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __export = (target, all) => {
|
|
8
|
+
for (var name in all)
|
|
9
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
+
};
|
|
11
|
+
var __copyProps = (to, from, except, desc) => {
|
|
12
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
+
for (let key of __getOwnPropNames(from))
|
|
14
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
+
}
|
|
17
|
+
return to;
|
|
18
|
+
};
|
|
19
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
+
mod
|
|
26
|
+
));
|
|
27
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
+
|
|
29
|
+
// src/parsers/leiphone.ts
|
|
30
|
+
var leiphone_exports = {};
|
|
31
|
+
__export(leiphone_exports, {
|
|
32
|
+
leiphone: () => leiphone
|
|
33
|
+
});
|
|
34
|
+
module.exports = __toCommonJS(leiphone_exports);
|
|
35
|
+
var import_axios = __toESM(require("axios"));
|
|
36
|
+
var import_zx = require("zx");
|
|
37
|
+
var import_cheerio = require("cheerio");
|
|
38
|
+
var import_constants = require("../utils/constants");
|
|
39
|
+
/**
 * Parsing strategy for leiphone.com.
 *
 * `parse` and `fetcher` are static settings read by the code that consumes
 * strategies (not visible here); their values are kept exactly as published.
 */
var leiphone = {
  parse: true,
  fetcher: "http",
  /**
   * Selects the article body from a loaded article page.
   *
   * @param $ cheerio-style selector function for the article document
   * @returns the selection for `.article-template .lph-article-comView`,
   *   passed through `$` once more as the original did
   */
  getContentElementFromArticle: ($) => {
    const body = $(".article-template .lph-article-comView");
    return $(body);
  },
  /**
   * Builds a `guid -> thumbnail URL` map by scraping the leiphone home page.
   *
   * An anchor contributes an entry only when it carries a lazy-load image
   * (`data-original`) and its `href` is strictly equal to the `guid` of one
   * of the given articles.
   *
   * @param articles items exposing a `guid` property; a missing list is
   *   treated as empty
   * @returns the map (possibly empty), or `undefined` when the home page
   *   could not be fetched
   */
  getThumbs: async (articles) => {
    let res;
    try {
      res = await import_axios.default.get("https://www.leiphone.com/", {
        responseType: "text",
        headers: {
          "User-Agent": import_constants.chromeUA
        }
      });
    } catch (e) {
      // Thumbnails are best effort: report the failure and let the caller
      // carry on without them instead of rejecting.
      console.error(import_zx.chalk.red("【get thumb】获取 html 错误"), e);
      return void 0;
    }
    if (!res) {
      return void 0;
    }
    const $ = (0, import_cheerio.load)(res.data, null, false);
    // Index the guids once so each anchor costs a single strict lookup
    // rather than a scan over every article.
    const knownGuids = new Set((articles || []).map((article) => article.guid));
    const kv = {};
    $(".lph-pageList ul.clr > li .img > a:nth-child(2)").each((_, anchor) => {
      const $anchor = $(anchor);
      const thumb = $anchor.find("img").attr("data-original");
      if (!thumb) {
        return;
      }
      const href = $anchor.attr("href") || "";
      // An empty href can never identify an article, even if one has an empty guid.
      if (href && knownGuids.has(href)) {
        kv[href] = thumb;
      }
    });
    return kv;
  }
};
|
|
78
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
79
|
+
0 && (module.exports = {
|
|
80
|
+
leiphone
|
|
81
|
+
});
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/parsers/oschina.ts
|
|
20
|
+
var oschina_exports = {};
|
|
21
|
+
__export(oschina_exports, {
|
|
22
|
+
oschina: () => oschina
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(oschina_exports);
|
|
25
|
+
/**
 * Parsing strategy for oschina articles.
 *
 * `parse` and `fetcher` are static settings read by the code that consumes
 * strategies (not visible here).
 */
var oschina = {
  parse: true,
  fetcher: "http",
  /**
   * Selects the article body and strips advertisement wrappers from it.
   *
   * @param $ cheerio-style selector function for the article document
   * @returns the `.article-detail > .content` selection, passed through `$`,
   *   after every `.ad-wrap` descendant has been removed
   */
  getContentElementFromArticle: ($) => {
    const content = $(".article-detail > .content");
    const adBlocks = $(content).find(".ad-wrap");
    // Ads are removed in place so they never reach the parsed content.
    adBlocks.remove();
    return $(content);
  }
};
|
|
34
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
35
|
+
0 && (module.exports = {
|
|
36
|
+
oschina
|
|
37
|
+
});
|
package/dist/cjs/strategy.js
CHANGED
|
@@ -30,6 +30,8 @@ var import_kr = require("./parsers/36kr");
|
|
|
30
30
|
var import_techrunch = require("./parsers/techrunch");
|
|
31
31
|
var import_mydrivers = require("./parsers/mydrivers");
|
|
32
32
|
var import_tmtpost = require("./parsers/tmtpost");
|
|
33
|
+
var import_leiphone = require("./parsers/leiphone");
|
|
34
|
+
var import_oschina = require("./parsers/oschina");
|
|
33
35
|
var strategies = {
|
|
34
36
|
cnbeta: import_cnbeta.cnbeta,
|
|
35
37
|
ifanr: import_ifanr.ifanr,
|
|
@@ -38,7 +40,9 @@ var strategies = {
|
|
|
38
40
|
"36kr": import_kr._36kr,
|
|
39
41
|
techrunch: import_techrunch.techrunch,
|
|
40
42
|
mydrivers: import_mydrivers.mydrivers,
|
|
41
|
-
tmtpost: import_tmtpost.tmtpost
|
|
43
|
+
tmtpost: import_tmtpost.tmtpost,
|
|
44
|
+
leiphone: import_leiphone.leiphone,
|
|
45
|
+
oschina: import_oschina.oschina
|
|
42
46
|
};
|
|
43
47
|
// Annotate the CommonJS export names for ESM import in node:
|
|
44
48
|
0 && (module.exports = {
|
package/dist/cjs/types.d.ts
CHANGED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { RichArticleImage } from "../types";
|
|
2
|
+
/**
|
|
3
|
+
* Picks the image to use from an article's images. Currently used as the fallback when there is no thumb.
|
|
4
|
+
* If none of the images has dimensions, the first one is chosen.
|
|
5
|
+
* For images that have dimensions, the priority is as follows:
|
|
6
|
+
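* 1. the first image whose width-to-height ratio is at least 1.5 and whose width is below 2000 (the thresholds the implementation checks);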
|
|
7
|
+
* 2. if there is none, the first image.
|
|
8
|
+
* @param images
|
|
9
|
+
* @returns
|
|
10
|
+
*/
|
|
11
|
+
export declare const getImage: (images: RichArticleImage[]) => string | undefined;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/utils/getArticleImage.ts
|
|
20
|
+
var getArticleImage_exports = {};
|
|
21
|
+
__export(getArticleImage_exports, {
|
|
22
|
+
getImage: () => getImage
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(getArticleImage_exports);
|
|
25
|
+
/**
 * Chooses the URL of the image that represents an article.
 *
 * Preference goes to the first image that has both dimensions, a
 * width-to-height ratio of at least 1.5 and a width below 2000. When no image
 * qualifies (including when none has dimensions) the first image is used, as
 * the declared contract for this helper promises.
 *
 * @param images article images exposing `url` and optional `width`/`height`
 * @returns the chosen image's `url`, or `undefined` for a missing or empty list
 */
var getImage = (images) => {
  if (!images || !images.length) {
    return void 0;
  }
  const preferred = images.find((candidate) => {
    return candidate.width && candidate.height && candidate.width / candidate.height >= 1.5 && candidate.width < 2e3;
  });
  // Fall back to the first image instead of reporting "no image" when the
  // list is non-empty but nothing matches the preferred shape.
  const chosen = preferred || images[0];
  return chosen == null ? void 0 : chosen.url;
};
|
|
34
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
35
|
+
0 && (module.exports = {
|
|
36
|
+
getImage
|
|
37
|
+
});
|