ns-rss-spider 0.0.17 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/parse.js CHANGED
@@ -39,7 +39,11 @@ var import_parseContent = require("./parseContent");
39
39
  var import_upload = require("./upload");
40
40
  var import_axios = __toESM(require("axios"));
41
41
  async function parseRss(name, feed, server) {
42
- const parser = new import_rss_parser.default();
42
+ const parser = new import_rss_parser.default({
43
+ customFields: {
44
+ item: ["source"]
45
+ }
46
+ });
43
47
  console.log(import_zx.chalk.green("正在拉取 rss 列表"));
44
48
  const result = await parser.parseURL(feed);
45
49
  if (!result.items.length) {
@@ -49,7 +49,7 @@ async function parseContent(item, strategy) {
49
49
  console.log(import_zx.chalk.green("正在拉取文章内容"), item.link);
50
50
  const article = await (0, import_getArticleHtml.getArticleHtml)(item.link, strategy.fetcher);
51
51
  $ = cheerio.load(article);
52
- element = ((_a = strategy.getContentHtmlFromArticle) == null ? void 0 : _a.call(strategy, $)) || $.root();
52
+ element = ((_a = strategy.getContentElementFromArticle) == null ? void 0 : _a.call(strategy, $)) || $.root();
53
53
  }
54
54
  console.log(import_zx.chalk.green("正在预处理 html"));
55
55
  (0, import_stripeHtml.stripeHtml)($, element);
@@ -39,6 +39,7 @@ var import_html_entities = require("html-entities");
39
39
  var import_generateSsrContent = require("./generateSsrContent");
40
40
  var import_getBasicFromItem = require("./getBasicFromItem");
41
41
  async function parseContent($, $element, item, strategy) {
42
+ var _a;
42
43
  const srcs = [];
43
44
  const images = [];
44
45
  (0, import_utils.walk_the_DOM)($, $element, (node) => {
@@ -93,7 +94,8 @@ async function parseContent($, $element, item, strategy) {
93
94
  }
94
95
  }
95
96
  });
96
- const content = (0, import_html_entities.decode)($element.html().trim());
97
+ const elCont = ((_a = strategy == null ? void 0 : strategy.getContentFromHtml) == null ? void 0 : _a.call(strategy, $, $element)) || $element.html() || "";
98
+ const content = (0, import_html_entities.decode)(elCont.trim());
97
99
  return {
98
100
  type: "rich",
99
101
  ...(0, import_getBasicFromItem.getBasicFromItem)(item),
@@ -25,16 +25,22 @@ module.exports = __toCommonJS(cnbeta_exports);
25
25
  var cnbeta = {
26
26
  parse: true,
27
27
  fetcher: "http",
28
- getContentHtmlFromArticle: ($) => {
28
+ getContentElementFromArticle: ($) => {
29
29
  const el = $(".article-content");
30
- el.remove(".tac");
31
30
  return el;
32
31
  },
32
+ getContentFromHtml: ($, $el) => {
33
+ const desc = $(".cnbeta-article-body .article-summary");
34
+ if (desc.length) {
35
+ $(desc).find(".topic").remove();
36
+ $(desc).find("p").removeAttr("class").addClass("introduction");
37
+ }
38
+ return (desc.html() || "") + ($el.html() || "");
39
+ },
33
40
  getExtraItems: async ($, rich) => {
34
41
  return {
35
42
  description: rich.description.replace(`
36
- 阅读全文`, ""),
37
- pageFrom: $(".cnbeta-article .title .source a").text().toLowerCase()
43
+ 阅读全文`, "")
38
44
  };
39
45
  }
40
46
  };
@@ -23,12 +23,7 @@ __export(ifanr_exports, {
23
23
  });
24
24
  module.exports = __toCommonJS(ifanr_exports);
25
25
  var ifanr = {
26
- parse: true,
27
- fetcher: "http",
28
- getContentHtmlFromArticle: ($) => {
29
- const el = $("#article-content-wrapper article");
30
- return el;
31
- }
26
+ parse: true
32
27
  };
33
28
  // Annotate the CommonJS export names for ESM import in node:
34
29
  0 && (module.exports = {
@@ -26,7 +26,7 @@ var import_utils = require("../parseContent/utils");
26
26
  var ithome = {
27
27
  parse: true,
28
28
  fetcher: "http",
29
- getContentHtmlFromArticle: ($) => {
29
+ getContentElementFromArticle: ($) => {
30
30
  const el = $("#paragraph");
31
31
  $(el).find("img").each((_, img) => {
32
32
  const original = $(img).attr("data-original");
@@ -7,8 +7,9 @@ export type RssItem = Item & {
7
7
  export interface ParseStrategy {
8
8
  parse: boolean;
9
9
  fetcher?: 'http' | 'playwright';
10
- getContentHtmlFromArticle?: (aritcle: CheerioAPI) => Cheerio<AnyNode>;
11
- getExtraItems?: (html: CheerioAPI, current: RichArticle, item: Item) => Promise<Record<string, any>>;
10
+ getContentElementFromArticle?: (aritcle: CheerioAPI) => Cheerio<AnyNode>;
11
+ getContentFromHtml?: ($: CheerioAPI, node: Cheerio<AnyNode>) => string;
12
+ getExtraItems?: ($: CheerioAPI, current: RichArticle, item: Item) => Promise<Record<string, any>>;
12
13
  }
13
14
  export interface SimpleArticle {
14
15
  type: 'simple';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ns-rss-spider",
3
- "version": "0.0.17",
3
+ "version": "0.0.19",
4
4
  "description": "",
5
5
  "main": "dist/cjs/index.js",
6
6
  "types": "dist/cjs/index.d.ts",
@@ -10,7 +10,6 @@
10
10
  "build": "father build",
11
11
  "build:deps": "father prebundle",
12
12
  "check": "father doctor",
13
- "prepublishOnly": "father doctor && npm run build",
14
13
  "sync": "echo '去更新 registry 版本: https://www.npmmirror.com/sync/ns-rss-spider'",
15
14
  "pub": "npm run check && npm version patch && npm run build && npm publish --registry=https://registry.npmjs.org && git push origin master --tags && npm run sync"
16
15
  },