ns-rss-spider 0.0.14 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/parse.js CHANGED
@@ -56,6 +56,11 @@ async function parseRss(name, feed, server) {
56
56
  if (!result.items.length) {
57
57
  return [];
58
58
  }
59
+ result.items.forEach((item) => {
60
+ if (!item.guid) {
61
+ item.guid = item.link;
62
+ }
63
+ });
59
64
  const contents = [];
60
65
  for (let item of result.items) {
61
66
  console.log(import_zx.chalk.green(`正在解析文章 【${item.title}】`));
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Parse the app content out of `content`.
3
+ *
4
+ * Mainly covers:
5
+ * 1. Image placeholder handling
6
+ */
7
+ import { RichArticleImage } from "../types";
8
+ export declare function generateSsrContent(content: string, images: RichArticleImage[]): string;
@@ -0,0 +1,48 @@
1
// Module prelude: CommonJS interop helpers followed by the export registration.
// NOTE(review): the helper names and shape look bundler-generated (esbuild-style) — confirm before hand-editing.
// Short aliases for the Object built-ins used by the helpers below.
+ var __defProp = Object.defineProperty;
2
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
3
+ var __getOwnPropNames = Object.getOwnPropertyNames;
4
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
5
// __export(target, all): for every key in `all`, defines an enumerable accessor on
// `target` whose getter is the function stored at all[name].
+ var __export = (target, all) => {
6
+ for (var name in all)
7
+ __defProp(target, name, { get: all[name], enumerable: true });
8
+ };
9
// __copyProps(to, from, except, desc): when `from` is a non-null object or a function,
// re-exposes each of its own property names on `to` as a getter that reads from[key].
// Keys that `to` already owns, and the key equal to `except`, are skipped. Returns `to`.
+ var __copyProps = (to, from, except, desc) => {
10
// `&&` binds tighter than `||`: (from && typeof from === "object") || typeof from === "function".
+ if (from && typeof from === "object" || typeof from === "function") {
11
+ for (let key of __getOwnPropNames(from))
12
+ if (!__hasOwnProp.call(to, key) && key !== except)
13
// enumerable is true when no descriptor is found, otherwise it mirrors the source descriptor.
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
14
+ }
15
+ return to;
16
+ };
17
// __toCommonJS(mod): builds a fresh object carrying `__esModule: true` and copies the
// properties of `mod` onto it through __copyProps.
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
18
+
19
+ // src/parseContent/generateSsrContent.ts
20
// Register `generateSsrContent` as a getter on the exports object, then publish that
// object (wrapped by __toCommonJS) as module.exports.
+ var generateSsrContent_exports = {};
21
+ __export(generateSsrContent_exports, {
22
+ generateSsrContent: () => generateSsrContent
23
+ });
24
+ module.exports = __toCommonJS(generateSsrContent_exports);
25
+ var import_cheerio = require("cheerio");
26
+ var import_html_entities = require("html-entities");
27
/**
 * Builds the SSR variant of an article's HTML: every <img> that has a `src`
 * is replaced by a <span data-img-placeholder> carrying the image source and,
 * when known, its dimensions.
 *
 * @param content HTML string to transform.
 * @param images  Image metadata; entries are matched to <img> elements by `url === src`
 *                and their `width` / `height` are read.
 * @returns The serialized HTML, passed through html-entities `decode` and trimmed.
 */
+ function generateSsrContent(content, images) {
28
// NOTE(review): the third argument `false` presumably makes cheerio treat the input as a
// fragment (no <html>/<body> wrapper) — confirm against the cheerio `load` docs.
+ const $ = (0, import_cheerio.load)(content, null, false);
29
+ $("img").each((_, img) => {
30
+ const src = $(img).attr("src");
31
// <img> elements without a (truthy) src are left in the output untouched.
+ if (src) {
32
+ const meta = images.find((i) => i.url === src);
33
+ const $pl = $(`<span data-img-placeholder></span>`);
34
// data-w / data-h fall back to an empty string when no metadata matches or the value is falsy.
+ $pl.attr({
35
+ "data-w": (meta == null ? void 0 : meta.width) ? `${meta.width}` : "",
36
+ "data-h": (meta == null ? void 0 : meta.height) ? `${meta.height}` : "",
37
+ "data-src": src
38
+ });
39
// Inline size falls back to 1px by 1px when the dimensions are unknown.
+ $pl.attr("style", `display: inline-block;width: ${(meta == null ? void 0 : meta.width) || 1}px; height: ${(meta == null ? void 0 : meta.height) || 1}px`);
40
+ $(img).replaceWith($pl);
41
+ }
42
+ });
43
// NOTE(review): decode() runs over the whole serialized markup, so escaped text such as
// `&lt;` is turned back into literal characters in the returned HTML — confirm this is intended.
+ return (0, import_html_entities.decode)($.html()).trim();
44
+ }
45
+ // Annotate the CommonJS export names for ESM import in node:
46
+ 0 && (module.exports = {
47
+ generateSsrContent
48
+ });
@@ -36,6 +36,7 @@ var import_utils = require("./utils");
36
36
  var import_probe_image_size = __toESM(require("probe-image-size"));
37
37
  var import_zx = require("zx");
38
38
  var import_html_entities = require("html-entities");
39
+ var import_generateSsrContent = require("./generateSsrContent");
39
40
  async function parseContent($, $element, item, strategy) {
40
41
  var _a;
41
42
  const srcs = [];
@@ -82,7 +83,6 @@ async function parseContent($, $element, item, strategy) {
82
83
  $($element).find("img").each((_, img) => {
83
84
  const src = $(img).attr("src");
84
85
  if (src) {
85
- $(img).removeAttr("src");
86
86
  $(img).attr("data-src", src);
87
87
  const meta = images.find((i) => i.url === src);
88
88
  if (meta == null ? void 0 : meta.width) {
@@ -93,6 +93,7 @@ async function parseContent($, $element, item, strategy) {
93
93
  }
94
94
  }
95
95
  });
96
+ const content = (0, import_html_entities.decode)($element.html().trim());
96
97
  return {
97
98
  type: "rich",
98
99
  guid: item.guid,
@@ -100,6 +101,7 @@ async function parseContent($, $element, item, strategy) {
100
101
  link: item.link,
101
102
  description: ((_a = item.contentSnippet) == null ? void 0 : _a.trim()) || "",
102
103
  content: (0, import_html_entities.decode)($element.html().trim()),
104
+ ssrContent: (0, import_generateSsrContent.generateSsrContent)(content, images),
103
105
  pubDate: new Date(item.pubDate),
104
106
  images
105
107
  };
@@ -0,0 +1,2 @@
1
+ import { ParseStrategy } from "../types";
2
+ export declare const _36kr: ParseStrategy;
@@ -0,0 +1,31 @@
1
// Module prelude: CommonJS interop helpers followed by the export registration.
// NOTE(review): the helper names and shape look bundler-generated (esbuild-style) — confirm before hand-editing.
// Short aliases for the Object built-ins used by the helpers below.
+ var __defProp = Object.defineProperty;
2
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
3
+ var __getOwnPropNames = Object.getOwnPropertyNames;
4
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
5
// __export(target, all): for every key in `all`, defines an enumerable accessor on
// `target` whose getter is the function stored at all[name].
+ var __export = (target, all) => {
6
+ for (var name in all)
7
+ __defProp(target, name, { get: all[name], enumerable: true });
8
+ };
9
// __copyProps(to, from, except, desc): when `from` is a non-null object or a function,
// re-exposes each of its own property names on `to` as a getter that reads from[key].
// Keys that `to` already owns, and the key equal to `except`, are skipped. Returns `to`.
+ var __copyProps = (to, from, except, desc) => {
10
// `&&` binds tighter than `||`: (from && typeof from === "object") || typeof from === "function".
+ if (from && typeof from === "object" || typeof from === "function") {
11
+ for (let key of __getOwnPropNames(from))
12
+ if (!__hasOwnProp.call(to, key) && key !== except)
13
// enumerable is true when no descriptor is found, otherwise it mirrors the source descriptor.
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
14
+ }
15
+ return to;
16
+ };
17
// __toCommonJS(mod): builds a fresh object carrying `__esModule: true` and copies the
// properties of `mod` onto it through __copyProps.
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
18
+
19
+ // src/parsers/36kr.ts
20
// Register `_36kr` as a getter on the exports object, then publish that object
// (wrapped by __toCommonJS) as module.exports.
+ var kr_exports = {};
21
+ __export(kr_exports, {
22
+ _36kr: () => _36kr
23
+ });
24
+ module.exports = __toCommonJS(kr_exports);
25
// Parse strategy exported for the "36kr" feed; its only setting is `parse: true`.
// NOTE(review): the meaning of the `parse` flag is defined by ParseStrategy, which is not
// visible here — presumably it enables content parsing for this source; confirm.
+ var _36kr = {
26
+ parse: true
27
+ };
28
+ // Annotate the CommonJS export names for ESM import in node:
29
+ 0 && (module.exports = {
30
+ _36kr
31
+ });
@@ -26,11 +26,13 @@ var import_cnbeta = require("./parsers/cnbeta");
26
26
  var import_ifanr = require("./parsers/ifanr");
27
27
  var import_theverge = require("./parsers/theverge");
28
28
  var import_ithome = require("./parsers/ithome");
29
+ var import_kr = require("./parsers/36kr");
29
30
  var strategies = {
30
31
  cnbeta: import_cnbeta.cnbeta,
31
32
  ifanr: import_ifanr.ifanr,
32
33
  theverge: import_theverge.theverge,
33
- ithome: import_ithome.ithome
34
+ ithome: import_ithome.ithome,
35
+ "36kr": import_kr._36kr
34
36
  };
35
37
  // Annotate the CommonJS export names for ESM import in node:
36
38
  0 && (module.exports = {
@@ -27,6 +27,7 @@ export interface RichArticleImage {
27
27
  /**
   * Article variant tagged `type: 'rich'`; it reuses every SimpleArticle field except `type`
   * and adds the HTML content plus image metadata.
   */
  export interface RichArticle extends Omit<SimpleArticle, 'type'> {
28
28
  type: 'rich';
29
29
  /** Entity-decoded HTML of the article element, as produced by parseContent. */
  content: string;
30
  /**
   * Output of generateSsrContent(content, images), as assigned in parseContent.
   * NOTE(review): this is a new required field, so code that constructs RichArticle
   * values by hand must now supply it — confirm this is intended for a patch release.
   */
+ ssrContent: string;
30
31
  images: RichArticleImage[];
31
32
  }
32
33
  export type Article = SimpleArticle | RichArticle;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ns-rss-spider",
3
- "version": "0.0.14",
3
+ "version": "0.0.16",
4
4
  "description": "",
5
5
  "main": "dist/cjs/index.js",
6
6
  "types": "dist/cjs/index.d.ts",