rssany 0.1.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/README.md +23 -27
  2. package/app/plugins/builtin/agi-eval-evaluation.rssany.js +7 -8
  3. package/app/plugins/builtin/amii-research-talent.rssany.js +6 -7
  4. package/app/plugins/builtin/anthropic-research.rssany.js +6 -8
  5. package/app/plugins/builtin/appen-resources.rssany.js +6 -7
  6. package/app/plugins/builtin/baai-wudao-paper-article.rssany.js +9 -10
  7. package/app/plugins/builtin/baaidata-csdn.rssany.js +6 -7
  8. package/app/plugins/builtin/baidu-research.rssany.js +5 -8
  9. package/app/plugins/builtin/brightdata-blog.rssany.js +7 -12
  10. package/app/plugins/builtin/bytedance-seed-research.rssany.js +5 -7
  11. package/app/plugins/builtin/email.rssany.js +9 -9
  12. package/app/plugins/builtin/five-radar.rssany.js +10 -12
  13. package/app/plugins/builtin/flageval-news.rssany.js +5 -7
  14. package/app/plugins/builtin/google-deepmind-research.rssany.js +7 -9
  15. package/app/plugins/builtin/google-research-datasets.rssany.js +6 -8
  16. package/app/plugins/builtin/google-research.rssany.js +6 -8
  17. package/app/plugins/builtin/hacker-news-newest.rssany.js +7 -9
  18. package/app/plugins/builtin/harvard-dataverse.rssany.js +6 -8
  19. package/app/plugins/builtin/huaweicloud-bbs-blogs.rssany.js +7 -9
  20. package/app/plugins/builtin/lingowhale.rssany.js +7 -9
  21. package/app/plugins/builtin/meituan-tech.rssany.js +7 -10
  22. package/app/plugins/builtin/meta-ai-publications.rssany.js +6 -11
  23. package/app/plugins/builtin/mila-quebec.rssany.js +6 -8
  24. package/app/plugins/builtin/mit-csail-research.rssany.js +7 -9
  25. package/app/plugins/builtin/moonshot.rssany.js +6 -8
  26. package/app/plugins/builtin/opendatalab-news.rssany.js +6 -7
  27. package/app/plugins/builtin/opendatalab.rssany.js +5 -6
  28. package/app/plugins/builtin/opendrivelab-autonomous-driving.rssany.js +6 -7
  29. package/app/plugins/builtin/opendrivelab-embodiedai.rssany.js +7 -8
  30. package/app/plugins/builtin/opendrivelab-publications.rssany.js +7 -9
  31. package/app/plugins/builtin/opendrivelab.rssany.js +7 -8
  32. package/app/plugins/builtin/paperswithcode.rssany.js +6 -8
  33. package/app/plugins/builtin/pjlab-adg-publications.rssany.js +8 -10
  34. package/app/plugins/builtin/rss.rssany.js +11 -12
  35. package/app/plugins/builtin/selectdataset.rssany.js +6 -8
  36. package/app/plugins/builtin/sensetime-tech-achievements.rssany.js +7 -8
  37. package/app/plugins/builtin/supervisely-blog.rssany.js +6 -8
  38. package/app/plugins/builtin/theinformation-briefings.rssany.js +144 -136
  39. package/app/plugins/builtin/uci-ml-repository.rssany.js +6 -7
  40. package/app/plugins/builtin/venturebeat.rssany.js +7 -9
  41. package/app/plugins/builtin/worldlabs.rssany.js +6 -8
  42. package/app/plugins/builtin/x.rssany.js +7 -9
  43. package/app/plugins/builtin/xiaohongshu.rssany.js +119 -56
  44. package/app/plugins/builtin/zhipu-research.rssany.js +7 -10
  45. package/app/plugins/site.rssany.js +25 -25
  46. package/{statics → app/statics}/README.md +7 -7
  47. package/bin/rssany.js +226 -6
  48. package/dist/index.js +545 -396
  49. package/dist/index.js.map +1 -1
  50. package/package.json +20 -13
  51. package/scripts/dev.mjs +114 -0
  52. package/scripts/reset.mjs +1 -1
  53. package/app/plugins/builtin/google.rssany.js +0 -187
  54. package/init/config.json +0 -17
  55. package/init/sources.json +0 -353
  56. package/statics/401.html +0 -56
  57. package/statics/404.html +0 -12
  58. package/statics/image.png +0 -0
  59. package/webui/build/200.html +0 -49
  60. package/webui/build/_app/env.js +0 -1
  61. package/webui/build/_app/immutable/assets/0.BB88QFoe.css +0 -1
  62. package/webui/build/_app/immutable/assets/10.Dj8_pmut.css +0 -1
  63. package/webui/build/_app/immutable/assets/11.qYZMiTb0.css +0 -1
  64. package/webui/build/_app/immutable/assets/12.Ct59LCqW.css +0 -1
  65. package/webui/build/_app/immutable/assets/13.BhO9zvFi.css +0 -1
  66. package/webui/build/_app/immutable/assets/14.CujIhjQK.css +0 -1
  67. package/webui/build/_app/immutable/assets/15.nNGjXhCQ.css +0 -1
  68. package/webui/build/_app/immutable/assets/16.PP9XLDf7.css +0 -1
  69. package/webui/build/_app/immutable/assets/4.9wPHhVwv.css +0 -1
  70. package/webui/build/_app/immutable/assets/5.ClehBQ0g.css +0 -1
  71. package/webui/build/_app/immutable/assets/6.DSJfjJwx.css +0 -1
  72. package/webui/build/_app/immutable/assets/7.CrNxmd8B.css +0 -1
  73. package/webui/build/_app/immutable/assets/8.Ba5_jYIY.css +0 -1
  74. package/webui/build/_app/immutable/assets/9.m-LCx_kl.css +0 -1
  75. package/webui/build/_app/immutable/assets/BackToParentRoute.DGk-X5ow.css +0 -1
  76. package/webui/build/_app/immutable/assets/SourcesList.yTBBi3_m.css +0 -1
  77. package/webui/build/_app/immutable/assets/homeFeedPanelStore.CSvlNcpm.css +0 -1
  78. package/webui/build/_app/immutable/chunks/B-OsL1Ct.js +0 -1
  79. package/webui/build/_app/immutable/chunks/B2Q1a1-H.js +0 -2
  80. package/webui/build/_app/immutable/chunks/BK3WtZwv.js +0 -1
  81. package/webui/build/_app/immutable/chunks/BQqoDzLx.js +0 -1
  82. package/webui/build/_app/immutable/chunks/BUApaBEI.js +0 -1
  83. package/webui/build/_app/immutable/chunks/BbWUOQ_m.js +0 -1
  84. package/webui/build/_app/immutable/chunks/Bfc47y5P.js +0 -1
  85. package/webui/build/_app/immutable/chunks/Bp63qm3L.js +0 -1
  86. package/webui/build/_app/immutable/chunks/BwlaCkNX.js +0 -36
  87. package/webui/build/_app/immutable/chunks/C0J2-L94.js +0 -1
  88. package/webui/build/_app/immutable/chunks/CBY2biv-.js +0 -1
  89. package/webui/build/_app/immutable/chunks/CLOXMsDk.js +0 -36
  90. package/webui/build/_app/immutable/chunks/CVzlFH44.js +0 -1
  91. package/webui/build/_app/immutable/chunks/CWNeClHp.js +0 -6
  92. package/webui/build/_app/immutable/chunks/Cihqbfi5.js +0 -1
  93. package/webui/build/_app/immutable/chunks/D5GvRCv7.js +0 -1
  94. package/webui/build/_app/immutable/chunks/DEDI7Ecm.js +0 -1
  95. package/webui/build/_app/immutable/chunks/DFuhmi31.js +0 -1
  96. package/webui/build/_app/immutable/chunks/DMWEh-Ek.js +0 -2
  97. package/webui/build/_app/immutable/chunks/DgceFEv5.js +0 -1
  98. package/webui/build/_app/immutable/chunks/DjNLq3TF.js +0 -1
  99. package/webui/build/_app/immutable/chunks/Dt2CddFe.js +0 -1
  100. package/webui/build/_app/immutable/chunks/Dw782Tjs.js +0 -1
  101. package/webui/build/_app/immutable/chunks/SqCUd34O.js +0 -1
  102. package/webui/build/_app/immutable/chunks/Xy_fhzQq.js +0 -1
  103. package/webui/build/_app/immutable/chunks/hp4PFHFv.js +0 -1
  104. package/webui/build/_app/immutable/chunks/lk5LaiqA.js +0 -1
  105. package/webui/build/_app/immutable/chunks/mW5RwvnK.js +0 -13
  106. package/webui/build/_app/immutable/chunks/tB7QMF3U.js +0 -1
  107. package/webui/build/_app/immutable/chunks/xtNWTdbD.js +0 -1
  108. package/webui/build/_app/immutable/entry/app.B8zBPipq.js +0 -2
  109. package/webui/build/_app/immutable/entry/start.CxRCKeCl.js +0 -1
  110. package/webui/build/_app/immutable/nodes/0.ChLNE3xy.js +0 -11
  111. package/webui/build/_app/immutable/nodes/1.1N74-4Io.js +0 -1
  112. package/webui/build/_app/immutable/nodes/10.DY30t9Ib.js +0 -1
  113. package/webui/build/_app/immutable/nodes/11.ITuxnukH.js +0 -1
  114. package/webui/build/_app/immutable/nodes/12.qLzWqB1c.js +0 -1
  115. package/webui/build/_app/immutable/nodes/13.nT3SOzEB.js +0 -1
  116. package/webui/build/_app/immutable/nodes/14.BHnIxbVM.js +0 -1
  117. package/webui/build/_app/immutable/nodes/15.CLjT9il3.js +0 -1
  118. package/webui/build/_app/immutable/nodes/16.BD-mKCLN.js +0 -24
  119. package/webui/build/_app/immutable/nodes/17.BtYZF6FM.js +0 -1
  120. package/webui/build/_app/immutable/nodes/18.Ba_qJjp6.js +0 -1
  121. package/webui/build/_app/immutable/nodes/2.BYWOpaxy.js +0 -1
  122. package/webui/build/_app/immutable/nodes/3.Dt5o2Fmz.js +0 -1
  123. package/webui/build/_app/immutable/nodes/4.DTSxpKm7.js +0 -2
  124. package/webui/build/_app/immutable/nodes/5.Dy3vSsIP.js +0 -1
  125. package/webui/build/_app/immutable/nodes/6.DvclsL6H.js +0 -1
  126. package/webui/build/_app/immutable/nodes/7.D2nJy-Uz.js +0 -1
  127. package/webui/build/_app/immutable/nodes/8.C75mhrqs.js +0 -1
  128. package/webui/build/_app/immutable/nodes/9.Bp_QXw3w.js +0 -1
  129. package/webui/build/_app/version.json +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rssany",
3
- "version": "0.1.6",
3
+ "version": "0.3.0",
4
4
  "description": "Universal RSS/Atom/JSON Feed pipeline — fetches, extracts, parses and converts any web content into consumable feeds with plugin support",
5
5
  "author": "Joo",
6
6
  "type": "module",
@@ -21,14 +21,16 @@
21
21
  ".env.example",
22
22
  "README.md",
23
23
  "init",
24
- "scripts/reset.mjs"
24
+ "scripts/reset.mjs",
25
+ "scripts/dev.mjs"
25
26
  ],
26
27
  "engines": {
27
28
  "node": ">=20 <24"
28
29
  },
29
30
  "scripts": {
30
31
  "build": "vite build",
31
- "dev": "tsx app/index.ts",
32
+ "dev": "node scripts/dev.mjs",
33
+ "dev:backend": "cross-env PORT=3999 tsx app/index.ts",
32
34
  "start": "node dist/index.js",
33
35
  "serve:route": "node scripts/serve-route.mjs",
34
36
  "serve:app": "npx tsx app/index.ts",
@@ -39,14 +41,20 @@
39
41
  "typecheck": "tsc --noEmit",
40
42
  "reset": "node scripts/reset.mjs",
41
43
  "proxy-browser": "tsx scripts/proxy-browser.ts",
42
- "webui:install": "cd webui && npm install",
43
- "webui:dev": "cd webui && pnpm run dev",
44
- "webui:build": "cd webui && npm run build",
45
- "webui:watch": "cd webui && npm run build:watch",
44
+ "webui:install": "cd app/webui && npm install",
45
+ "webui:dev": "cd app/webui && npm run build:watch",
46
+ "webui:build": "cd app/webui && npm run build",
47
+ "webui:watch": "cd app/webui && npm run build:watch",
48
+ "dev:all": "npm run dev",
46
49
  "build:all": "npm run build && npm run webui:build",
47
50
  "prepublishOnly": "npm run build:all",
48
51
  "docker:build": "bash scripts/docker-build.sh",
49
- "docker:build:tag": "bash scripts/docker-build.sh"
52
+ "docker:build:tag": "bash scripts/docker-build.sh",
53
+ "landing:install": "cd landing && npm install",
54
+ "landing:dev": "cd landing && npm run dev",
55
+ "landing:build": "cd landing && npm run build",
56
+ "deploy": "node scripts/deploy-landing.mjs",
57
+ "deploy:landing": "node scripts/deploy-landing.mjs"
50
58
  },
51
59
  "keywords": [
52
60
  "rss",
@@ -60,7 +68,6 @@
60
68
  "@eslint/js": "^9.15.0",
61
69
  "@types/jsdom": "^21.1.7",
62
70
  "@types/mailparser": "^3.4.6",
63
- "@types/better-sqlite3": "^7.6.13",
64
71
  "@types/node": "^25.2.0",
65
72
  "@types/node-cron": "^3.0.11",
66
73
  "@types/nodemailer": "^7.0.11",
@@ -68,19 +75,19 @@
68
75
  "cross-env": "^7.0.3",
69
76
  "eslint": "^9.15.0",
70
77
  "globals": "^15.12.0",
78
+ "ssh2": "^1.17.0",
71
79
  "tsx": "^4.19.0",
72
80
  "typescript": "~5.6.0",
73
81
  "typescript-eslint": "^8.15.0",
74
- "vite": "^6.0.0",
82
+ "vite": "^6.4.2",
75
83
  "vitest": "^2.1.0"
76
84
  },
77
85
  "dependencies": {
78
- "@hono/node-server": "^1.13.0",
86
+ "@hono/node-server": "^1.19.10",
79
87
  "@mozilla/readability": "^0.6.0",
80
- "better-sqlite3": "^12.6.2",
81
88
  "cron-parser": "^5.0.0",
82
89
  "dotenv": "^16.4.7",
83
- "hono": "^4.6.0",
90
+ "hono": "^4.12.12",
84
91
  "https-proxy-agent": "^7.0.6",
85
92
  "imapflow": "^1.2.10",
86
93
  "jsdom": "^25.0.0",
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env node
2
+ import { spawn, execFileSync } from "node:child_process";
3
+ import { existsSync, statSync } from "node:fs";
4
+ import { join } from "node:path";
5
+
6
+ const root = join(import.meta.dirname, "..");
7
+ const npmCmd = "npm";
8
+ const webuiMarker = join(root, "app", "webui", "build", "200.html");
9
+ const initialMarkerMtime = existsSync(webuiMarker) ? statSync(webuiMarker).mtimeMs : 0;
10
+
11
+ const children = new Set();
12
+ let shuttingDown = false;
13
+
14
+ function spawnNpm(args, opts = {}) {
15
+ const child = spawn(npmCmd, args, {
16
+ cwd: root,
17
+ env: process.env,
18
+ stdio: opts.stdio ?? ["ignore", "pipe", "pipe"],
19
+ // Windows: Node 20.12+ / 22+ reject spawning .cmd without shell (CVE-2024-27980).
20
+ shell: process.platform === "win32",
21
+ });
22
+ children.add(child);
23
+ child.once("exit", () => children.delete(child));
24
+ return child;
25
+ }
26
+
27
+ function stopProcessTree(child) {
28
+ if (!child || child.killed) return;
29
+ if (process.platform === "win32") {
30
+ try {
31
+ execFileSync("taskkill.exe", ["/pid", String(child.pid), "/t", "/f"], { stdio: "ignore" });
32
+ return;
33
+ } catch {
34
+ // Fall through to plain kill.
35
+ }
36
+ }
37
+ child.kill("SIGTERM");
38
+ }
39
+
40
+ function shutdown(code = 0) {
41
+ if (shuttingDown) return;
42
+ shuttingDown = true;
43
+ for (const child of Array.from(children)) stopProcessTree(child);
44
+ process.exitCode = code;
45
+ }
46
+
47
+ function pipeOutput(child, label) {
48
+ child.stdout?.on("data", (chunk) => process.stdout.write(`[${label}] ${chunk}`));
49
+ child.stderr?.on("data", (chunk) => process.stderr.write(`[${label}] ${chunk}`));
50
+ }
51
+
52
+ function waitForInitialWebuiBuild(child) {
53
+ return new Promise((resolve, reject) => {
54
+ let done = false;
55
+ const finish = () => {
56
+ if (done) return;
57
+ done = true;
58
+ clearInterval(poll);
59
+ resolve();
60
+ };
61
+ const fail = (err) => {
62
+ if (done) return;
63
+ done = true;
64
+ clearInterval(poll);
65
+ reject(err);
66
+ };
67
+ const hasFreshMarker = () => {
68
+ if (!existsSync(webuiMarker)) return false;
69
+ return statSync(webuiMarker).mtimeMs > initialMarkerMtime;
70
+ };
71
+ const poll = setInterval(() => {
72
+ if (hasFreshMarker()) finish();
73
+ }, 500);
74
+ child.stdout?.on("data", (chunk) => {
75
+ const text = chunk.toString();
76
+ if (text.includes('Wrote site to "build"') || text.includes("Wrote site to 'build'")) {
77
+ finish();
78
+ }
79
+ });
80
+ child.once("exit", (code) => {
81
+ if (!done) fail(new Error(`webui watch exited before initial build completed (code ${code ?? "unknown"})`));
82
+ });
83
+ });
84
+ }
85
+
86
+ process.on("SIGINT", () => shutdown(0));
87
+ process.on("SIGTERM", () => shutdown(0));
88
+
89
+ console.log("Starting WebUI static build watcher...");
90
+ const webui = spawnNpm(["run", "webui:watch"]);
91
+ pipeOutput(webui, "webui");
92
+
93
+ try {
94
+ await waitForInitialWebuiBuild(webui);
95
+ console.log("WebUI initial build is ready. Starting backend server...");
96
+ const backend = spawnNpm(["run", "dev:backend"]);
97
+ pipeOutput(backend, "backend");
98
+
99
+ backend.once("exit", (code) => {
100
+ if (!shuttingDown) {
101
+ console.log(`Backend exited (code ${code ?? "unknown"}). Stopping WebUI watcher...`);
102
+ shutdown(code ?? 0);
103
+ }
104
+ });
105
+ webui.once("exit", (code) => {
106
+ if (!shuttingDown) {
107
+ console.log(`WebUI watcher exited (code ${code ?? "unknown"}). Stopping backend...`);
108
+ shutdown(code ?? 0);
109
+ }
110
+ });
111
+ } catch (err) {
112
+ console.error(err instanceof Error ? err.message : String(err));
113
+ shutdown(1);
114
+ }
package/scripts/reset.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
3
  * 停止占用 HTTP 服务端口的进程,并删除用户数据目录(与 app 中 PORT / RSSANY_USER_DIR 约定一致)。
4
- * 用法:pnpm reset 或 PORT=3000 pnpm reset
4
+ * 用法:npm run reset 或 PORT=3000 npm run reset
5
5
  */
6
6
 
7
7
  import { execFileSync } from "node:child_process";
@@ -1,187 +0,0 @@
1
- let _deps;
2
-
3
- // Google Search 插件:将搜索结果页转换为 FeedItem 列表(不含 enrich)
4
-
5
-
6
- function normalizeText(text) {
7
- return (text ?? "").replace(/\s+/g, " ").trim();
8
- }
9
-
10
- function isGoogleHost(hostname) {
11
- return /^([a-z0-9-]+\.)*google\.[a-z.]+$/i.test(hostname);
12
- }
13
-
14
- function resolveResultLink(rawHref, pageUrl) {
15
- if (!rawHref) return null;
16
- const href = rawHref.trim();
17
- if (!href || href.startsWith("#") || href.startsWith("javascript:")) return null;
18
-
19
- let url;
20
- try {
21
- url = new URL(href, pageUrl);
22
- } catch {
23
- return null;
24
- }
25
-
26
- if (isGoogleHost(url.hostname) && url.pathname === "/url") {
27
- const q = url.searchParams.get("q") ?? url.searchParams.get("url");
28
- if (!q) return null;
29
- try {
30
- const target = new URL(q);
31
- return /^https?:$/i.test(target.protocol) ? target.href : null;
32
- } catch {
33
- return null;
34
- }
35
- }
36
-
37
- if (!/^https?:$/i.test(url.protocol)) return null;
38
- if (isGoogleHost(url.hostname)) return null;
39
- return url.href;
40
- }
41
-
42
- function closestAnchor(node) {
43
- let cur = node;
44
- while (cur) {
45
- if (cur.tagName?.toLowerCase() === "a") return cur;
46
- cur = cur.parentNode ?? null;
47
- }
48
- return null;
49
- }
50
-
51
- function extractSnippet(startNode) {
52
- const snippetSelectors = [".VwiC3b", ".IsZvec", ".MUxGbd", ".lyLwlc"];
53
- let cur = startNode;
54
- for (let i = 0; i < 6 && cur; i += 1) {
55
- for (const sel of snippetSelectors) {
56
- const el = cur.querySelector?.(sel);
57
- const text = normalizeText(el?.textContent);
58
- if (text) return text;
59
- }
60
- cur = cur.parentNode ?? null;
61
- }
62
- return "";
63
- }
64
-
65
- function looksLikeBlockedPage(root, html, finalUrl) {
66
- const text = normalizeText(root.textContent).toLowerCase();
67
- const body = (html ?? "").toLowerCase();
68
- if (finalUrl.includes("/sorry/")) return true;
69
- if (root.querySelector("#captcha-form, #recaptcha, .g-recaptcha")) return true;
70
- if (body.includes("/httpservice/retry/enablejs")) return true;
71
- if (body.includes("id=\"yvlrue\"") || body.includes("if you're having trouble accessing google search")) return true;
72
- if (body.includes("sg_rel")) return true;
73
- return text.includes("about this page") || text.includes("unusual traffic") || text.includes("captcha");
74
- }
75
-
76
- async function checkAuth(page, _url) {
77
- try {
78
- const currentUrl = page.url() || "";
79
- if (currentUrl.includes("/sorry/") || currentUrl.includes("/httpservice/retry/enablejs")) return false;
80
-
81
- const blockedDom = await page.$("#captcha-form, #recaptcha, .g-recaptcha, #yvlrue");
82
- if (blockedDom) return false;
83
-
84
- const bodyText = await page.evaluate(() => (document.body?.innerText ?? "").toLowerCase());
85
- if (
86
- bodyText.includes("unusual traffic") ||
87
- bodyText.includes("about this page") ||
88
- bodyText.includes("captcha") ||
89
- bodyText.includes("if you're having trouble accessing google search")
90
- ) {
91
- return false;
92
- }
93
-
94
- // 首页或搜索页出现 q 输入框即可认为当前会话可用;若被风控会在上面的分支提前返回 false。
95
- const searchBox = await page.$('textarea[name="q"], input[name="q"]');
96
- return !!searchBox;
97
- } catch {
98
- return false;
99
- }
100
- }
101
-
102
- function parseFromResultBlocks(root, pageUrl) {
103
- const resultBlocks = root.querySelectorAll("#rso .MjjYud .A6K0A, #rso .MjjYud .tF2Cxc");
104
- const seen = new Set();
105
- const items = [];
106
-
107
- for (const block of resultBlocks) {
108
- const anchor =
109
- block.querySelector('a[jsname="UWckNb"][href]') ??
110
- block.querySelector(".yuRUbf a[href]") ??
111
- block.querySelector("a[href]");
112
- const link = resolveResultLink(anchor?.getAttribute("href"), pageUrl);
113
- if (!link || seen.has(link)) continue;
114
-
115
- const titleNode =
116
- block.querySelector("h3.LC20lb, h3.DKV0Md, h3.MBeuO, h3") ??
117
- anchor?.querySelector?.("h3");
118
- const title = normalizeText(titleNode?.textContent);
119
- if (!title) continue;
120
-
121
- seen.add(link);
122
- const summary = extractSnippet(block) || extractSnippet(titleNode ?? block) || title;
123
- items.push({
124
- guid: _deps.createHash("sha256").update(link).digest("hex"),
125
- title,
126
- link,
127
- pubDate: new Date(),
128
- author: "Google Search",
129
- summary,
130
- });
131
- }
132
-
133
- return items;
134
- }
135
-
136
- function parseFromHeadingFallback(root, pageUrl) {
137
- const seen = new Set();
138
- const items = [];
139
- const titles = root.querySelectorAll("h3");
140
- for (const h3 of titles) {
141
- const title = normalizeText(h3.textContent);
142
- if (!title) continue;
143
- const anchor = closestAnchor(h3);
144
- const link = resolveResultLink(anchor?.getAttribute("href"), pageUrl);
145
- if (!link || seen.has(link)) continue;
146
- seen.add(link);
147
- const summary = extractSnippet(h3) || title;
148
- items.push({
149
- guid: _deps.createHash("sha256").update(link).digest("hex"),
150
- title,
151
- link,
152
- pubDate: new Date(),
153
- author: "Google Search",
154
- summary,
155
- });
156
- }
157
- return items;
158
- }
159
-
160
- async function fetchItems(sourceId, ctx) {
161
- _deps = ctx.deps;
162
- const { html, finalUrl } = await ctx.fetchHtml(sourceId, { waitMs: 2500 });
163
- const root = _deps.parseHtml(html);
164
- const pageUrl = new URL(finalUrl);
165
-
166
- const fromBlocks = parseFromResultBlocks(root, pageUrl);
167
- const items = fromBlocks.length > 0 ? fromBlocks : parseFromHeadingFallback(root, pageUrl);
168
-
169
- if (items.length === 0) {
170
- if (looksLikeBlockedPage(root, html, finalUrl)) {
171
- throw new Error("[google] 命中 Google 验证页(reCAPTCHA/风控),当前会话无法稳定抓取搜索结果");
172
- }
173
- throw new Error("[google] 未解析到搜索结果,页面结构可能已变化");
174
- }
175
- return items;
176
- }
177
-
178
- export default {
179
- id: "google-search",
180
- listUrlPattern: /^https?:\/\/(www\.)?google\.[^/]+\/search(\?.*)?$/i,
181
- fetchItems,
182
- checkAuth,
183
- loginUrl: "https://www.google.com/",
184
- domain: "google.com",
185
- loginTimeoutMs: 5 * 60 * 1000,
186
- pollIntervalMs: 2000,
187
- };
package/init/config.json DELETED
@@ -1,17 +0,0 @@
1
- {
2
- "globalProxy": "",
3
- "pipeline": {
4
- "steps": [
5
- { "id": "tagger", "enabled": false },
6
- { "id": "translator", "enabled": false }
7
- ]
8
- },
9
- "deliver": {
10
- "url": "",
11
- "token": ""
12
- },
13
- "llm": {
14
- "baseUrl": "https://api.openai.com/v1",
15
- "model": "gpt-4o-mini"
16
- }
17
- }