@jackwener/opencli 0.9.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/bug_report.yml +83 -0
- package/.github/ISSUE_TEMPLATE/config.yml +8 -0
- package/.github/ISSUE_TEMPLATE/feature_request.yml +42 -0
- package/.github/ISSUE_TEMPLATE/new_site_adapter.yml +57 -0
- package/.github/dependabot.yml +27 -0
- package/.github/pull_request_template.md +24 -0
- package/.github/workflows/ci.yml +14 -8
- package/.github/workflows/e2e-headed.yml +6 -2
- package/.github/workflows/pkg-pr-new.yml +2 -2
- package/.github/workflows/release-please.yml +25 -0
- package/.github/workflows/release.yml +2 -2
- package/.github/workflows/security.yml +36 -0
- package/CDP.md +1 -1
- package/CDP.zh-CN.md +1 -1
- package/CLI-ELECTRON.md +89 -36
- package/CLI-EXPLORER.md +4 -4
- package/CONTRIBUTING.md +167 -0
- package/README.md +113 -89
- package/README.zh-CN.md +114 -91
- package/SKILL.md +10 -8
- package/TESTING.md +7 -7
- package/dist/browser/daemon-client.d.ts +37 -0
- package/dist/browser/daemon-client.js +82 -0
- package/dist/browser/discover.d.ts +11 -34
- package/dist/browser/discover.js +15 -190
- package/dist/browser/errors.d.ts +6 -20
- package/dist/browser/errors.js +24 -63
- package/dist/browser/index.d.ts +2 -11
- package/dist/browser/index.js +5 -11
- package/dist/browser/mcp.d.ts +9 -18
- package/dist/browser/mcp.js +70 -284
- package/dist/browser/page.d.ts +28 -6
- package/dist/browser/page.js +210 -85
- package/dist/browser.test.js +4 -202
- package/dist/build-manifest.d.ts +26 -0
- package/dist/build-manifest.js +132 -60
- package/dist/build-manifest.test.d.ts +1 -0
- package/dist/build-manifest.test.js +26 -0
- package/dist/cli-manifest.json +1582 -29
- package/dist/clis/bilibili/download.d.ts +10 -0
- package/dist/clis/bilibili/download.js +135 -0
- package/dist/clis/chatwise/ask.d.ts +1 -0
- package/dist/clis/chatwise/ask.js +76 -0
- package/dist/clis/chatwise/export.d.ts +1 -0
- package/dist/clis/chatwise/export.js +46 -0
- package/dist/clis/chatwise/history.d.ts +1 -0
- package/dist/clis/chatwise/history.js +43 -0
- package/dist/clis/chatwise/model.d.ts +1 -0
- package/dist/clis/chatwise/model.js +81 -0
- package/dist/clis/chatwise/new.d.ts +1 -0
- package/dist/clis/chatwise/new.js +18 -0
- package/dist/clis/chatwise/read.d.ts +1 -0
- package/dist/clis/chatwise/read.js +39 -0
- package/dist/clis/chatwise/screenshot.d.ts +1 -0
- package/dist/clis/chatwise/screenshot.js +27 -0
- package/dist/clis/chatwise/send.d.ts +1 -0
- package/dist/clis/chatwise/send.js +45 -0
- package/dist/clis/chatwise/status.d.ts +1 -0
- package/dist/clis/chatwise/status.js +22 -0
- package/dist/clis/discord-app/channels.d.ts +1 -0
- package/dist/clis/discord-app/channels.js +45 -0
- package/dist/clis/discord-app/members.d.ts +1 -0
- package/dist/clis/discord-app/members.js +38 -0
- package/dist/clis/discord-app/read.d.ts +1 -0
- package/dist/clis/discord-app/read.js +45 -0
- package/dist/clis/discord-app/search.d.ts +1 -0
- package/dist/clis/discord-app/search.js +56 -0
- package/dist/clis/discord-app/send.d.ts +1 -0
- package/dist/clis/discord-app/send.js +27 -0
- package/dist/clis/discord-app/servers.d.ts +1 -0
- package/dist/clis/discord-app/servers.js +36 -0
- package/dist/clis/discord-app/status.d.ts +1 -0
- package/dist/clis/discord-app/status.js +16 -0
- package/dist/clis/feishu/new.d.ts +1 -0
- package/dist/clis/feishu/new.js +27 -0
- package/dist/clis/feishu/read.d.ts +1 -0
- package/dist/clis/feishu/read.js +40 -0
- package/dist/clis/feishu/search.d.ts +1 -0
- package/dist/clis/feishu/search.js +30 -0
- package/dist/clis/feishu/send.d.ts +1 -0
- package/dist/clis/feishu/send.js +39 -0
- package/dist/clis/feishu/status.d.ts +1 -0
- package/dist/clis/feishu/status.js +28 -0
- package/dist/clis/grok/ask.d.ts +1 -0
- package/dist/clis/grok/ask.js +82 -0
- package/dist/clis/grok/debug.d.ts +1 -0
- package/dist/clis/grok/debug.js +45 -0
- package/dist/clis/jimeng/generate.yaml +84 -0
- package/dist/clis/jimeng/history.yaml +47 -0
- package/dist/clis/linux-do/categories.yaml +41 -0
- package/dist/clis/linux-do/category.yaml +49 -0
- package/dist/clis/linux-do/hot.yaml +50 -0
- package/dist/clis/linux-do/latest.yaml +40 -0
- package/dist/clis/linux-do/search.yaml +45 -0
- package/dist/clis/linux-do/topic.yaml +38 -0
- package/dist/clis/neteasemusic/like.d.ts +1 -0
- package/dist/clis/neteasemusic/like.js +25 -0
- package/dist/clis/neteasemusic/lyrics.d.ts +1 -0
- package/dist/clis/neteasemusic/lyrics.js +47 -0
- package/dist/clis/neteasemusic/next.d.ts +1 -0
- package/dist/clis/neteasemusic/next.js +26 -0
- package/dist/clis/neteasemusic/play.d.ts +1 -0
- package/dist/clis/neteasemusic/play.js +26 -0
- package/dist/clis/neteasemusic/playing.d.ts +1 -0
- package/dist/clis/neteasemusic/playing.js +59 -0
- package/dist/clis/neteasemusic/playlist.d.ts +1 -0
- package/dist/clis/neteasemusic/playlist.js +46 -0
- package/dist/clis/neteasemusic/prev.d.ts +1 -0
- package/dist/clis/neteasemusic/prev.js +25 -0
- package/dist/clis/neteasemusic/search.d.ts +1 -0
- package/dist/clis/neteasemusic/search.js +52 -0
- package/dist/clis/neteasemusic/status.d.ts +1 -0
- package/dist/clis/neteasemusic/status.js +16 -0
- package/dist/clis/neteasemusic/volume.d.ts +1 -0
- package/dist/clis/neteasemusic/volume.js +54 -0
- package/dist/clis/notion/export.d.ts +1 -0
- package/dist/clis/notion/export.js +31 -0
- package/dist/clis/notion/favorites.d.ts +1 -0
- package/dist/clis/notion/favorites.js +84 -0
- package/dist/clis/notion/new.d.ts +1 -0
- package/dist/clis/notion/new.js +34 -0
- package/dist/clis/notion/read.d.ts +1 -0
- package/dist/clis/notion/read.js +30 -0
- package/dist/clis/notion/search.d.ts +1 -0
- package/dist/clis/notion/search.js +46 -0
- package/dist/clis/notion/sidebar.d.ts +1 -0
- package/dist/clis/notion/sidebar.js +41 -0
- package/dist/clis/notion/status.d.ts +1 -0
- package/dist/clis/notion/status.js +16 -0
- package/dist/clis/notion/write.d.ts +1 -0
- package/dist/clis/notion/write.js +40 -0
- package/dist/clis/twitter/download.d.ts +8 -0
- package/dist/clis/twitter/download.js +204 -0
- package/dist/clis/wechat/chats.d.ts +1 -0
- package/dist/clis/wechat/chats.js +28 -0
- package/dist/clis/wechat/contacts.d.ts +1 -0
- package/dist/clis/wechat/contacts.js +28 -0
- package/dist/clis/wechat/read.d.ts +1 -0
- package/dist/clis/wechat/read.js +58 -0
- package/dist/clis/wechat/search.d.ts +1 -0
- package/dist/clis/wechat/search.js +31 -0
- package/dist/clis/wechat/send.d.ts +1 -0
- package/dist/clis/wechat/send.js +42 -0
- package/dist/clis/wechat/status.d.ts +1 -0
- package/dist/clis/wechat/status.js +29 -0
- package/dist/clis/xiaohongshu/creator-note-detail.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-note-detail.js +88 -0
- package/dist/clis/xiaohongshu/creator-notes.d.ts +11 -0
- package/dist/clis/xiaohongshu/creator-notes.js +109 -0
- package/dist/clis/xiaohongshu/creator-profile.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-profile.js +54 -0
- package/dist/clis/xiaohongshu/creator-stats.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-stats.js +74 -0
- package/dist/clis/xiaohongshu/download.d.ts +7 -0
- package/dist/clis/xiaohongshu/download.js +155 -0
- package/dist/clis/xiaohongshu/search.js +1 -1
- package/dist/clis/xiaohongshu/user-helpers.d.ts +15 -0
- package/dist/clis/xiaohongshu/user-helpers.js +67 -0
- package/dist/clis/xiaohongshu/user-helpers.test.d.ts +1 -0
- package/dist/clis/xiaohongshu/user-helpers.test.js +81 -0
- package/dist/clis/xiaohongshu/user.js +46 -29
- package/dist/clis/zhihu/download.d.ts +11 -0
- package/dist/clis/zhihu/download.js +186 -0
- package/dist/clis/zhihu/download.test.d.ts +1 -0
- package/dist/clis/zhihu/download.test.js +10 -0
- package/dist/daemon.d.ts +13 -0
- package/dist/daemon.js +187 -0
- package/dist/doctor.d.ts +27 -61
- package/dist/doctor.js +70 -601
- package/dist/doctor.test.js +30 -170
- package/dist/download/index.d.ts +79 -0
- package/dist/download/index.js +325 -0
- package/dist/download/progress.d.ts +36 -0
- package/dist/download/progress.js +111 -0
- package/dist/engine.test.js +15 -0
- package/dist/main.js +22 -28
- package/dist/pipeline/executor.test.js +1 -0
- package/dist/pipeline/registry.js +2 -0
- package/dist/pipeline/steps/browser.js +2 -2
- package/dist/pipeline/steps/download.d.ts +34 -0
- package/dist/pipeline/steps/download.js +251 -0
- package/dist/pipeline/steps/intercept.js +1 -2
- package/dist/pipeline/template.js +28 -0
- package/dist/setup.d.ts +6 -0
- package/dist/setup.js +46 -160
- package/dist/types.d.ts +6 -0
- package/extension/icons/icon-128.png +0 -0
- package/extension/icons/icon-16.png +0 -0
- package/extension/icons/icon-32.png +0 -0
- package/extension/icons/icon-48.png +0 -0
- package/extension/manifest.json +31 -0
- package/extension/package.json +16 -0
- package/extension/src/background.ts +293 -0
- package/extension/src/cdp.ts +125 -0
- package/extension/src/protocol.ts +57 -0
- package/extension/store-assets/screenshot-1280x800.png +0 -0
- package/extension/tsconfig.json +15 -0
- package/extension/vite.config.ts +18 -0
- package/package.json +8 -7
- package/scripts/test-site.mjs +70 -0
- package/src/browser/daemon-client.ts +113 -0
- package/src/browser/discover.ts +18 -216
- package/src/browser/errors.ts +30 -100
- package/src/browser/index.ts +6 -12
- package/src/browser/mcp.ts +78 -278
- package/src/browser/page.ts +222 -88
- package/src/browser.test.ts +3 -210
- package/src/build-manifest.test.ts +28 -0
- package/src/build-manifest.ts +147 -57
- package/src/clis/bilibili/download.ts +161 -0
- package/src/clis/chatgpt/README.md +1 -1
- package/src/clis/chatgpt/README.zh-CN.md +1 -1
- package/src/clis/chatwise/README.md +38 -0
- package/src/clis/chatwise/README.zh-CN.md +38 -0
- package/src/clis/chatwise/ask.ts +87 -0
- package/src/clis/chatwise/export.ts +51 -0
- package/src/clis/chatwise/history.ts +47 -0
- package/src/clis/chatwise/model.ts +87 -0
- package/src/clis/chatwise/new.ts +21 -0
- package/src/clis/chatwise/read.ts +42 -0
- package/src/clis/chatwise/screenshot.ts +33 -0
- package/src/clis/chatwise/send.ts +50 -0
- package/src/clis/chatwise/status.ts +25 -0
- package/src/clis/discord-app/README.md +28 -0
- package/src/clis/discord-app/README.zh-CN.md +28 -0
- package/src/clis/discord-app/channels.ts +48 -0
- package/src/clis/discord-app/members.ts +41 -0
- package/src/clis/discord-app/read.ts +49 -0
- package/src/clis/discord-app/search.ts +64 -0
- package/src/clis/discord-app/send.ts +32 -0
- package/src/clis/discord-app/servers.ts +39 -0
- package/src/clis/discord-app/status.ts +18 -0
- package/src/clis/feishu/README.md +20 -0
- package/src/clis/feishu/README.zh-CN.md +20 -0
- package/src/clis/feishu/new.ts +32 -0
- package/src/clis/feishu/read.ts +48 -0
- package/src/clis/feishu/search.ts +35 -0
- package/src/clis/feishu/send.ts +46 -0
- package/src/clis/feishu/status.ts +34 -0
- package/src/clis/grok/ask.ts +90 -0
- package/src/clis/grok/debug.ts +49 -0
- package/src/clis/jimeng/generate.yaml +84 -0
- package/src/clis/jimeng/history.yaml +47 -0
- package/src/clis/linux-do/categories.yaml +41 -0
- package/src/clis/linux-do/category.yaml +49 -0
- package/src/clis/linux-do/hot.yaml +50 -0
- package/src/clis/linux-do/latest.yaml +40 -0
- package/src/clis/linux-do/search.yaml +45 -0
- package/src/clis/linux-do/topic.yaml +38 -0
- package/src/clis/neteasemusic/README.md +31 -0
- package/src/clis/neteasemusic/README.zh-CN.md +31 -0
- package/src/clis/neteasemusic/like.ts +28 -0
- package/src/clis/neteasemusic/lyrics.ts +53 -0
- package/src/clis/neteasemusic/next.ts +30 -0
- package/src/clis/neteasemusic/play.ts +30 -0
- package/src/clis/neteasemusic/playing.ts +62 -0
- package/src/clis/neteasemusic/playlist.ts +51 -0
- package/src/clis/neteasemusic/prev.ts +29 -0
- package/src/clis/neteasemusic/search.ts +58 -0
- package/src/clis/neteasemusic/status.ts +18 -0
- package/src/clis/neteasemusic/volume.ts +61 -0
- package/src/clis/notion/README.md +29 -0
- package/src/clis/notion/README.zh-CN.md +29 -0
- package/src/clis/notion/export.ts +36 -0
- package/src/clis/notion/favorites.ts +87 -0
- package/src/clis/notion/new.ts +39 -0
- package/src/clis/notion/read.ts +33 -0
- package/src/clis/notion/search.ts +54 -0
- package/src/clis/notion/sidebar.ts +44 -0
- package/src/clis/notion/status.ts +18 -0
- package/src/clis/notion/write.ts +45 -0
- package/src/clis/twitter/download.ts +227 -0
- package/src/clis/wechat/README.md +28 -0
- package/src/clis/wechat/README.zh-CN.md +28 -0
- package/src/clis/wechat/chats.ts +33 -0
- package/src/clis/wechat/contacts.ts +33 -0
- package/src/clis/wechat/read.ts +72 -0
- package/src/clis/wechat/search.ts +36 -0
- package/src/clis/wechat/send.ts +49 -0
- package/src/clis/wechat/status.ts +35 -0
- package/src/clis/xiaohongshu/creator-note-detail.ts +95 -0
- package/src/clis/xiaohongshu/creator-notes.ts +116 -0
- package/src/clis/xiaohongshu/creator-profile.ts +60 -0
- package/src/clis/xiaohongshu/creator-stats.ts +81 -0
- package/src/clis/xiaohongshu/download.ts +173 -0
- package/src/clis/xiaohongshu/search.ts +1 -1
- package/src/clis/xiaohongshu/user-helpers.test.ts +106 -0
- package/src/clis/xiaohongshu/user-helpers.ts +85 -0
- package/src/clis/xiaohongshu/user.ts +52 -32
- package/src/clis/zhihu/download.test.ts +12 -0
- package/src/clis/zhihu/download.ts +223 -0
- package/src/daemon.ts +217 -0
- package/src/doctor.test.ts +32 -193
- package/src/doctor.ts +74 -668
- package/src/download/index.ts +395 -0
- package/src/download/progress.ts +125 -0
- package/src/engine.test.ts +17 -0
- package/src/main.ts +18 -26
- package/src/pipeline/executor.test.ts +1 -0
- package/src/pipeline/registry.ts +2 -0
- package/src/pipeline/steps/browser.ts +2 -2
- package/src/pipeline/steps/download.ts +310 -0
- package/src/pipeline/steps/intercept.ts +1 -2
- package/src/pipeline/template.ts +26 -0
- package/src/setup.ts +47 -183
- package/src/types.ts +1 -0
- package/tests/e2e/browser-auth.test.ts +25 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zhihu download — export articles to Markdown format.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* opencli zhihu download --url "https://zhuanlan.zhihu.com/p/xxx" --output ./zhihu
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import * as fs from 'node:fs';
|
|
9
|
+
import * as path from 'node:path';
|
|
10
|
+
import { cli, Strategy } from '../../registry.js';
|
|
11
|
+
import { sanitizeFilename, httpDownload } from '../../download/index.js';
|
|
12
|
+
import { formatBytes } from '../../download/progress.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Convert an HTML fragment to Markdown.
 *
 * This is a simplified, regex-based converter aimed at Zhihu article bodies;
 * it is not a general HTML parser. Conversions run in a fixed order:
 * script/style removal, headings (h1–h4), paragraphs, links, images, lists,
 * bold/italic, code, blockquotes, line breaks, removal of any remaining tags,
 * entity decoding, and finally whitespace clean-up.
 *
 * @param html - HTML fragment to convert.
 * @returns The Markdown text, trimmed, with runs of 3+ newlines collapsed to 2.
 */
export function htmlToMarkdown(html: string): string {
  let md = html;

  // Remove script and style tags together with their contents
  md = md.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
  md = md.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');

  // Convert headers (h1–h4 become '#' … '####')
  for (let level = 1; level <= 4; level++) {
    const heading = new RegExp(`<h${level}[^>]*>(.*?)<\\/h${level}>`, 'gi');
    md = md.replace(heading, `${'#'.repeat(level)} $1\n\n`);
  }

  // Convert paragraphs
  md = md.replace(/<p[^>]*>([\s\S]*?)<\/p>/gi, '$1\n\n');

  // Convert links
  md = md.replace(/<a[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, '[$2]($1)');

  // Convert images. The first rule keeps the alt text when `alt` follows `src`;
  // the second handles every other <img> that has a `src`.
  md = md.replace(/<img[^>]*src="([^"]*)"[^>]*alt="([^"]*)"[^>]*\/?>/gi, '![$2]($1)');
  md = md.replace(/<img[^>]*src="([^"]*)"[^>]*\/?>/gi, '![]($1)');

  // Convert lists
  md = md.replace(/<ul[^>]*>([\s\S]*?)<\/ul>/gi, (_match: string, content: string) => {
    return content.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n') + '\n';
  });
  md = md.replace(/<ol[^>]*>([\s\S]*?)<\/ol>/gi, (_match: string, content: string) => {
    let index = 0;
    return content.replace(
      /<li[^>]*>([\s\S]*?)<\/li>/gi,
      (_itemMatch: string, itemContent: string) => `${++index}. ${itemContent}\n`,
    ) + '\n';
  });

  // Convert bold and italic
  md = md.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**');
  md = md.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**');
  md = md.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*');
  md = md.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*');

  // Convert code blocks (fenced) before inline code so <pre><code> wins
  md = md.replace(/<pre[^>]*><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi, '```\n$1\n```\n\n');
  md = md.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`');

  // Convert blockquotes: prefix every line of the quoted content with '> '
  md = md.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, (_match: string, content: string) => {
    return content.split('\n').map((line: string) => `> ${line}`).join('\n') + '\n\n';
  });

  // Convert line breaks
  md = md.replace(/<br\s*\/?>/gi, '\n');

  // Remove remaining HTML tags
  md = md.replace(/<[^>]+>/g, '');

  // Decode HTML entities. This runs after tag stripping so a decoded '<' is
  // never mistaken for markup, and '&amp;' is decoded last so that input such
  // as '&amp;quot;' yields the literal text '&quot;' instead of being decoded twice.
  md = md.replace(/&nbsp;/g, ' ');
  md = md.replace(/&lt;/g, '<');
  md = md.replace(/&gt;/g, '>');
  md = md.replace(/&quot;/g, '"');
  md = md.replace(/&#39;/g, "'");
  md = md.replace(/&amp;/g, '&');

  // Clean up extra whitespace
  md = md.replace(/\n{3,}/g, '\n\n');
  md = md.trim();

  return md;
}
|
|
87
|
+
|
|
88
|
+
/**
 * `opencli zhihu download` — export a Zhihu column article to a Markdown file.
 *
 * Flow: navigate to the article URL, scrape title/author/publish time/content
 * HTML and image URLs from the page, convert the HTML with `htmlToMarkdown`,
 * optionally download the images into `<output>/images` and rewrite their URLs
 * to local paths, then write `<sanitized title>.md` with a YAML front matter.
 *
 * Returns a single row with `title`, `author`, `status` and `size`. When the
 * article content cannot be extracted, `status` is 'failed' and the `size`
 * column carries the error text.
 */
cli({
  site: 'zhihu',
  name: 'download',
  description: '导出知乎文章为 Markdown 格式',
  domain: 'zhuanlan.zhihu.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'url', required: true, help: 'Article URL (zhuanlan.zhihu.com/p/xxx)' },
    { name: 'output', default: './zhihu-articles', help: 'Output directory' },
    { name: 'download-images', type: 'boolean', default: false, help: 'Download images locally' },
  ],
  columns: ['title', 'author', 'status', 'size'],
  func: async (page, kwargs) => {
    const url = kwargs.url;
    const output = kwargs.output;
    const downloadImages = kwargs['download-images'];

    // Navigate to article page and give it a moment to render
    await page.goto(url);
    await page.wait(3);

    // Extract article content (this script runs inside the browser page)
    const data = await page.evaluate(`
      (() => {
        const result = {
          title: '',
          author: '',
          content: '',
          publishTime: '',
          images: []
        };

        // Get title
        const titleEl = document.querySelector('.Post-Title, h1.ContentItem-title, .ArticleTitle');
        result.title = titleEl?.textContent?.trim() || 'untitled';

        // Get author
        const authorEl = document.querySelector('.AuthorInfo-name, .UserLink-link');
        result.author = authorEl?.textContent?.trim() || 'unknown';

        // Get publish time
        const timeEl = document.querySelector('.ContentItem-time, .Post-Time');
        result.publishTime = timeEl?.textContent?.trim() || '';

        // Get content HTML
        const contentEl = document.querySelector('.Post-RichTextContainer, .RichText, .ArticleContent');
        if (contentEl) {
          result.content = contentEl.innerHTML;

          // Extract image URLs
          contentEl.querySelectorAll('img').forEach(img => {
            const src = img.getAttribute('data-original') || img.getAttribute('data-actualsrc') || img.src;
            if (src && !src.includes('data:image')) {
              result.images.push(src);
            }
          });
        }

        return result;
      })()
    `);

    if (!data || !data.content) {
      return [{
        title: 'Error',
        author: '-',
        status: 'failed',
        size: 'Could not extract article content',
      }];
    }

    // Create output directory
    fs.mkdirSync(output, { recursive: true });

    // Convert HTML to Markdown
    let markdown = htmlToMarkdown(data.content);

    // Create front matter. A JSON string literal is also a valid YAML
    // double-quoted scalar, so JSON.stringify escapes quotes, backslashes and
    // newlines in one step for every field (including source and date).
    const yamlString = (value: unknown): string => JSON.stringify(String(value));
    const frontmatterLines = [
      '---',
      `title: ${yamlString(data.title)}`,
      `author: ${yamlString(data.author)}`,
      `source: ${yamlString(url)}`,
    ];
    if (data.publishTime) {
      frontmatterLines.push(`date: ${yamlString(data.publishTime)}`);
    }
    // The trailing empty entry leaves a blank line between front matter and body.
    frontmatterLines.push('---', '');
    const frontmatter = frontmatterLines.join('\n');

    // Download images if requested
    if (downloadImages && data.images && data.images.length > 0) {
      const imagesDir = path.join(output, 'images');
      fs.mkdirSync(imagesDir, { recursive: true });

      const cookies = await page.evaluate(`(() => document.cookie)()`);

      for (let i = 0; i < data.images.length; i++) {
        const imgUrl = data.images[i];
        const ext = imgUrl.match(/\.(jpg|jpeg|png|gif|webp)/i)?.[1] || 'jpg';
        const imgFilename = `img_${i + 1}.${ext}`;
        const imgPath = path.join(imagesDir, imgFilename);

        try {
          await httpDownload(imgUrl, imgPath, {
            cookies: typeof cookies === 'string' ? cookies : '',
            timeout: 30000,
          });

          // Replace every literal occurrence of the image URL with the local
          // path (split/join avoids building a RegExp from the URL).
          markdown = markdown.split(imgUrl).join(`./images/${imgFilename}`);
        } catch {
          // Best effort: keep the original remote URL if the download fails
        }
      }
    }

    // Write markdown file
    const safeTitle = sanitizeFilename(data.title, 100);
    const filename = `${safeTitle}.md`;
    const filePath = path.join(output, filename);

    const fullContent = frontmatter + '\n' + markdown;
    fs.writeFileSync(filePath, fullContent, 'utf-8');

    const size = Buffer.byteLength(fullContent, 'utf-8');

    return [{
      title: data.title,
      author: data.author,
      status: 'success',
      size: formatBytes(size),
    }];
  },
});
|
package/src/daemon.ts
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* opencli micro-daemon — HTTP + WebSocket bridge between CLI and Chrome Extension.
|
|
3
|
+
*
|
|
4
|
+
* Architecture:
|
|
5
|
+
* CLI → HTTP POST /command → daemon → WebSocket → Extension
|
|
6
|
+
* Extension → WebSocket result → daemon → HTTP response → CLI
|
|
7
|
+
*
|
|
8
|
+
* Lifecycle:
|
|
9
|
+
* - Auto-spawned by opencli on first browser command
|
|
10
|
+
* - Auto-exits after 5 minutes of idle
|
|
11
|
+
* - Listens on localhost:19825
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { createServer, type IncomingMessage, type ServerResponse } from 'node:http';
|
|
15
|
+
import { WebSocketServer, WebSocket } from 'ws';
|
|
16
|
+
|
|
17
|
+
/** TCP port the daemon listens on; overridable through OPENCLI_DAEMON_PORT. */
const PORT = Number.parseInt(process.env.OPENCLI_DAEMON_PORT ?? '19825', 10);
/** Idle period, in milliseconds (5 minutes), after which the daemon exits. */
const IDLE_TIMEOUT = 1000 * 60 * 5;

// ─── State ───────────────────────────────────────────────────────────

/** Settlement callbacks and timeout handle for one in-flight command. */
type PendingCommand = {
  resolve: (data: unknown) => void;
  reject: (error: Error) => void;
  timer: ReturnType<typeof setTimeout>;
};

/** WebSocket of the currently connected extension, or null when none is connected. */
let extensionWs: WebSocket | null = null;
/** In-flight commands keyed by command id. */
const pending = new Map<string, PendingCommand>();
/** Handle of the scheduled idle shutdown, or null before the first scheduling. */
let idleTimer: ReturnType<typeof setTimeout> | null = null;
|
|
29
|
+
|
|
30
|
+
// Ring buffer holding the most recent log entries received from the extension
interface LogEntry {
  level: string;
  msg: string;
  ts: number;
}

/** Maximum number of entries kept in `logBuffer`. */
const LOG_BUFFER_SIZE = 200;
const logBuffer: LogEntry[] = [];

/**
 * Append a log entry, discarding the oldest entry once the buffer
 * holds more than LOG_BUFFER_SIZE items.
 */
function pushLog(entry: LogEntry): void {
  logBuffer.push(entry);
  const withinCapacity = logBuffer.length <= LOG_BUFFER_SIZE;
  if (withinCapacity) {
    return;
  }
  logBuffer.shift();
}
|
|
39
|
+
|
|
40
|
+
// ─── Idle auto-exit ──────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
/**
 * (Re)start the idle countdown: cancel any previously scheduled shutdown and
 * schedule a new one IDLE_TIMEOUT milliseconds from now. When the countdown
 * fires, the process logs a message and exits with code 0.
 */
function resetIdleTimer(): void {
  if (idleTimer !== null) {
    clearTimeout(idleTimer);
  }

  const exitOnIdle = (): void => {
    console.error('[daemon] Idle timeout, shutting down');
    process.exit(0);
  };

  idleTimer = setTimeout(exitOnIdle, IDLE_TIMEOUT);
}
|
|
49
|
+
|
|
50
|
+
// ─── HTTP Server ─────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
/**
 * Collect the full request body and decode it as UTF-8.
 *
 * @param req - Incoming HTTP request to drain.
 * @returns A promise resolving to the body text once the stream ends;
 *          it rejects if the request stream emits 'error'.
 */
function readBody(req: IncomingMessage): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const parts: Buffer[] = [];

    req.on('error', reject);
    req.on('data', (part: Buffer) => {
      parts.push(part);
    });
    req.on('end', () => {
      // Concatenate before decoding so multi-byte characters split across
      // chunks are decoded correctly.
      const whole = Buffer.concat(parts);
      resolve(whole.toString('utf-8'));
    });
  });
}
|
|
60
|
+
|
|
61
|
+
/**
 * Send `data` as a JSON response.
 *
 * @param res - Response to write to.
 * @param status - HTTP status code.
 * @param data - Value serialized with JSON.stringify as the response body.
 */
function jsonResponse(res: ServerResponse, status: number, data: unknown): void {
  const headers = {
    'Content-Type': 'application/json',
    'Access-Control-Allow-Origin': '*',
  };
  res.writeHead(status, headers);
  res.end(JSON.stringify(data));
}
|
|
65
|
+
|
|
66
|
+
/**
 * Route one HTTP request.
 *
 * Endpoints:
 *   - OPTIONS *        → 204 (CORS preflight)
 *   - GET    /status   → daemon/extension status and number of pending commands
 *   - GET    /logs     → buffered extension logs, optionally filtered by `?level=`
 *   - DELETE /logs     → clear the log buffer
 *   - POST   /command  → forward the JSON body to the extension over WebSocket and
 *                        reply with the extension's result (30 s timeout)
 *   - anything else    → 404
 *
 * NOTE(review): every response carries `Access-Control-Allow-Origin: *` and no
 * Origin/Content-Type check is performed, so any web page open in the user's
 * browser can reach this localhost endpoint. Consider restricting callers.
 *
 * @param req - Incoming request.
 * @param res - Response to write to.
 */
async function handleRequest(req: IncomingMessage, res: ServerResponse): Promise<void> {
  res.setHeader('Access-Control-Allow-Origin', '*');
  // DELETE is advertised because DELETE /logs is a supported route.
  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
  res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
  if (req.method === 'OPTIONS') { res.writeHead(204); res.end(); return; }

  const url = req.url ?? '/';
  const pathname = url.split('?')[0];

  if (req.method === 'GET' && pathname === '/status') {
    jsonResponse(res, 200, {
      ok: true,
      extensionConnected: extensionWs?.readyState === WebSocket.OPEN,
      pending: pending.size,
    });
    return;
  }

  if (req.method === 'GET' && pathname === '/logs') {
    const params = new URL(url, `http://localhost:${PORT}`).searchParams;
    const level = params.get('level');
    const filtered = level
      ? logBuffer.filter(e => e.level === level)
      : logBuffer;
    jsonResponse(res, 200, { ok: true, logs: filtered });
    return;
  }

  if (req.method === 'DELETE' && pathname === '/logs') {
    logBuffer.length = 0;
    jsonResponse(res, 200, { ok: true });
    return;
  }

  // Match on pathname (like the other routes) so a query string does not cause a 404.
  if (req.method === 'POST' && pathname === '/command') {
    // Only commands count as activity for the idle auto-exit.
    resetIdleTimer();
    try {
      const body = JSON.parse(await readBody(req));
      // JSON.parse may yield null or a primitive; reject those explicitly.
      if (body === null || typeof body !== 'object' || !body.id) {
        jsonResponse(res, 400, { ok: false, error: 'Missing command id' });
        return;
      }

      const socket = extensionWs;
      if (!socket || socket.readyState !== WebSocket.OPEN) {
        jsonResponse(res, 503, { id: body.id, ok: false, error: 'Extension not connected. Please install the opencli Browser Bridge extension.' });
        return;
      }

      // Re-using an in-flight id would overwrite the first request's entry and
      // let its timeout delete the second one, so refuse it up front.
      if (pending.has(body.id)) {
        jsonResponse(res, 409, { id: body.id, ok: false, error: 'Duplicate command id' });
        return;
      }

      const result = await new Promise<unknown>((resolve, reject) => {
        const timer = setTimeout(() => {
          pending.delete(body.id);
          reject(new Error('Command timeout (30s)'));
        }, 30000);
        pending.set(body.id, { resolve, reject, timer });
        try {
          socket.send(JSON.stringify(body));
        } catch (sendErr) {
          // Sending failed synchronously: release the entry and its timer
          // instead of leaving them behind until the timeout fires.
          clearTimeout(timer);
          pending.delete(body.id);
          reject(sendErr instanceof Error ? sendErr : new Error(String(sendErr)));
        }
      });

      jsonResponse(res, 200, result);
    } catch (err) {
      // Timeouts map to 408; everything else (bad JSON, disconnects, …) to 400.
      jsonResponse(res, err instanceof Error && err.message.includes('timeout') ? 408 : 400, {
        ok: false,
        error: err instanceof Error ? err.message : 'Invalid request',
      });
    }
    return;
  }

  jsonResponse(res, 404, { error: 'Not found' });
}
|
|
135
|
+
|
|
136
|
+
// ─── WebSocket for Extension ─────────────────────────────────────────
|
|
137
|
+
|
|
138
|
+
// HTTP server: every request goes through handleRequest; an unexpected failure
// becomes a bare 500 (the status line is only written if headers are not yet sent).
const httpServer = createServer((req, res) => {
  handleRequest(req, res).catch(() => {
    if (!res.headersSent) res.writeHead(500);
    res.end();
  });
});

// WebSocket endpoint the extension connects to, sharing the HTTP server on path /ext.
const wss = new WebSocketServer({ server: httpServer, path: '/ext' });

wss.on('connection', (ws) => {
  console.error('[daemon] Extension connected');
  // The most recent connection becomes the active extension socket.
  extensionWs = ws;

  /**
   * Drop this socket as the active extension and fail every pending command.
   * Does nothing if another connection has already replaced this one, so it is
   * safe to call from both the 'error' and the 'close' handler.
   */
  const detach = (): void => {
    if (extensionWs !== ws) return;
    extensionWs = null;
    // Reject all pending requests since the extension is gone
    for (const [, p] of pending) {
      clearTimeout(p.timer);
      p.reject(new Error('Extension disconnected'));
    }
    pending.clear();
  };

  ws.on('message', (data) => {
    try {
      const msg = JSON.parse(data.toString());

      // Handle log messages from extension
      if (msg.type === 'log') {
        const prefix = msg.level === 'error' ? '❌' : msg.level === 'warn' ? '⚠️' : '📋';
        console.error(`${prefix} [ext] ${msg.msg}`);
        pushLog({ level: msg.level, msg: msg.msg, ts: msg.ts ?? Date.now() });
        return;
      }

      // Handle command results: settle the matching pending request, if any
      const p = pending.get(msg.id);
      if (p) {
        clearTimeout(p.timer);
        pending.delete(msg.id);
        p.resolve(msg);
      }
    } catch {
      // Ignore malformed messages
    }
  });

  ws.on('close', () => {
    console.error('[daemon] Extension disconnected');
    detach();
  });

  // Run the same clean-up on 'error'. Previously this handler only cleared
  // extensionWs, which made the 'close' handler's identity check fail and left
  // pending commands hanging until their 30 s timeout.
  ws.on('error', detach);
});
|
|
186
|
+
|
|
187
|
+
// ─── Start ───────────────────────────────────────────────────────────
|
|
188
|
+
|
|
189
|
+
// Bind to the loopback interface only and arm the idle countdown once listening.
const onListening = (): void => {
  console.error(`[daemon] Listening on http://127.0.0.1:${PORT}`);
  resetIdleTimer();
};
httpServer.listen(PORT, '127.0.0.1', onListening);

// Server-level errors are fatal. A busy port is treated as "another daemon is
// already running" and exits with code 0; any other error exits with code 1.
httpServer.on('error', (err: NodeJS.ErrnoException) => {
  const portTaken = err.code === 'EADDRINUSE';
  if (portTaken) {
    console.error(`[daemon] Port ${PORT} already in use — another daemon is likely running. Exiting.`);
    process.exit(0);
  }
  console.error('[daemon] Server error:', err.message);
  process.exit(1);
});
|
|
202
|
+
|
|
203
|
+
/**
 * Graceful shutdown: fail every pending command so the CLI doesn't hang,
 * close the extension socket and the HTTP server, then exit with code 0.
 */
function shutdown(): void {
  pending.forEach((entry) => {
    clearTimeout(entry.timer);
    entry.reject(new Error('Daemon shutting down'));
  });
  pending.clear();

  extensionWs?.close();
  httpServer.close();
  process.exit(0);
}

// Run the graceful shutdown on both termination signals.
for (const signal of ['SIGTERM', 'SIGINT'] as const) {
  process.on(signal, shutdown);
}
|