@jackwener/opencli 0.9.6 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/bug_report.yml +83 -0
- package/.github/ISSUE_TEMPLATE/config.yml +8 -0
- package/.github/ISSUE_TEMPLATE/feature_request.yml +42 -0
- package/.github/ISSUE_TEMPLATE/new_site_adapter.yml +57 -0
- package/.github/dependabot.yml +27 -0
- package/.github/pull_request_template.md +24 -0
- package/.github/workflows/ci.yml +14 -8
- package/.github/workflows/e2e-headed.yml +6 -2
- package/.github/workflows/pkg-pr-new.yml +2 -2
- package/.github/workflows/release-please.yml +25 -0
- package/.github/workflows/release.yml +2 -2
- package/.github/workflows/security.yml +36 -0
- package/CLI-ELECTRON.md +89 -36
- package/CONTRIBUTING.md +167 -0
- package/README.md +98 -32
- package/README.zh-CN.md +99 -33
- package/dist/browser/discover.js +22 -7
- package/dist/browser.test.js +23 -0
- package/dist/build-manifest.d.ts +26 -0
- package/dist/build-manifest.js +132 -60
- package/dist/build-manifest.test.d.ts +1 -0
- package/dist/build-manifest.test.js +26 -0
- package/dist/cli-manifest.json +1415 -29
- package/dist/clis/bilibili/download.d.ts +10 -0
- package/dist/clis/bilibili/download.js +135 -0
- package/dist/clis/chatwise/ask.d.ts +1 -0
- package/dist/clis/chatwise/ask.js +76 -0
- package/dist/clis/chatwise/export.d.ts +1 -0
- package/dist/clis/chatwise/export.js +46 -0
- package/dist/clis/chatwise/history.d.ts +1 -0
- package/dist/clis/chatwise/history.js +43 -0
- package/dist/clis/chatwise/model.d.ts +1 -0
- package/dist/clis/chatwise/model.js +81 -0
- package/dist/clis/chatwise/new.d.ts +1 -0
- package/dist/clis/chatwise/new.js +18 -0
- package/dist/clis/chatwise/read.d.ts +1 -0
- package/dist/clis/chatwise/read.js +39 -0
- package/dist/clis/chatwise/screenshot.d.ts +1 -0
- package/dist/clis/chatwise/screenshot.js +27 -0
- package/dist/clis/chatwise/send.d.ts +1 -0
- package/dist/clis/chatwise/send.js +45 -0
- package/dist/clis/chatwise/status.d.ts +1 -0
- package/dist/clis/chatwise/status.js +22 -0
- package/dist/clis/discord-app/channels.d.ts +1 -0
- package/dist/clis/discord-app/channels.js +45 -0
- package/dist/clis/discord-app/members.d.ts +1 -0
- package/dist/clis/discord-app/members.js +38 -0
- package/dist/clis/discord-app/read.d.ts +1 -0
- package/dist/clis/discord-app/read.js +45 -0
- package/dist/clis/discord-app/search.d.ts +1 -0
- package/dist/clis/discord-app/search.js +56 -0
- package/dist/clis/discord-app/send.d.ts +1 -0
- package/dist/clis/discord-app/send.js +27 -0
- package/dist/clis/discord-app/servers.d.ts +1 -0
- package/dist/clis/discord-app/servers.js +36 -0
- package/dist/clis/discord-app/status.d.ts +1 -0
- package/dist/clis/discord-app/status.js +16 -0
- package/dist/clis/feishu/new.d.ts +1 -0
- package/dist/clis/feishu/new.js +27 -0
- package/dist/clis/feishu/read.d.ts +1 -0
- package/dist/clis/feishu/read.js +40 -0
- package/dist/clis/feishu/search.d.ts +1 -0
- package/dist/clis/feishu/search.js +30 -0
- package/dist/clis/feishu/send.d.ts +1 -0
- package/dist/clis/feishu/send.js +39 -0
- package/dist/clis/feishu/status.d.ts +1 -0
- package/dist/clis/feishu/status.js +28 -0
- package/dist/clis/grok/ask.d.ts +1 -0
- package/dist/clis/grok/ask.js +82 -0
- package/dist/clis/grok/debug.d.ts +1 -0
- package/dist/clis/grok/debug.js +45 -0
- package/dist/clis/jimeng/generate.yaml +84 -0
- package/dist/clis/jimeng/history.yaml +47 -0
- package/dist/clis/linux-do/categories.yaml +41 -0
- package/dist/clis/linux-do/category.yaml +49 -0
- package/dist/clis/linux-do/hot.yaml +50 -0
- package/dist/clis/linux-do/latest.yaml +40 -0
- package/dist/clis/linux-do/search.yaml +45 -0
- package/dist/clis/linux-do/topic.yaml +38 -0
- package/dist/clis/notion/export.d.ts +1 -0
- package/dist/clis/notion/export.js +31 -0
- package/dist/clis/notion/favorites.d.ts +1 -0
- package/dist/clis/notion/favorites.js +84 -0
- package/dist/clis/notion/new.d.ts +1 -0
- package/dist/clis/notion/new.js +34 -0
- package/dist/clis/notion/read.d.ts +1 -0
- package/dist/clis/notion/read.js +30 -0
- package/dist/clis/notion/search.d.ts +1 -0
- package/dist/clis/notion/search.js +46 -0
- package/dist/clis/notion/sidebar.d.ts +1 -0
- package/dist/clis/notion/sidebar.js +41 -0
- package/dist/clis/notion/status.d.ts +1 -0
- package/dist/clis/notion/status.js +16 -0
- package/dist/clis/notion/write.d.ts +1 -0
- package/dist/clis/notion/write.js +40 -0
- package/dist/clis/twitter/download.d.ts +8 -0
- package/dist/clis/twitter/download.js +204 -0
- package/dist/clis/wechat/chats.d.ts +1 -0
- package/dist/clis/wechat/chats.js +28 -0
- package/dist/clis/wechat/contacts.d.ts +1 -0
- package/dist/clis/wechat/contacts.js +28 -0
- package/dist/clis/wechat/read.d.ts +1 -0
- package/dist/clis/wechat/read.js +58 -0
- package/dist/clis/wechat/search.d.ts +1 -0
- package/dist/clis/wechat/search.js +31 -0
- package/dist/clis/wechat/send.d.ts +1 -0
- package/dist/clis/wechat/send.js +42 -0
- package/dist/clis/wechat/status.d.ts +1 -0
- package/dist/clis/wechat/status.js +29 -0
- package/dist/clis/xiaohongshu/creator-note-detail.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-note-detail.js +88 -0
- package/dist/clis/xiaohongshu/creator-notes.d.ts +11 -0
- package/dist/clis/xiaohongshu/creator-notes.js +109 -0
- package/dist/clis/xiaohongshu/creator-profile.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-profile.js +54 -0
- package/dist/clis/xiaohongshu/creator-stats.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-stats.js +74 -0
- package/dist/clis/xiaohongshu/download.d.ts +7 -0
- package/dist/clis/xiaohongshu/download.js +155 -0
- package/dist/clis/xiaohongshu/search.js +1 -1
- package/dist/clis/xiaohongshu/user-helpers.d.ts +15 -0
- package/dist/clis/xiaohongshu/user-helpers.js +67 -0
- package/dist/clis/xiaohongshu/user-helpers.test.d.ts +1 -0
- package/dist/clis/xiaohongshu/user-helpers.test.js +81 -0
- package/dist/clis/xiaohongshu/user.js +46 -29
- package/dist/clis/zhihu/download.d.ts +11 -0
- package/dist/clis/zhihu/download.js +186 -0
- package/dist/clis/zhihu/download.test.d.ts +1 -0
- package/dist/clis/zhihu/download.test.js +10 -0
- package/dist/download/index.d.ts +79 -0
- package/dist/download/index.js +325 -0
- package/dist/download/progress.d.ts +36 -0
- package/dist/download/progress.js +111 -0
- package/dist/engine.test.js +15 -0
- package/dist/main.js +16 -3
- package/dist/pipeline/registry.js +2 -0
- package/dist/pipeline/steps/download.d.ts +34 -0
- package/dist/pipeline/steps/download.js +251 -0
- package/dist/pipeline/template.js +28 -0
- package/package.json +4 -3
- package/scripts/test-site.mjs +70 -0
- package/src/browser/discover.ts +23 -7
- package/src/browser.test.ts +23 -0
- package/src/build-manifest.test.ts +28 -0
- package/src/build-manifest.ts +147 -57
- package/src/clis/bilibili/download.ts +161 -0
- package/src/clis/chatwise/README.md +38 -0
- package/src/clis/chatwise/README.zh-CN.md +38 -0
- package/src/clis/chatwise/ask.ts +87 -0
- package/src/clis/chatwise/export.ts +51 -0
- package/src/clis/chatwise/history.ts +47 -0
- package/src/clis/chatwise/model.ts +87 -0
- package/src/clis/chatwise/new.ts +21 -0
- package/src/clis/chatwise/read.ts +42 -0
- package/src/clis/chatwise/screenshot.ts +33 -0
- package/src/clis/chatwise/send.ts +50 -0
- package/src/clis/chatwise/status.ts +25 -0
- package/src/clis/discord-app/README.md +28 -0
- package/src/clis/discord-app/README.zh-CN.md +28 -0
- package/src/clis/discord-app/channels.ts +48 -0
- package/src/clis/discord-app/members.ts +41 -0
- package/src/clis/discord-app/read.ts +49 -0
- package/src/clis/discord-app/search.ts +64 -0
- package/src/clis/discord-app/send.ts +32 -0
- package/src/clis/discord-app/servers.ts +39 -0
- package/src/clis/discord-app/status.ts +18 -0
- package/src/clis/feishu/README.md +20 -0
- package/src/clis/feishu/README.zh-CN.md +20 -0
- package/src/clis/feishu/new.ts +32 -0
- package/src/clis/feishu/read.ts +48 -0
- package/src/clis/feishu/search.ts +35 -0
- package/src/clis/feishu/send.ts +46 -0
- package/src/clis/feishu/status.ts +34 -0
- package/src/clis/grok/ask.ts +90 -0
- package/src/clis/grok/debug.ts +49 -0
- package/src/clis/jimeng/generate.yaml +84 -0
- package/src/clis/jimeng/history.yaml +47 -0
- package/src/clis/linux-do/categories.yaml +41 -0
- package/src/clis/linux-do/category.yaml +49 -0
- package/src/clis/linux-do/hot.yaml +50 -0
- package/src/clis/linux-do/latest.yaml +40 -0
- package/src/clis/linux-do/search.yaml +45 -0
- package/src/clis/linux-do/topic.yaml +38 -0
- package/src/clis/notion/README.md +29 -0
- package/src/clis/notion/README.zh-CN.md +29 -0
- package/src/clis/notion/export.ts +36 -0
- package/src/clis/notion/favorites.ts +87 -0
- package/src/clis/notion/new.ts +39 -0
- package/src/clis/notion/read.ts +33 -0
- package/src/clis/notion/search.ts +54 -0
- package/src/clis/notion/sidebar.ts +44 -0
- package/src/clis/notion/status.ts +18 -0
- package/src/clis/notion/write.ts +45 -0
- package/src/clis/twitter/download.ts +227 -0
- package/src/clis/wechat/README.md +28 -0
- package/src/clis/wechat/README.zh-CN.md +28 -0
- package/src/clis/wechat/chats.ts +33 -0
- package/src/clis/wechat/contacts.ts +33 -0
- package/src/clis/wechat/read.ts +72 -0
- package/src/clis/wechat/search.ts +36 -0
- package/src/clis/wechat/send.ts +49 -0
- package/src/clis/wechat/status.ts +35 -0
- package/src/clis/xiaohongshu/creator-note-detail.ts +95 -0
- package/src/clis/xiaohongshu/creator-notes.ts +116 -0
- package/src/clis/xiaohongshu/creator-profile.ts +60 -0
- package/src/clis/xiaohongshu/creator-stats.ts +81 -0
- package/src/clis/xiaohongshu/download.ts +173 -0
- package/src/clis/xiaohongshu/search.ts +1 -1
- package/src/clis/xiaohongshu/user-helpers.test.ts +106 -0
- package/src/clis/xiaohongshu/user-helpers.ts +85 -0
- package/src/clis/xiaohongshu/user.ts +52 -32
- package/src/clis/zhihu/download.test.ts +12 -0
- package/src/clis/zhihu/download.ts +223 -0
- package/src/download/index.ts +395 -0
- package/src/download/progress.ts +125 -0
- package/src/engine.test.ts +17 -0
- package/src/main.ts +12 -3
- package/src/pipeline/registry.ts +2 -0
- package/src/pipeline/steps/download.ts +310 -0
- package/src/pipeline/template.ts +26 -0
- package/tests/e2e/browser-auth.test.ts +25 -0
|
@@ -1,45 +1,65 @@
|
|
|
1
1
|
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import { extractXhsUserNotes, normalizeXhsUserId } from './user-helpers.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Read the Xiaohongshu user store out of the currently opened profile page.
 *
 * The script handed to `page.evaluate` looks at `window.__INITIAL_STATE__.user`
 * and returns two JSON-safe copies:
 *   - `noteGroups`: `user.notes` (unwrapping `._value` when present), else `[]`
 *   - `pageData`:   `user.userPageData` (unwrapping `._value` when present), else `{}`
 *
 * @param page Browser page handle exposing `evaluate(script)`.
 * @returns Whatever `page.evaluate` resolves to for the script below.
 */
async function readUserSnapshot(page: any) {
  // `safeClone` round-trips through JSON so only plain data is returned from
  // the page; anything that cannot be serialised collapses to `null`.
  const snapshotScript = `
    (() => {
      const safeClone = (value) => {
        try {
          return JSON.parse(JSON.stringify(value ?? null));
        } catch {
          return null;
        }
      };

      const userStore = window.__INITIAL_STATE__?.user || {};
      return {
        noteGroups: safeClone(userStore.notes?._value || userStore.notes || []),
        pageData: safeClone(userStore.userPageData?._value || userStore.userPageData || {}),
      };
    })()
  `;

  const snapshot = await page.evaluate(snapshotScript);
  return snapshot;
}
|
|
2
23
|
|
|
3
24
|
/**
 * Register the `user` command for site `xiaohongshu`: list the public notes
 * shown on a user's profile page.
 *
 * Arguments:
 *   - `id`    (required) user id or profile URL, normalised by `normalizeXhsUserId`
 *   - `limit` maximum number of notes to return (default 15, minimum 1)
 *
 * Resolves to at most `limit` rows produced by `extractXhsUserNotes`.
 * Throws an `Error` when no note could be extracted from the profile.
 */
cli({
  site: 'xiaohongshu',
  name: 'user',
  description: 'Get public notes from a Xiaohongshu user profile',
  domain: 'www.xiaohongshu.com',
  strategy: Strategy.COOKIE,
  browser: true,
  args: [
    { name: 'id', type: 'string', required: true, help: 'User id or profile URL' },
    { name: 'limit', type: 'int', default: 15, help: 'Number of notes to return' },
  ],
  columns: ['id', 'title', 'type', 'likes', 'url'],
  func: async (page, kwargs) => {
    const DEFAULT_LIMIT = 15;
    // Upper bound on extra scroll rounds used to load more notes.
    const MAX_SCROLL_ROUNDS = 4;

    const userId = normalizeXhsUserId(String(kwargs.id));

    // A non-numeric limit would become NaN, which silently disables the scroll
    // loop and makes `slice(0, NaN)` return nothing; fall back to the default.
    const requestedLimit = Number(kwargs.limit ?? DEFAULT_LIMIT);
    const limit = Number.isFinite(requestedLimit)
      ? Math.max(1, Math.trunc(requestedLimit))
      : DEFAULT_LIMIT;

    await page.goto(`https://www.xiaohongshu.com/user/profile/${userId}`);
    await page.wait(3);

    let snapshot = await readUserSnapshot(page);
    let results = extractXhsUserNotes(snapshot ?? {}, userId);

    // Scroll until enough notes are loaded, the round budget is spent, or a
    // scroll yields no additional notes.
    for (let round = 0; results.length < limit && round < MAX_SCROLL_ROUNDS; round += 1) {
      await page.autoScroll({ times: 1, delayMs: 1500 });
      await page.wait(1);

      snapshot = await readUserSnapshot(page);
      const nextResults = extractXhsUserNotes(snapshot ?? {}, userId);
      if (nextResults.length <= results.length) break;

      results = nextResults;
    }

    if (results.length === 0) {
      throw new Error('No public notes found for this Xiaohongshu user.');
    }

    return results.slice(0, limit);
  },
});
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { htmlToMarkdown } from './download.js';
|
|
3
|
+
|
|
4
|
+
// Regression suite for the ordered-list branch of htmlToMarkdown: every <li>
// must come out numbered with its own text, never as a literal "$1".
describe('htmlToMarkdown', () => {
  it('renders ordered lists with the original list item content', () => {
    const markdown = htmlToMarkdown('<ol><li>First item</li><li>Second item</li></ol>');

    expect(markdown).toContain('1. First item');
    expect(markdown).toContain('2. Second item');
    expect(markdown).not.toContain('$1');
  });
});
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Zhihu download — export articles to Markdown format.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* opencli zhihu download --url "https://zhuanlan.zhihu.com/p/xxx" --output ./zhihu
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import * as fs from 'node:fs';
|
|
9
|
+
import * as path from 'node:path';
|
|
10
|
+
import { cli, Strategy } from '../../registry.js';
|
|
11
|
+
import { sanitizeFilename, httpDownload } from '../../download/index.js';
|
|
12
|
+
import { formatBytes } from '../../download/progress.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Convert an HTML fragment to Markdown.
 *
 * This is a simplified, regex-based converter for Zhihu article content; it is
 * not a general HTML parser. Handled constructs: h1–h4, paragraphs, links,
 * images, unordered/ordered lists, bold, italic, code blocks, inline code,
 * blockquotes and line breaks. Every other tag is removed, then a small set of
 * character references is decoded.
 *
 * Tag patterns require whitespace or `>` right after the tag name so that a
 * short name never matches a longer one (`<p>` vs `<pre>`, `<b>` vs
 * `<blockquote>`, `<i>` vs `<iframe>`, `<li>` vs `<link>`).
 *
 * @param html HTML source to convert.
 * @returns Markdown text, trimmed, with runs of 3+ newlines collapsed to 2.
 */
export function htmlToMarkdown(html: string): string {
  // Character references decoded once all tags are gone.
  const entities: Record<string, string> = {
    nbsp: ' ',
    lt: '<',
    gt: '>',
    amp: '&',
    quot: '"',
    '#39': "'",
  };

  // Read one double-quoted attribute value out of a single tag's source text.
  // The leading `\s` keeps `src` from matching inside `data-src`.
  const readAttr = (tag: string, name: string): string =>
    new RegExp(`\\s${name}="([^"]*)"`, 'i').exec(tag)?.[1] ?? '';

  let md = html;

  // Remove script and style elements together with their content.
  md = md.replace(/<script(?:\s[^>]*)?>[\s\S]*?<\/script>/gi, '');
  md = md.replace(/<style(?:\s[^>]*)?>[\s\S]*?<\/style>/gi, '');

  // Headers h1–h4: the digit becomes the number of leading '#'.
  md = md.replace(
    /<h([1-4])(?:\s[^>]*)?>(.*?)<\/h\1>/gi,
    (_tag: string, level: string, text: string) => `${'#'.repeat(Number(level))} ${text}\n\n`,
  );

  // Paragraphs.
  md = md.replace(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/gi, '$1\n\n');

  // Links.
  md = md.replace(/<a\s(?:[^>]*\s)?href="([^"]*)"[^>]*>(.*?)<\/a>/gi, '[$2]($1)');

  // Images. Attribute order does not matter. The lazy-load attributes win over
  // `src`, matching the URL priority the download command uses when it
  // collects images, so its URL-to-local-path rewrite can find them here.
  md = md.replace(/<img(?:\s[^>]*)?\/?>/gi, (tag: string) => {
    const src =
      readAttr(tag, 'data-original') || readAttr(tag, 'data-actualsrc') || readAttr(tag, 'src');
    return src ? `![${readAttr(tag, 'alt')}](${src})` : '';
  });

  // Lists.
  md = md.replace(
    /<ul(?:\s[^>]*)?>([\s\S]*?)<\/ul>/gi,
    (_list: string, items: string) =>
      `${items.replace(/<li(?:\s[^>]*)?>([\s\S]*?)<\/li>/gi, '- $1\n')}\n`,
  );
  md = md.replace(/<ol(?:\s[^>]*)?>([\s\S]*?)<\/ol>/gi, (_list: string, items: string) => {
    // Numbering restarts for every <ol>.
    let index = 0;
    const numbered = items.replace(
      /<li(?:\s[^>]*)?>([\s\S]*?)<\/li>/gi,
      (_itemMatch: string, itemContent: string) => `${++index}. ${itemContent}\n`,
    );
    return `${numbered}\n`;
  });

  // Bold and italic.
  md = md.replace(/<strong(?:\s[^>]*)?>(.*?)<\/strong>/gi, '**$1**');
  md = md.replace(/<b(?:\s[^>]*)?>(.*?)<\/b>/gi, '**$1**');
  md = md.replace(/<em(?:\s[^>]*)?>(.*?)<\/em>/gi, '*$1*');
  md = md.replace(/<i(?:\s[^>]*)?>(.*?)<\/i>/gi, '*$1*');

  // Code blocks first, then whatever inline <code> remains.
  md = md.replace(
    /<pre(?:\s[^>]*)?><code(?:\s[^>]*)?>([\s\S]*?)<\/code><\/pre>/gi,
    '```\n$1\n```\n\n',
  );
  md = md.replace(/<code(?:\s[^>]*)?>(.*?)<\/code>/gi, '`$1`');

  // Blockquotes: prefix every line of the quoted content.
  md = md.replace(
    /<blockquote(?:\s[^>]*)?>([\s\S]*?)<\/blockquote>/gi,
    (_quote: string, content: string) =>
      `${content.split('\n').map((line: string) => `> ${line}`).join('\n')}\n\n`,
  );

  // Line breaks.
  md = md.replace(/<br\s*\/?>/gi, '\n');

  // Remove remaining HTML tags.
  md = md.replace(/<[^>]+>/g, '');

  // Decode character references in a single pass so that `&amp;lt;` becomes
  // the text `&lt;` instead of being decoded twice into `<`.
  md = md.replace(
    /&(nbsp|lt|gt|amp|quot|#39);/g,
    (_ref: string, name: string) => entities[name],
  );

  // Clean up extra whitespace.
  md = md.replace(/\n{3,}/g, '\n\n');
  return md.trim();
}
|
|
87
|
+
|
|
88
|
+
/**
 * Register the `download` command for site `zhihu`: export one article to a
 * Markdown file with a YAML front-matter header.
 *
 * Arguments:
 *   - `url`             (required) article URL
 *   - `output`          output directory (default `./zhihu-articles`)
 *   - `download-images` when true, save images under `<output>/images` and
 *                       point the Markdown at the local copies
 *
 * Resolves to a single row `{ title, author, status, size }`. When no article
 * content can be extracted the row has `status: 'failed'` and nothing is
 * written to disk.
 */
cli({
  site: 'zhihu',
  name: 'download',
  description: '导出知乎文章为 Markdown 格式',
  domain: 'zhuanlan.zhihu.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'url', required: true, help: 'Article URL (zhuanlan.zhihu.com/p/xxx)' },
    { name: 'output', default: './zhihu-articles', help: 'Output directory' },
    { name: 'download-images', type: 'boolean', default: false, help: 'Download images locally' },
  ],
  columns: ['title', 'author', 'status', 'size'],
  func: async (page, kwargs) => {
    const url = kwargs.url;
    const output = kwargs.output;
    const downloadImages = kwargs['download-images'];

    // Navigate to the article page and give it time to render.
    await page.goto(url);
    await page.wait(3);

    // Extract title, author, publish time, content HTML and image URLs.
    const data = await page.evaluate(`
      (() => {
        const result = {
          title: '',
          author: '',
          content: '',
          publishTime: '',
          images: []
        };

        // Get title
        const titleEl = document.querySelector('.Post-Title, h1.ContentItem-title, .ArticleTitle');
        result.title = titleEl?.textContent?.trim() || 'untitled';

        // Get author
        const authorEl = document.querySelector('.AuthorInfo-name, .UserLink-link');
        result.author = authorEl?.textContent?.trim() || 'unknown';

        // Get publish time
        const timeEl = document.querySelector('.ContentItem-time, .Post-Time');
        result.publishTime = timeEl?.textContent?.trim() || '';

        // Get content HTML
        const contentEl = document.querySelector('.Post-RichTextContainer, .RichText, .ArticleContent');
        if (contentEl) {
          result.content = contentEl.innerHTML;

          // Extract image URLs
          contentEl.querySelectorAll('img').forEach(img => {
            const src = img.getAttribute('data-original') || img.getAttribute('data-actualsrc') || img.src;
            if (src && !src.includes('data:image')) {
              result.images.push(src);
            }
          });
        }

        return result;
      })()
    `);

    if (!data || !data.content) {
      return [{
        title: 'Error',
        author: '-',
        status: 'failed',
        size: 'Could not extract article content',
      }];
    }

    fs.mkdirSync(output, { recursive: true });

    let markdown = htmlToMarkdown(data.content);

    // A JSON string literal is a valid YAML double-quoted scalar, so this
    // escapes quotes, backslashes and control characters in one step. Escaping
    // only `"` by hand left titles containing a backslash, and the unescaped
    // URL and date, able to corrupt the front matter.
    const yamlQuote = (value: unknown): string => JSON.stringify(String(value));

    // `filter(Boolean)` drops the optional date line when it is empty.
    const frontmatter = [
      '---',
      `title: ${yamlQuote(data.title)}`,
      `author: ${yamlQuote(data.author)}`,
      `source: ${yamlQuote(url)}`,
      data.publishTime ? `date: ${yamlQuote(data.publishTime)}` : '',
      '---',
      '',
    ].filter(Boolean).join('\n');

    if (downloadImages && data.images && data.images.length > 0) {
      const imagesDir = path.join(output, 'images');
      fs.mkdirSync(imagesDir, { recursive: true });

      // The page's cookies are forwarded with each image request.
      const cookies = await page.evaluate(`(() => document.cookie)()`);
      const cookieHeader = typeof cookies === 'string' ? cookies : '';

      for (let i = 0; i < data.images.length; i++) {
        const imgUrl = data.images[i];
        const ext = imgUrl.match(/\.(jpg|jpeg|png|gif|webp)/i)?.[1] || 'jpg';
        const imgFilename = `img_${i + 1}.${ext}`;
        const imgPath = path.join(imagesDir, imgFilename);

        try {
          await httpDownload(imgUrl, imgPath, { cookies: cookieHeader, timeout: 30000 });

          // Point every occurrence of the remote URL at the local copy.
          markdown = markdown.split(imgUrl).join(`./images/${imgFilename}`);
        } catch {
          // Best effort: keep the original remote URL if the download fails.
        }
      }
    }

    // Write the Markdown file, named after the sanitised article title.
    const safeTitle = sanitizeFilename(data.title, 100);
    const filePath = path.join(output, `${safeTitle}.md`);

    const fullContent = frontmatter + '\n' + markdown;
    fs.writeFileSync(filePath, fullContent, 'utf-8');

    return [{
      title: data.title,
      author: data.author,
      status: 'success',
      size: formatBytes(Buffer.byteLength(fullContent, 'utf-8')),
    }];
  },
});
|