@jackwener/opencli 0.9.6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/bug_report.yml +83 -0
- package/.github/ISSUE_TEMPLATE/config.yml +8 -0
- package/.github/ISSUE_TEMPLATE/feature_request.yml +42 -0
- package/.github/ISSUE_TEMPLATE/new_site_adapter.yml +57 -0
- package/.github/dependabot.yml +27 -0
- package/.github/pull_request_template.md +24 -0
- package/.github/workflows/ci.yml +14 -8
- package/.github/workflows/e2e-headed.yml +6 -2
- package/.github/workflows/pkg-pr-new.yml +2 -2
- package/.github/workflows/release-please.yml +25 -0
- package/.github/workflows/release.yml +2 -2
- package/.github/workflows/security.yml +36 -0
- package/CDP.md +1 -1
- package/CDP.zh-CN.md +1 -1
- package/CLI-ELECTRON.md +89 -36
- package/CLI-EXPLORER.md +4 -4
- package/CONTRIBUTING.md +167 -0
- package/README.md +113 -89
- package/README.zh-CN.md +114 -91
- package/SKILL.md +10 -8
- package/TESTING.md +7 -7
- package/dist/browser/daemon-client.d.ts +37 -0
- package/dist/browser/daemon-client.js +82 -0
- package/dist/browser/discover.d.ts +11 -34
- package/dist/browser/discover.js +15 -190
- package/dist/browser/errors.d.ts +6 -20
- package/dist/browser/errors.js +24 -63
- package/dist/browser/index.d.ts +2 -11
- package/dist/browser/index.js +5 -11
- package/dist/browser/mcp.d.ts +9 -18
- package/dist/browser/mcp.js +70 -284
- package/dist/browser/page.d.ts +28 -6
- package/dist/browser/page.js +210 -85
- package/dist/browser.test.js +4 -202
- package/dist/build-manifest.d.ts +26 -0
- package/dist/build-manifest.js +132 -60
- package/dist/build-manifest.test.d.ts +1 -0
- package/dist/build-manifest.test.js +26 -0
- package/dist/cli-manifest.json +1582 -29
- package/dist/clis/bilibili/download.d.ts +10 -0
- package/dist/clis/bilibili/download.js +135 -0
- package/dist/clis/chatwise/ask.d.ts +1 -0
- package/dist/clis/chatwise/ask.js +76 -0
- package/dist/clis/chatwise/export.d.ts +1 -0
- package/dist/clis/chatwise/export.js +46 -0
- package/dist/clis/chatwise/history.d.ts +1 -0
- package/dist/clis/chatwise/history.js +43 -0
- package/dist/clis/chatwise/model.d.ts +1 -0
- package/dist/clis/chatwise/model.js +81 -0
- package/dist/clis/chatwise/new.d.ts +1 -0
- package/dist/clis/chatwise/new.js +18 -0
- package/dist/clis/chatwise/read.d.ts +1 -0
- package/dist/clis/chatwise/read.js +39 -0
- package/dist/clis/chatwise/screenshot.d.ts +1 -0
- package/dist/clis/chatwise/screenshot.js +27 -0
- package/dist/clis/chatwise/send.d.ts +1 -0
- package/dist/clis/chatwise/send.js +45 -0
- package/dist/clis/chatwise/status.d.ts +1 -0
- package/dist/clis/chatwise/status.js +22 -0
- package/dist/clis/discord-app/channels.d.ts +1 -0
- package/dist/clis/discord-app/channels.js +45 -0
- package/dist/clis/discord-app/members.d.ts +1 -0
- package/dist/clis/discord-app/members.js +38 -0
- package/dist/clis/discord-app/read.d.ts +1 -0
- package/dist/clis/discord-app/read.js +45 -0
- package/dist/clis/discord-app/search.d.ts +1 -0
- package/dist/clis/discord-app/search.js +56 -0
- package/dist/clis/discord-app/send.d.ts +1 -0
- package/dist/clis/discord-app/send.js +27 -0
- package/dist/clis/discord-app/servers.d.ts +1 -0
- package/dist/clis/discord-app/servers.js +36 -0
- package/dist/clis/discord-app/status.d.ts +1 -0
- package/dist/clis/discord-app/status.js +16 -0
- package/dist/clis/feishu/new.d.ts +1 -0
- package/dist/clis/feishu/new.js +27 -0
- package/dist/clis/feishu/read.d.ts +1 -0
- package/dist/clis/feishu/read.js +40 -0
- package/dist/clis/feishu/search.d.ts +1 -0
- package/dist/clis/feishu/search.js +30 -0
- package/dist/clis/feishu/send.d.ts +1 -0
- package/dist/clis/feishu/send.js +39 -0
- package/dist/clis/feishu/status.d.ts +1 -0
- package/dist/clis/feishu/status.js +28 -0
- package/dist/clis/grok/ask.d.ts +1 -0
- package/dist/clis/grok/ask.js +82 -0
- package/dist/clis/grok/debug.d.ts +1 -0
- package/dist/clis/grok/debug.js +45 -0
- package/dist/clis/jimeng/generate.yaml +84 -0
- package/dist/clis/jimeng/history.yaml +47 -0
- package/dist/clis/linux-do/categories.yaml +41 -0
- package/dist/clis/linux-do/category.yaml +49 -0
- package/dist/clis/linux-do/hot.yaml +50 -0
- package/dist/clis/linux-do/latest.yaml +40 -0
- package/dist/clis/linux-do/search.yaml +45 -0
- package/dist/clis/linux-do/topic.yaml +38 -0
- package/dist/clis/neteasemusic/like.d.ts +1 -0
- package/dist/clis/neteasemusic/like.js +25 -0
- package/dist/clis/neteasemusic/lyrics.d.ts +1 -0
- package/dist/clis/neteasemusic/lyrics.js +47 -0
- package/dist/clis/neteasemusic/next.d.ts +1 -0
- package/dist/clis/neteasemusic/next.js +26 -0
- package/dist/clis/neteasemusic/play.d.ts +1 -0
- package/dist/clis/neteasemusic/play.js +26 -0
- package/dist/clis/neteasemusic/playing.d.ts +1 -0
- package/dist/clis/neteasemusic/playing.js +59 -0
- package/dist/clis/neteasemusic/playlist.d.ts +1 -0
- package/dist/clis/neteasemusic/playlist.js +46 -0
- package/dist/clis/neteasemusic/prev.d.ts +1 -0
- package/dist/clis/neteasemusic/prev.js +25 -0
- package/dist/clis/neteasemusic/search.d.ts +1 -0
- package/dist/clis/neteasemusic/search.js +52 -0
- package/dist/clis/neteasemusic/status.d.ts +1 -0
- package/dist/clis/neteasemusic/status.js +16 -0
- package/dist/clis/neteasemusic/volume.d.ts +1 -0
- package/dist/clis/neteasemusic/volume.js +54 -0
- package/dist/clis/notion/export.d.ts +1 -0
- package/dist/clis/notion/export.js +31 -0
- package/dist/clis/notion/favorites.d.ts +1 -0
- package/dist/clis/notion/favorites.js +84 -0
- package/dist/clis/notion/new.d.ts +1 -0
- package/dist/clis/notion/new.js +34 -0
- package/dist/clis/notion/read.d.ts +1 -0
- package/dist/clis/notion/read.js +30 -0
- package/dist/clis/notion/search.d.ts +1 -0
- package/dist/clis/notion/search.js +46 -0
- package/dist/clis/notion/sidebar.d.ts +1 -0
- package/dist/clis/notion/sidebar.js +41 -0
- package/dist/clis/notion/status.d.ts +1 -0
- package/dist/clis/notion/status.js +16 -0
- package/dist/clis/notion/write.d.ts +1 -0
- package/dist/clis/notion/write.js +40 -0
- package/dist/clis/twitter/download.d.ts +8 -0
- package/dist/clis/twitter/download.js +204 -0
- package/dist/clis/wechat/chats.d.ts +1 -0
- package/dist/clis/wechat/chats.js +28 -0
- package/dist/clis/wechat/contacts.d.ts +1 -0
- package/dist/clis/wechat/contacts.js +28 -0
- package/dist/clis/wechat/read.d.ts +1 -0
- package/dist/clis/wechat/read.js +58 -0
- package/dist/clis/wechat/search.d.ts +1 -0
- package/dist/clis/wechat/search.js +31 -0
- package/dist/clis/wechat/send.d.ts +1 -0
- package/dist/clis/wechat/send.js +42 -0
- package/dist/clis/wechat/status.d.ts +1 -0
- package/dist/clis/wechat/status.js +29 -0
- package/dist/clis/xiaohongshu/creator-note-detail.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-note-detail.js +88 -0
- package/dist/clis/xiaohongshu/creator-notes.d.ts +11 -0
- package/dist/clis/xiaohongshu/creator-notes.js +109 -0
- package/dist/clis/xiaohongshu/creator-profile.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-profile.js +54 -0
- package/dist/clis/xiaohongshu/creator-stats.d.ts +10 -0
- package/dist/clis/xiaohongshu/creator-stats.js +74 -0
- package/dist/clis/xiaohongshu/download.d.ts +7 -0
- package/dist/clis/xiaohongshu/download.js +155 -0
- package/dist/clis/xiaohongshu/search.js +1 -1
- package/dist/clis/xiaohongshu/user-helpers.d.ts +15 -0
- package/dist/clis/xiaohongshu/user-helpers.js +67 -0
- package/dist/clis/xiaohongshu/user-helpers.test.d.ts +1 -0
- package/dist/clis/xiaohongshu/user-helpers.test.js +81 -0
- package/dist/clis/xiaohongshu/user.js +46 -29
- package/dist/clis/zhihu/download.d.ts +11 -0
- package/dist/clis/zhihu/download.js +186 -0
- package/dist/clis/zhihu/download.test.d.ts +1 -0
- package/dist/clis/zhihu/download.test.js +10 -0
- package/dist/daemon.d.ts +13 -0
- package/dist/daemon.js +187 -0
- package/dist/doctor.d.ts +27 -61
- package/dist/doctor.js +70 -601
- package/dist/doctor.test.js +30 -170
- package/dist/download/index.d.ts +79 -0
- package/dist/download/index.js +325 -0
- package/dist/download/progress.d.ts +36 -0
- package/dist/download/progress.js +111 -0
- package/dist/engine.test.js +15 -0
- package/dist/main.js +22 -28
- package/dist/pipeline/executor.test.js +1 -0
- package/dist/pipeline/registry.js +2 -0
- package/dist/pipeline/steps/browser.js +2 -2
- package/dist/pipeline/steps/download.d.ts +34 -0
- package/dist/pipeline/steps/download.js +251 -0
- package/dist/pipeline/steps/intercept.js +1 -2
- package/dist/pipeline/template.js +28 -0
- package/dist/setup.d.ts +6 -0
- package/dist/setup.js +46 -160
- package/dist/types.d.ts +6 -0
- package/extension/icons/icon-128.png +0 -0
- package/extension/icons/icon-16.png +0 -0
- package/extension/icons/icon-32.png +0 -0
- package/extension/icons/icon-48.png +0 -0
- package/extension/manifest.json +31 -0
- package/extension/package.json +16 -0
- package/extension/src/background.ts +293 -0
- package/extension/src/cdp.ts +125 -0
- package/extension/src/protocol.ts +57 -0
- package/extension/store-assets/screenshot-1280x800.png +0 -0
- package/extension/tsconfig.json +15 -0
- package/extension/vite.config.ts +18 -0
- package/package.json +8 -7
- package/scripts/test-site.mjs +70 -0
- package/src/browser/daemon-client.ts +113 -0
- package/src/browser/discover.ts +18 -216
- package/src/browser/errors.ts +30 -100
- package/src/browser/index.ts +6 -12
- package/src/browser/mcp.ts +78 -278
- package/src/browser/page.ts +222 -88
- package/src/browser.test.ts +3 -210
- package/src/build-manifest.test.ts +28 -0
- package/src/build-manifest.ts +147 -57
- package/src/clis/bilibili/download.ts +161 -0
- package/src/clis/chatgpt/README.md +1 -1
- package/src/clis/chatgpt/README.zh-CN.md +1 -1
- package/src/clis/chatwise/README.md +38 -0
- package/src/clis/chatwise/README.zh-CN.md +38 -0
- package/src/clis/chatwise/ask.ts +87 -0
- package/src/clis/chatwise/export.ts +51 -0
- package/src/clis/chatwise/history.ts +47 -0
- package/src/clis/chatwise/model.ts +87 -0
- package/src/clis/chatwise/new.ts +21 -0
- package/src/clis/chatwise/read.ts +42 -0
- package/src/clis/chatwise/screenshot.ts +33 -0
- package/src/clis/chatwise/send.ts +50 -0
- package/src/clis/chatwise/status.ts +25 -0
- package/src/clis/discord-app/README.md +28 -0
- package/src/clis/discord-app/README.zh-CN.md +28 -0
- package/src/clis/discord-app/channels.ts +48 -0
- package/src/clis/discord-app/members.ts +41 -0
- package/src/clis/discord-app/read.ts +49 -0
- package/src/clis/discord-app/search.ts +64 -0
- package/src/clis/discord-app/send.ts +32 -0
- package/src/clis/discord-app/servers.ts +39 -0
- package/src/clis/discord-app/status.ts +18 -0
- package/src/clis/feishu/README.md +20 -0
- package/src/clis/feishu/README.zh-CN.md +20 -0
- package/src/clis/feishu/new.ts +32 -0
- package/src/clis/feishu/read.ts +48 -0
- package/src/clis/feishu/search.ts +35 -0
- package/src/clis/feishu/send.ts +46 -0
- package/src/clis/feishu/status.ts +34 -0
- package/src/clis/grok/ask.ts +90 -0
- package/src/clis/grok/debug.ts +49 -0
- package/src/clis/jimeng/generate.yaml +84 -0
- package/src/clis/jimeng/history.yaml +47 -0
- package/src/clis/linux-do/categories.yaml +41 -0
- package/src/clis/linux-do/category.yaml +49 -0
- package/src/clis/linux-do/hot.yaml +50 -0
- package/src/clis/linux-do/latest.yaml +40 -0
- package/src/clis/linux-do/search.yaml +45 -0
- package/src/clis/linux-do/topic.yaml +38 -0
- package/src/clis/neteasemusic/README.md +31 -0
- package/src/clis/neteasemusic/README.zh-CN.md +31 -0
- package/src/clis/neteasemusic/like.ts +28 -0
- package/src/clis/neteasemusic/lyrics.ts +53 -0
- package/src/clis/neteasemusic/next.ts +30 -0
- package/src/clis/neteasemusic/play.ts +30 -0
- package/src/clis/neteasemusic/playing.ts +62 -0
- package/src/clis/neteasemusic/playlist.ts +51 -0
- package/src/clis/neteasemusic/prev.ts +29 -0
- package/src/clis/neteasemusic/search.ts +58 -0
- package/src/clis/neteasemusic/status.ts +18 -0
- package/src/clis/neteasemusic/volume.ts +61 -0
- package/src/clis/notion/README.md +29 -0
- package/src/clis/notion/README.zh-CN.md +29 -0
- package/src/clis/notion/export.ts +36 -0
- package/src/clis/notion/favorites.ts +87 -0
- package/src/clis/notion/new.ts +39 -0
- package/src/clis/notion/read.ts +33 -0
- package/src/clis/notion/search.ts +54 -0
- package/src/clis/notion/sidebar.ts +44 -0
- package/src/clis/notion/status.ts +18 -0
- package/src/clis/notion/write.ts +45 -0
- package/src/clis/twitter/download.ts +227 -0
- package/src/clis/wechat/README.md +28 -0
- package/src/clis/wechat/README.zh-CN.md +28 -0
- package/src/clis/wechat/chats.ts +33 -0
- package/src/clis/wechat/contacts.ts +33 -0
- package/src/clis/wechat/read.ts +72 -0
- package/src/clis/wechat/search.ts +36 -0
- package/src/clis/wechat/send.ts +49 -0
- package/src/clis/wechat/status.ts +35 -0
- package/src/clis/xiaohongshu/creator-note-detail.ts +95 -0
- package/src/clis/xiaohongshu/creator-notes.ts +116 -0
- package/src/clis/xiaohongshu/creator-profile.ts +60 -0
- package/src/clis/xiaohongshu/creator-stats.ts +81 -0
- package/src/clis/xiaohongshu/download.ts +173 -0
- package/src/clis/xiaohongshu/search.ts +1 -1
- package/src/clis/xiaohongshu/user-helpers.test.ts +106 -0
- package/src/clis/xiaohongshu/user-helpers.ts +85 -0
- package/src/clis/xiaohongshu/user.ts +52 -32
- package/src/clis/zhihu/download.test.ts +12 -0
- package/src/clis/zhihu/download.ts +223 -0
- package/src/daemon.ts +217 -0
- package/src/doctor.test.ts +32 -193
- package/src/doctor.ts +74 -668
- package/src/download/index.ts +395 -0
- package/src/download/progress.ts +125 -0
- package/src/engine.test.ts +17 -0
- package/src/main.ts +18 -26
- package/src/pipeline/executor.test.ts +1 -0
- package/src/pipeline/registry.ts +2 -0
- package/src/pipeline/steps/browser.ts +2 -2
- package/src/pipeline/steps/download.ts +310 -0
- package/src/pipeline/steps/intercept.ts +1 -2
- package/src/pipeline/template.ts +26 -0
- package/src/setup.ts +47 -183
- package/src/types.ts +1 -0
- package/tests/e2e/browser-auth.test.ts +25 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Xiaohongshu download — download images and videos from a note.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* opencli xiaohongshu download --note-id abc123 --output ./xhs
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import * as fs from 'node:fs';
|
|
9
|
+
import * as path from 'node:path';
|
|
10
|
+
import { cli, Strategy } from '../../registry.js';
|
|
11
|
+
import {
|
|
12
|
+
httpDownload,
|
|
13
|
+
sanitizeFilename,
|
|
14
|
+
detectContentType,
|
|
15
|
+
} from '../../download/index.js';
|
|
16
|
+
import { DownloadProgressTracker, formatBytes } from '../../download/progress.js';
|
|
17
|
+
|
|
18
|
+
/**
 * Registers the `xiaohongshu download` command.
 *
 * The handler opens the note page, scrapes image and video URLs from the DOM,
 * and saves each file as `<output>/<note_id>/<note_id>_<n>.<ext>` (`mp4` for
 * videos, `jpg` for images). It resolves to one row per media item with the
 * columns `index`, `type`, `status` and `size`; for a failed item the `size`
 * column carries the error message instead. When no media is found it
 * resolves to a single `failed` row.
 */
cli({
  site: 'xiaohongshu',
  name: 'download',
  description: '下载小红书笔记中的图片和视频',
  domain: 'www.xiaohongshu.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'note_id', required: true, help: 'Note ID (from URL)' },
    { name: 'output', default: './xiaohongshu-downloads', help: 'Output directory' },
  ],
  columns: ['index', 'type', 'status', 'size'],
  func: async (page, kwargs) => {
    const noteId = kwargs.note_id;
    const output = kwargs.output;

    // The note id comes straight from the command line and is embedded in the
    // script evaluated inside the page. Serialising it as a JSON string
    // literal keeps quotes/backslashes in the id from breaking or injecting
    // into that script.
    const noteIdLiteral = JSON.stringify(String(noteId));

    // Navigate to note page
    await page.goto(`https://www.xiaohongshu.com/explore/${noteId}`);
    await page.wait(3);

    // Extract note info and media URLs
    const data = await page.evaluate(`
      (() => {
        const result = {
          noteId: ${noteIdLiteral},
          title: '',
          author: '',
          media: []
        };

        // Get title
        const titleEl = document.querySelector('.title, #detail-title, .note-content .title');
        result.title = titleEl?.textContent?.trim() || 'untitled';

        // Get author
        const authorEl = document.querySelector('.username, .author-name, .name');
        result.author = authorEl?.textContent?.trim() || 'unknown';

        // Get images - try multiple selectors
        const imageSelectors = [
          '.swiper-slide img',
          '.carousel-image img',
          '.note-slider img',
          '.note-image img',
          '.image-wrapper img',
          '#noteContainer img[src*="xhscdn"]',
          'img[src*="ci.xiaohongshu.com"]'
        ];

        const imageUrls = new Set();
        for (const selector of imageSelectors) {
          document.querySelectorAll(selector).forEach(img => {
            let src = img.src || img.getAttribute('data-src') || '';
            if (src && (src.includes('xhscdn') || src.includes('xiaohongshu'))) {
              // Convert to high quality URL (remove resize parameters)
              src = src.split('?')[0];
              // Try to get original size
              src = src.replace(/\\/imageView\\d+\\/\\d+\\/w\\/\\d+/, '');
              imageUrls.add(src);
            }
          });
        }

        // Get video if exists
        const videoSelectors = [
          'video source',
          'video[src]',
          '.player video',
          '.video-player video'
        ];

        // Several selectors can match the same element, so collect the URLs
        // in a Set to avoid downloading one video more than once.
        const videoUrls = new Set();
        for (const selector of videoSelectors) {
          document.querySelectorAll(selector).forEach(v => {
            const src = v.src || v.getAttribute('src') || '';
            if (src) {
              videoUrls.add(src);
            }
          });
        }

        // Add videos first, then images, to media
        videoUrls.forEach(url => {
          result.media.push({
            type: 'video',
            url: url
          });
        });

        imageUrls.forEach(url => {
          result.media.push({
            type: 'image',
            url: url
          });
        });

        return result;
      })()
    `);

    if (!data || !data.media || data.media.length === 0) {
      return [{ index: 0, type: '-', status: 'failed', size: 'No media found' }];
    }

    // Extract cookies for authenticated downloads
    const cookies = await page.evaluate(`(() => document.cookie)()`);

    // Create output directory
    // NOTE(review): noteId is used verbatim as a path segment here and in the
    // file names below; confirm whether it should be passed through the
    // imported sanitizeFilename helper first.
    const outputDir = path.join(output, noteId);
    fs.mkdirSync(outputDir, { recursive: true });

    // Download all media files
    const tracker = new DownloadProgressTracker(data.media.length, true);
    const results: any[] = [];

    for (let i = 0; i < data.media.length; i++) {
      const media = data.media[i];
      const ext = media.type === 'video' ? 'mp4' : 'jpg';
      const filename = `${noteId}_${i + 1}.${ext}`;
      const destPath = path.join(outputDir, filename);

      const progressBar = tracker.onFileStart(filename, i);

      try {
        const result = await httpDownload(media.url, destPath, {
          cookies: typeof cookies === 'string' ? cookies : '',
          timeout: 60000,
          onProgress: (received, total) => {
            if (progressBar) progressBar.update(received, total);
          },
        });

        if (progressBar) {
          progressBar.complete(result.success, result.success ? formatBytes(result.size) : undefined);
        }

        tracker.onFileComplete(result.success);

        results.push({
          index: i + 1,
          type: media.type,
          status: result.success ? 'success' : 'failed',
          size: result.success ? formatBytes(result.size) : (result.error || 'unknown error'),
        });
      } catch (err: any) {
        // A thrown download error is recorded as a failed row so the
        // remaining media items are still attempted.
        if (progressBar) progressBar.fail(err.message);
        tracker.onFileComplete(false);

        results.push({
          index: i + 1,
          type: media.type,
          status: 'failed',
          size: err.message,
        });
      }
    }

    tracker.finish();

    return results;
  },
});
|
|
@@ -43,7 +43,7 @@ cli({
|
|
|
43
43
|
const linkEl = el.querySelector('a[href*="/explore/"], a[href*="/search_result/"], a[href*="/note/"]');
|
|
44
44
|
|
|
45
45
|
const href = linkEl?.getAttribute('href') || '';
|
|
46
|
-
const noteId = href.match(/\\/(?:explore|note)\\/([a-
|
|
46
|
+
const noteId = href.match(/\\/(?:explore|note)\\/([a-zA-Z0-9]+)/)?.[1] || '';
|
|
47
47
|
|
|
48
48
|
results.push({
|
|
49
49
|
title: (titleEl?.textContent || '').trim(),
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
buildXhsNoteUrl,
|
|
4
|
+
extractXhsUserNotes,
|
|
5
|
+
flattenXhsNoteGroups,
|
|
6
|
+
normalizeXhsUserId,
|
|
7
|
+
} from './user-helpers.js';
|
|
8
|
+
|
|
9
|
+
// Covers the two accepted input shapes: a full profile URL and a bare id.
describe('normalizeXhsUserId', () => {
  it('extracts the profile id from a full Xiaohongshu URL', () => {
    const profileUrl =
      'https://www.xiaohongshu.com/user/profile/615529370000000002026001?xsec_source=pc_search';

    const normalized = normalizeXhsUserId(profileUrl);

    expect(normalized).toBe('615529370000000002026001');
  });

  it('keeps a bare profile id unchanged', () => {
    const bareId = '615529370000000002026001';

    const normalized = normalizeXhsUserId(bareId);

    expect(normalized).toBe('615529370000000002026001');
  });
});
|
|
22
|
+
|
|
23
|
+
// Empty and null groups must contribute nothing to the flattened list.
describe('flattenXhsNoteGroups', () => {
  it('flattens grouped note arrays and ignores empty groups', () => {
    const grouped = [[{ id: 'a' }], [], null, [{ id: 'b' }]];

    const flattened = flattenXhsNoteGroups(grouped);

    expect(flattened).toEqual([{ id: 'a' }, { id: 'b' }]);
  });
});
|
|
31
|
+
|
|
32
|
+
// A supplied token must appear in the query string together with xsec_source.
describe('buildXhsNoteUrl', () => {
  it('includes xsec token when available', () => {
    const noteUrl = buildXhsNoteUrl('user123', 'note456', 'token789');

    expect(noteUrl).toBe(
      'https://www.xiaohongshu.com/user/profile/user123/note456?xsec_token=token789&xsec_source=pc_user'
    );
  });
});
|
|
39
|
+
|
|
40
|
+
// Exercises row normalisation (camelCase and snake_case cards) and
// de-duplication by note id.
describe('extractXhsUserNotes', () => {
  it('normalizes grouped note cards into CLI rows', () => {
    // Card in camelCase with its own user id and an xsec token on the entry.
    const camelCaseEntry = {
      id: 'note-1',
      xsecToken: 'abc',
      noteCard: {
        noteId: 'note-1',
        displayTitle: 'First note',
        type: 'video',
        interactInfo: { likedCount: '4.6万' },
        user: { userId: 'user-1' },
      },
    };
    // Card in snake_case without user or token, so the fallback id is used.
    const snakeCaseEntry = {
      noteCard: {
        note_id: 'note-2',
        display_title: 'Second note',
        type: 'normal',
        interact_info: { liked_count: 42 },
      },
    };
    const snapshot = { noteGroups: [[camelCaseEntry, snakeCaseEntry], []] };

    const rows = extractXhsUserNotes(snapshot, 'fallback-user');

    expect(rows).toEqual([
      {
        id: 'note-1',
        title: 'First note',
        type: 'video',
        likes: '4.6万',
        url: 'https://www.xiaohongshu.com/user/profile/user-1/note-1?xsec_token=abc&xsec_source=pc_user',
      },
      {
        id: 'note-2',
        title: 'Second note',
        type: 'normal',
        likes: '42',
        url: 'https://www.xiaohongshu.com/user/profile/fallback-user/note-2',
      },
    ]);
  });

  it('deduplicates repeated notes by note id', () => {
    const firstCopy = { noteCard: { noteId: 'dup-1', displayTitle: 'keep me' } };
    const secondCopy = { noteCard: { noteId: 'dup-1', displayTitle: 'drop me' } };
    const snapshot = { noteGroups: [[firstCopy, secondCopy]] };

    const rows = extractXhsUserNotes(snapshot, 'fallback-user');

    expect(rows).toHaveLength(1);
    expect(rows[0]?.title).toBe('keep me');
  });
});
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
 * Raw data captured from a Xiaohongshu user profile page.
 *
 * `noteGroups` is the value that extractXhsUserNotes flattens into rows; it is
 * typed `unknown` and validated at use. `pageData` is carried alongside and is
 * not read by the helpers in this file.
 */
export interface XhsUserPageSnapshot {
|
|
2
|
+
noteGroups?: unknown;
|
|
3
|
+
pageData?: unknown;
|
|
4
|
+
}
|
|
5
|
+
|
|
6
|
+
/**
 * One output row describing a single note, as produced by extractXhsUserNotes.
 *
 * Every field is a trimmed string: `id` is the note id, `title` and `type` are
 * copied from the note card, `likes` is the like count as text ('0' when
 * absent), and `url` is the link built by buildXhsNoteUrl.
 */
export interface XhsUserNoteRow {
|
|
7
|
+
id: string;
|
|
8
|
+
title: string;
|
|
9
|
+
type: string;
|
|
10
|
+
likes: string;
|
|
11
|
+
url: string;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
 * Converts any value to a trimmed string.
 *
 * `null` and `undefined` become the empty string; strings are trimmed as-is;
 * everything else is passed through `String()` and then trimmed.
 */
function toCleanString(value: unknown): string {
  if (value == null) {
    return '';
  }
  const text = typeof value === 'string' ? value : String(value);
  return text.trim();
}
|
|
17
|
+
|
|
18
|
+
/**
 * Reduces a user id or profile URL to the bare profile id.
 *
 * The query string and fragment are dropped first. If the remainder contains
 * `/user/profile/<id>`, that alphanumeric id is returned; otherwise the last
 * path segment (ignoring trailing slashes) is returned.
 *
 * @param input - A bare user id or a profile URL.
 * @returns The extracted profile id.
 */
export function normalizeXhsUserId(input: string): string {
  const base = toCleanString(input).replace(/[?#].*$/, '');

  const profileId = base.match(/\/user\/profile\/([a-zA-Z0-9]+)/)?.[1];
  if (profileId) {
    return profileId;
  }

  const segments = base.replace(/\/+$/, '').split('/');
  return segments.pop() ?? base;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Flattens a list of note groups by one level.
 *
 * Array groups contribute their truthy items; a truthy non-array group is
 * kept as a single item; falsy groups and falsy items are dropped. Anything
 * that is not an array yields an empty list.
 *
 * @param noteGroups - The value to flatten, expected to be an array of groups.
 * @returns The flattened notes in their original order.
 */
export function flattenXhsNoteGroups(noteGroups: unknown): any[] {
  if (!Array.isArray(noteGroups)) {
    return [];
  }

  const flattened: any[] = [];
  noteGroups.forEach((group) => {
    // Treat a lone entry as a one-item group so both shapes share one path.
    const members = Array.isArray(group) ? group : [group];
    for (const member of members) {
      if (member) {
        flattened.push(member);
      }
    }
  });
  return flattened;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Builds the profile-scoped URL of a note.
 *
 * @param userId - Id of the profile that owns the note.
 * @param noteId - Id of the note.
 * @param xsecToken - Optional token; when non-empty, `xsec_token` and
 *   `xsec_source=pc_user` are added as query parameters.
 * @returns The note URL, or an empty string when either id is empty after
 *   trimming.
 */
export function buildXhsNoteUrl(userId: string, noteId: string, xsecToken?: string): string {
  const profileId = toCleanString(userId);
  const postId = toCleanString(noteId);
  if (profileId === '' || postId === '') {
    return '';
  }

  const noteUrl = new URL(`https://www.xiaohongshu.com/user/profile/${profileId}/${postId}`);

  const token = toCleanString(xsecToken);
  if (token !== '') {
    noteUrl.searchParams.set('xsec_token', token);
    noteUrl.searchParams.set('xsec_source', 'pc_user');
  }

  return noteUrl.href;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Converts the note groups of a profile snapshot into output rows.
 *
 * Each entry may wrap its data in `noteCard` / `note_card` or be the card
 * itself, and fields are accepted in camelCase or snake_case. Entries without
 * a note id are skipped, and only the first entry for a given note id is kept.
 *
 * @param snapshot - Snapshot whose `noteGroups` are read.
 * @param fallbackUserId - User id used for the URL when the card has none.
 * @returns One row per distinct note, in first-seen order.
 */
export function extractXhsUserNotes(snapshot: XhsUserPageSnapshot, fallbackUserId: string): XhsUserNoteRow[] {
  // A Map keeps insertion order, so it serves as both the seen-set and the result list.
  const rowsById = new Map<string, XhsUserNoteRow>();

  for (const entry of flattenXhsNoteGroups(snapshot.noteGroups)) {
    const card = entry?.noteCard ?? entry?.note_card ?? entry;
    if (!card || typeof card !== 'object') {
      continue;
    }

    const id = toCleanString(card.noteId ?? card.note_id ?? entry?.noteId ?? entry?.note_id ?? entry?.id);
    if (!id || rowsById.has(id)) {
      continue;
    }

    const ownerId = toCleanString(card.user?.userId ?? card.user?.user_id ?? fallbackUserId);
    const token = toCleanString(entry?.xsecToken ?? entry?.xsec_token ?? card.xsecToken ?? card.xsec_token);
    const likeCount = toCleanString(card.interactInfo?.likedCount ?? card.interact_info?.liked_count ?? 0);

    rowsById.set(id, {
      id,
      title: toCleanString(card.displayTitle ?? card.display_title ?? card.title),
      type: toCleanString(card.type),
      likes: likeCount || '0',
      url: buildXhsNoteUrl(ownerId || fallbackUserId, id, token),
    });
  }

  return Array.from(rowsById.values());
}
|
|
@@ -1,45 +1,65 @@
|
|
|
1
1
|
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import { extractXhsUserNotes, normalizeXhsUserId } from './user-helpers.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Evaluates a script in the page that copies the user store's notes and page
 * data out of `window.__INITIAL_STATE__`.
 *
 * Each value is cloned through a JSON round trip inside the page; a value that
 * cannot be serialised becomes `null`.
 *
 * @param page - Object exposing `evaluate(script)`.
 * @returns Whatever `page.evaluate` resolves to for the snapshot script.
 */
async function readUserSnapshot(page: any) {
  const snapshotScript = `
    (() => {
      const safeClone = (value) => {
        try {
          return JSON.parse(JSON.stringify(value ?? null));
        } catch {
          return null;
        }
      };

      const userStore = window.__INITIAL_STATE__?.user || {};
      return {
        noteGroups: safeClone(userStore.notes?._value || userStore.notes || []),
        pageData: safeClone(userStore.userPageData?._value || userStore.userPageData || {}),
      };
    })()
  `;

  const snapshot = await page.evaluate(snapshotScript);
  return snapshot;
}
|
|
2
23
|
|
|
3
24
|
/**
 * Registers the `xiaohongshu user` command.
 *
 * The handler opens the profile page, reads the notes from the page state,
 * and scrolls up to four more times while the number of extracted notes keeps
 * growing and is still below `limit`. It resolves to at most `limit` rows and
 * throws an Error when no notes were found.
 */
cli({
  site: 'xiaohongshu',
  name: 'user',
  description: 'Get public notes from a Xiaohongshu user profile',
  domain: 'www.xiaohongshu.com',
  strategy: Strategy.COOKIE,
  browser: true,
  args: [
    { name: 'id', type: 'string', required: true, help: 'User id or profile URL' },
    { name: 'limit', type: 'int', default: 15, help: 'Number of notes to return' },
  ],
  columns: ['id', 'title', 'type', 'likes', 'url'],
  func: async (page, kwargs) => {
    const userId = normalizeXhsUserId(String(kwargs.id));

    // A non-numeric limit would turn into NaN, which skips the scroll loop
    // and makes slice(0, NaN) return nothing; fall back to the default
    // instead. Fractions are floored so the loop condition and the final
    // slice agree on the same count.
    const requestedLimit = Number(kwargs.limit ?? 15);
    const limit = Number.isFinite(requestedLimit) ? Math.max(1, Math.floor(requestedLimit)) : 15;

    await page.goto(`https://www.xiaohongshu.com/user/profile/${userId}`);
    await page.wait(3);

    let snapshot = await readUserSnapshot(page);
    let results = extractXhsUserNotes(snapshot ?? {}, userId);
    let previousCount = results.length;

    for (let i = 0; results.length < limit && i < 4; i += 1) {
      await page.autoScroll({ times: 1, delayMs: 1500 });
      await page.wait(1);

      snapshot = await readUserSnapshot(page);
      const nextResults = extractXhsUserNotes(snapshot ?? {}, userId);
      // Stop as soon as a scroll yields no additional notes.
      if (nextResults.length <= previousCount) break;

      results = nextResults;
      previousCount = nextResults.length;
    }

    if (results.length === 0) {
      throw new Error('No public notes found for this Xiaohongshu user.');
    }

    return results.slice(0, limit);
  },
});
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { htmlToMarkdown } from './download.js';
|
|
3
|
+
|
|
4
|
+
// Ordered list items must be numbered and keep their text; a literal '$1' in
// the output would mean a replacement pattern leaked through unexpanded.
describe('htmlToMarkdown', () => {
  it('renders ordered lists with the original list item content', () => {
    const html = '<ol><li>First item</li><li>Second item</li></ol>';
    const expectedFragments = ['1. First item', '2. Second item'];

    // Convert afresh for every assertion, as the assertions are independent.
    for (const fragment of expectedFragments) {
      expect(htmlToMarkdown(html)).toContain(fragment);
    }
    expect(htmlToMarkdown(html)).not.toContain('$1');
  });
});
|