@jackwener/opencli 0.9.5 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. package/.github/ISSUE_TEMPLATE/bug_report.yml +83 -0
  2. package/.github/ISSUE_TEMPLATE/config.yml +8 -0
  3. package/.github/ISSUE_TEMPLATE/feature_request.yml +42 -0
  4. package/.github/ISSUE_TEMPLATE/new_site_adapter.yml +57 -0
  5. package/.github/dependabot.yml +27 -0
  6. package/.github/pull_request_template.md +24 -0
  7. package/.github/workflows/ci.yml +14 -8
  8. package/.github/workflows/e2e-headed.yml +6 -2
  9. package/.github/workflows/pkg-pr-new.yml +2 -2
  10. package/.github/workflows/release-please.yml +25 -0
  11. package/.github/workflows/release.yml +2 -2
  12. package/.github/workflows/security.yml +36 -0
  13. package/CLI-ELECTRON.md +89 -36
  14. package/CONTRIBUTING.md +167 -0
  15. package/README.md +98 -32
  16. package/README.zh-CN.md +99 -33
  17. package/dist/browser/discover.js +22 -7
  18. package/dist/browser.test.js +23 -0
  19. package/dist/build-manifest.d.ts +26 -0
  20. package/dist/build-manifest.js +132 -60
  21. package/dist/build-manifest.test.d.ts +1 -0
  22. package/dist/build-manifest.test.js +26 -0
  23. package/dist/cli-manifest.json +1875 -271
  24. package/dist/clis/antigravity/model.js +2 -2
  25. package/dist/clis/antigravity/send.js +2 -2
  26. package/dist/clis/bilibili/download.d.ts +10 -0
  27. package/dist/clis/bilibili/download.js +135 -0
  28. package/dist/clis/chatgpt/ask.d.ts +1 -0
  29. package/dist/clis/chatgpt/ask.js +68 -0
  30. package/dist/clis/chatgpt/send.js +11 -0
  31. package/dist/clis/chatwise/ask.d.ts +1 -0
  32. package/dist/clis/chatwise/ask.js +76 -0
  33. package/dist/clis/chatwise/export.d.ts +1 -0
  34. package/dist/clis/chatwise/export.js +46 -0
  35. package/dist/clis/chatwise/history.d.ts +1 -0
  36. package/dist/clis/chatwise/history.js +43 -0
  37. package/dist/clis/chatwise/model.d.ts +1 -0
  38. package/dist/clis/chatwise/model.js +81 -0
  39. package/dist/clis/chatwise/new.d.ts +1 -0
  40. package/dist/clis/chatwise/new.js +18 -0
  41. package/dist/clis/chatwise/read.d.ts +1 -0
  42. package/dist/clis/chatwise/read.js +39 -0
  43. package/dist/clis/chatwise/screenshot.d.ts +1 -0
  44. package/dist/clis/chatwise/screenshot.js +27 -0
  45. package/dist/clis/chatwise/send.d.ts +1 -0
  46. package/dist/clis/chatwise/send.js +45 -0
  47. package/dist/clis/chatwise/status.d.ts +1 -0
  48. package/dist/clis/chatwise/status.js +22 -0
  49. package/dist/clis/codex/ask.d.ts +1 -0
  50. package/dist/clis/codex/ask.js +67 -0
  51. package/dist/clis/codex/export.d.ts +1 -0
  52. package/dist/clis/codex/export.js +37 -0
  53. package/dist/clis/codex/history.d.ts +1 -0
  54. package/dist/clis/codex/history.js +43 -0
  55. package/dist/clis/codex/read.js +3 -5
  56. package/dist/clis/codex/screenshot.d.ts +1 -0
  57. package/dist/clis/codex/screenshot.js +27 -0
  58. package/dist/clis/codex/send.js +3 -6
  59. package/dist/clis/codex/status.js +2 -1
  60. package/dist/clis/cursor/ask.d.ts +1 -0
  61. package/dist/clis/cursor/ask.js +69 -0
  62. package/dist/clis/cursor/composer.js +9 -28
  63. package/dist/clis/cursor/export.d.ts +1 -0
  64. package/dist/clis/cursor/export.js +51 -0
  65. package/dist/clis/cursor/history.d.ts +1 -0
  66. package/dist/clis/cursor/history.js +43 -0
  67. package/dist/clis/cursor/new.js +4 -13
  68. package/dist/clis/cursor/screenshot.d.ts +1 -0
  69. package/dist/clis/cursor/screenshot.js +31 -0
  70. package/dist/clis/discord-app/channels.d.ts +1 -0
  71. package/dist/clis/discord-app/channels.js +45 -0
  72. package/dist/clis/discord-app/members.d.ts +1 -0
  73. package/dist/clis/discord-app/members.js +38 -0
  74. package/dist/clis/discord-app/read.d.ts +1 -0
  75. package/dist/clis/discord-app/read.js +45 -0
  76. package/dist/clis/discord-app/search.d.ts +1 -0
  77. package/dist/clis/discord-app/search.js +56 -0
  78. package/dist/clis/discord-app/send.d.ts +1 -0
  79. package/dist/clis/discord-app/send.js +27 -0
  80. package/dist/clis/discord-app/servers.d.ts +1 -0
  81. package/dist/clis/discord-app/servers.js +36 -0
  82. package/dist/clis/discord-app/status.d.ts +1 -0
  83. package/dist/clis/discord-app/status.js +16 -0
  84. package/dist/clis/feishu/new.d.ts +1 -0
  85. package/dist/clis/feishu/new.js +27 -0
  86. package/dist/clis/feishu/read.d.ts +1 -0
  87. package/dist/clis/feishu/read.js +40 -0
  88. package/dist/clis/feishu/search.d.ts +1 -0
  89. package/dist/clis/feishu/search.js +30 -0
  90. package/dist/clis/feishu/send.d.ts +1 -0
  91. package/dist/clis/feishu/send.js +39 -0
  92. package/dist/clis/feishu/status.d.ts +1 -0
  93. package/dist/clis/feishu/status.js +28 -0
  94. package/dist/clis/grok/ask.d.ts +1 -0
  95. package/dist/clis/grok/ask.js +82 -0
  96. package/dist/clis/grok/debug.d.ts +1 -0
  97. package/dist/clis/grok/debug.js +45 -0
  98. package/dist/clis/jimeng/generate.yaml +84 -0
  99. package/dist/clis/jimeng/history.yaml +47 -0
  100. package/dist/clis/linux-do/categories.yaml +41 -0
  101. package/dist/clis/linux-do/category.yaml +49 -0
  102. package/dist/clis/linux-do/hot.yaml +50 -0
  103. package/dist/clis/linux-do/latest.yaml +40 -0
  104. package/dist/clis/linux-do/search.yaml +45 -0
  105. package/dist/clis/linux-do/topic.yaml +38 -0
  106. package/dist/clis/notion/export.d.ts +1 -0
  107. package/dist/clis/notion/export.js +31 -0
  108. package/dist/clis/notion/favorites.d.ts +1 -0
  109. package/dist/clis/notion/favorites.js +84 -0
  110. package/dist/clis/notion/new.d.ts +1 -0
  111. package/dist/clis/notion/new.js +34 -0
  112. package/dist/clis/notion/read.d.ts +1 -0
  113. package/dist/clis/notion/read.js +30 -0
  114. package/dist/clis/notion/search.d.ts +1 -0
  115. package/dist/clis/notion/search.js +46 -0
  116. package/dist/clis/notion/sidebar.d.ts +1 -0
  117. package/dist/clis/notion/sidebar.js +41 -0
  118. package/dist/clis/notion/status.d.ts +1 -0
  119. package/dist/clis/notion/status.js +16 -0
  120. package/dist/clis/notion/write.d.ts +1 -0
  121. package/dist/clis/notion/write.js +40 -0
  122. package/dist/clis/twitter/download.d.ts +8 -0
  123. package/dist/clis/twitter/download.js +204 -0
  124. package/dist/clis/wechat/chats.d.ts +1 -0
  125. package/dist/clis/wechat/chats.js +28 -0
  126. package/dist/clis/wechat/contacts.d.ts +1 -0
  127. package/dist/clis/wechat/contacts.js +28 -0
  128. package/dist/clis/wechat/read.d.ts +1 -0
  129. package/dist/clis/wechat/read.js +58 -0
  130. package/dist/clis/wechat/search.d.ts +1 -0
  131. package/dist/clis/wechat/search.js +31 -0
  132. package/dist/clis/wechat/send.d.ts +1 -0
  133. package/dist/clis/wechat/send.js +42 -0
  134. package/dist/clis/wechat/status.d.ts +1 -0
  135. package/dist/clis/wechat/status.js +29 -0
  136. package/dist/clis/xiaohongshu/creator-note-detail.d.ts +10 -0
  137. package/dist/clis/xiaohongshu/creator-note-detail.js +88 -0
  138. package/dist/clis/xiaohongshu/creator-notes.d.ts +11 -0
  139. package/dist/clis/xiaohongshu/creator-notes.js +109 -0
  140. package/dist/clis/xiaohongshu/creator-profile.d.ts +10 -0
  141. package/dist/clis/xiaohongshu/creator-profile.js +54 -0
  142. package/dist/clis/xiaohongshu/creator-stats.d.ts +10 -0
  143. package/dist/clis/xiaohongshu/creator-stats.js +74 -0
  144. package/dist/clis/xiaohongshu/download.d.ts +7 -0
  145. package/dist/clis/xiaohongshu/download.js +155 -0
  146. package/dist/clis/xiaohongshu/search.js +1 -1
  147. package/dist/clis/xiaohongshu/user-helpers.d.ts +15 -0
  148. package/dist/clis/xiaohongshu/user-helpers.js +67 -0
  149. package/dist/clis/xiaohongshu/user-helpers.test.d.ts +1 -0
  150. package/dist/clis/xiaohongshu/user-helpers.test.js +81 -0
  151. package/dist/clis/xiaohongshu/user.js +46 -29
  152. package/dist/clis/zhihu/download.d.ts +11 -0
  153. package/dist/clis/zhihu/download.js +186 -0
  154. package/dist/clis/zhihu/download.test.d.ts +1 -0
  155. package/dist/clis/zhihu/download.test.js +10 -0
  156. package/dist/download/index.d.ts +79 -0
  157. package/dist/download/index.js +325 -0
  158. package/dist/download/progress.d.ts +36 -0
  159. package/dist/download/progress.js +111 -0
  160. package/dist/engine.test.js +15 -0
  161. package/dist/main.js +16 -3
  162. package/dist/pipeline/registry.js +2 -0
  163. package/dist/pipeline/steps/download.d.ts +34 -0
  164. package/dist/pipeline/steps/download.js +251 -0
  165. package/dist/pipeline/template.js +28 -0
  166. package/package.json +4 -3
  167. package/scripts/test-site.mjs +70 -0
  168. package/src/browser/discover.ts +23 -7
  169. package/src/browser.test.ts +23 -0
  170. package/src/build-manifest.test.ts +28 -0
  171. package/src/build-manifest.ts +147 -57
  172. package/src/clis/antigravity/README.md +2 -3
  173. package/src/clis/antigravity/README.zh-CN.md +2 -3
  174. package/src/clis/antigravity/SKILL.md +1 -1
  175. package/src/clis/antigravity/model.ts +2 -2
  176. package/src/clis/antigravity/send.ts +2 -2
  177. package/src/clis/bilibili/download.ts +161 -0
  178. package/src/clis/chatgpt/README.md +25 -16
  179. package/src/clis/chatgpt/README.zh-CN.md +27 -18
  180. package/src/clis/chatgpt/ask.ts +77 -0
  181. package/src/clis/chatgpt/send.ts +12 -0
  182. package/src/clis/chatwise/README.md +38 -0
  183. package/src/clis/chatwise/README.zh-CN.md +38 -0
  184. package/src/clis/chatwise/ask.ts +87 -0
  185. package/src/clis/chatwise/export.ts +51 -0
  186. package/src/clis/chatwise/history.ts +47 -0
  187. package/src/clis/chatwise/model.ts +87 -0
  188. package/src/clis/chatwise/new.ts +21 -0
  189. package/src/clis/chatwise/read.ts +42 -0
  190. package/src/clis/chatwise/screenshot.ts +33 -0
  191. package/src/clis/chatwise/send.ts +50 -0
  192. package/src/clis/chatwise/status.ts +25 -0
  193. package/src/clis/codex/ask.ts +77 -0
  194. package/src/clis/codex/export.ts +42 -0
  195. package/src/clis/codex/extract-diff.ts +1 -0
  196. package/src/clis/codex/history.ts +47 -0
  197. package/src/clis/codex/read.ts +5 -6
  198. package/src/clis/codex/screenshot.ts +33 -0
  199. package/src/clis/codex/send.ts +6 -7
  200. package/src/clis/codex/status.ts +4 -2
  201. package/src/clis/cursor/ask.ts +81 -0
  202. package/src/clis/cursor/composer.ts +9 -30
  203. package/src/clis/cursor/export.ts +57 -0
  204. package/src/clis/cursor/history.ts +47 -0
  205. package/src/clis/cursor/new.ts +4 -15
  206. package/src/clis/cursor/screenshot.ts +38 -0
  207. package/src/clis/discord-app/README.md +28 -0
  208. package/src/clis/discord-app/README.zh-CN.md +28 -0
  209. package/src/clis/discord-app/channels.ts +48 -0
  210. package/src/clis/discord-app/members.ts +41 -0
  211. package/src/clis/discord-app/read.ts +49 -0
  212. package/src/clis/discord-app/search.ts +64 -0
  213. package/src/clis/discord-app/send.ts +32 -0
  214. package/src/clis/discord-app/servers.ts +39 -0
  215. package/src/clis/discord-app/status.ts +18 -0
  216. package/src/clis/feishu/README.md +20 -0
  217. package/src/clis/feishu/README.zh-CN.md +20 -0
  218. package/src/clis/feishu/new.ts +32 -0
  219. package/src/clis/feishu/read.ts +48 -0
  220. package/src/clis/feishu/search.ts +35 -0
  221. package/src/clis/feishu/send.ts +46 -0
  222. package/src/clis/feishu/status.ts +34 -0
  223. package/src/clis/grok/ask.ts +90 -0
  224. package/src/clis/grok/debug.ts +49 -0
  225. package/src/clis/jimeng/generate.yaml +84 -0
  226. package/src/clis/jimeng/history.yaml +47 -0
  227. package/src/clis/linux-do/categories.yaml +41 -0
  228. package/src/clis/linux-do/category.yaml +49 -0
  229. package/src/clis/linux-do/hot.yaml +50 -0
  230. package/src/clis/linux-do/latest.yaml +40 -0
  231. package/src/clis/linux-do/search.yaml +45 -0
  232. package/src/clis/linux-do/topic.yaml +38 -0
  233. package/src/clis/notion/README.md +29 -0
  234. package/src/clis/notion/README.zh-CN.md +29 -0
  235. package/src/clis/notion/export.ts +36 -0
  236. package/src/clis/notion/favorites.ts +87 -0
  237. package/src/clis/notion/new.ts +39 -0
  238. package/src/clis/notion/read.ts +33 -0
  239. package/src/clis/notion/search.ts +54 -0
  240. package/src/clis/notion/sidebar.ts +44 -0
  241. package/src/clis/notion/status.ts +18 -0
  242. package/src/clis/notion/write.ts +45 -0
  243. package/src/clis/twitter/download.ts +227 -0
  244. package/src/clis/wechat/README.md +28 -0
  245. package/src/clis/wechat/README.zh-CN.md +28 -0
  246. package/src/clis/wechat/chats.ts +33 -0
  247. package/src/clis/wechat/contacts.ts +33 -0
  248. package/src/clis/wechat/read.ts +72 -0
  249. package/src/clis/wechat/search.ts +36 -0
  250. package/src/clis/wechat/send.ts +49 -0
  251. package/src/clis/wechat/status.ts +35 -0
  252. package/src/clis/xiaohongshu/creator-note-detail.ts +95 -0
  253. package/src/clis/xiaohongshu/creator-notes.ts +116 -0
  254. package/src/clis/xiaohongshu/creator-profile.ts +60 -0
  255. package/src/clis/xiaohongshu/creator-stats.ts +81 -0
  256. package/src/clis/xiaohongshu/download.ts +173 -0
  257. package/src/clis/xiaohongshu/search.ts +1 -1
  258. package/src/clis/xiaohongshu/user-helpers.test.ts +106 -0
  259. package/src/clis/xiaohongshu/user-helpers.ts +85 -0
  260. package/src/clis/xiaohongshu/user.ts +52 -32
  261. package/src/clis/zhihu/download.test.ts +12 -0
  262. package/src/clis/zhihu/download.ts +223 -0
  263. package/src/download/index.ts +395 -0
  264. package/src/download/progress.ts +125 -0
  265. package/src/engine.test.ts +17 -0
  266. package/src/main.ts +12 -3
  267. package/src/pipeline/registry.ts +2 -0
  268. package/src/pipeline/steps/download.ts +310 -0
  269. package/src/pipeline/template.ts +26 -0
  270. package/tests/e2e/browser-auth.test.ts +25 -0
@@ -0,0 +1,85 @@
1
/**
 * Loosely typed data captured from a Xiaohongshu profile page.
 * Both members are `unknown` and optional, so consumers must validate them.
 */
export interface XhsUserPageSnapshot {
  /** Note collection; may be a flat list or a list of lists (see `flattenXhsNoteGroups`). */
  noteGroups?: unknown;
  /** Additional page-level data; not read by the helpers in this file. */
  pageData?: unknown;
}
5
+
6
/** One output row describing a single note; every field is a string. */
export interface XhsUserNoteRow {
  /** Note identifier. */
  id: string;
  /** Display title of the note (may be empty). */
  title: string;
  /** Note type as reported by the page data. */
  type: string;
  /** Like count kept as text; '0' when the page data has none. */
  likes: string;
  /** Link to the note under the author's profile, or '' if it could not be built. */
  url: string;
}
13
+
14
/**
 * Coerce any value to a trimmed string.
 *
 * @param value - Arbitrary input.
 * @returns '' for `null`/`undefined`, otherwise the value's string form with
 *   surrounding whitespace removed.
 */
function toCleanString(value: unknown): string {
  if (value == null) return '';
  const text = typeof value === 'string' ? value : String(value);
  return text.trim();
}
17
+
18
/**
 * Reduce a user id or profile URL to the bare user id.
 *
 * The query string and fragment are dropped first. If the remainder contains
 * `/user/profile/<id>`, that id is returned; otherwise the last path segment
 * (ignoring trailing slashes) is used.
 *
 * @param input - User id or profile URL.
 * @returns The extracted id (may be '' for empty input).
 */
export function normalizeXhsUserId(input: string): string {
  const bare = toCleanString(input).replace(/[?#].*$/, '');

  const profileMatch = /\/user\/profile\/([a-zA-Z0-9]+)/.exec(bare);
  if (profileMatch?.[1]) return profileMatch[1];

  const segments = bare.replace(/\/+$/, '').split('/');
  return segments.pop() ?? bare;
}
25
+
26
/**
 * Flatten a mixed list of notes and note groups by one level.
 *
 * @param noteGroups - Expected to be an array whose elements are either notes
 *   or arrays of notes; anything else yields an empty result.
 * @returns All truthy notes in their original order. Only one level of
 *   nesting is removed, so an array found inside a group is kept as-is.
 */
export function flattenXhsNoteGroups(noteGroups: unknown): any[] {
  if (!Array.isArray(noteGroups)) return [];

  return noteGroups.flatMap((group: any) => {
    // A nested group contributes each of its truthy members.
    if (Array.isArray(group)) return group.filter(Boolean);
    // A bare entry contributes itself; falsy placeholders are dropped.
    return group ? [group] : [];
  });
}
43
+
44
/**
 * Build the link to a note under its author's profile.
 *
 * @param userId - Author id.
 * @param noteId - Note id.
 * @param xsecToken - Optional token; when non-empty it is added as
 *   `xsec_token` together with `xsec_source=pc_user`.
 * @returns The URL as a string, or '' when either id is empty after trimming.
 */
export function buildXhsNoteUrl(userId: string, noteId: string, xsecToken?: string): string {
  const profileId = toCleanString(userId);
  const note = toCleanString(noteId);
  if (profileId === '' || note === '') return '';

  const target = new URL(`https://www.xiaohongshu.com/user/profile/${profileId}/${note}`);

  const token = toCleanString(xsecToken);
  if (token !== '') {
    target.searchParams.set('xsec_token', token);
    target.searchParams.set('xsec_source', 'pc_user');
  }

  return target.href;
}
57
+
58
/**
 * Turn a profile page snapshot into de-duplicated note rows.
 *
 * Each entry may carry its data directly or under `noteCard` / `note_card`,
 * and both camelCase and snake_case field names are accepted. Entries without
 * a note id, and repeats of an id already seen, are skipped.
 *
 * @param snapshot - Page snapshot; only `noteGroups` is read.
 * @param fallbackUserId - Used for the note URL when an entry has no author id.
 * @returns Rows in first-seen order.
 */
export function extractXhsUserNotes(snapshot: XhsUserPageSnapshot, fallbackUserId: string): XhsUserNoteRow[] {
  // A Map keeps insertion order, so it doubles as the "seen" set and the result list.
  const rowsById = new Map<string, XhsUserNoteRow>();

  for (const entry of flattenXhsNoteGroups(snapshot.noteGroups)) {
    const card = entry?.noteCard ?? entry?.note_card ?? entry;
    if (!card || typeof card !== 'object') continue;

    const id = toCleanString(card.noteId ?? card.note_id ?? entry?.noteId ?? entry?.note_id ?? entry?.id);
    if (!id || rowsById.has(id)) continue;

    const authorId = toCleanString(card.user?.userId ?? card.user?.user_id ?? fallbackUserId);
    const token = toCleanString(entry?.xsecToken ?? entry?.xsec_token ?? card.xsecToken ?? card.xsec_token);
    const likeCount = toCleanString(card.interactInfo?.likedCount ?? card.interact_info?.liked_count ?? 0);

    rowsById.set(id, {
      id,
      title: toCleanString(card.displayTitle ?? card.display_title ?? card.title),
      type: toCleanString(card.type),
      likes: likeCount || '0',
      // An empty author id still falls back to the caller-supplied one.
      url: buildXhsNoteUrl(authorId || fallbackUserId, id, token),
    });
  }

  return [...rowsById.values()];
}
@@ -1,45 +1,65 @@
1
1
  import { cli, Strategy } from '../../registry.js';
2
+ import { extractXhsUserNotes, normalizeXhsUserId } from './user-helpers.js';
3
+
4
/**
 * Read the user-related part of the page's `window.__INITIAL_STATE__`.
 *
 * The script runs through `page.evaluate` and returns JSON-cloned copies of
 * `user.notes` and `user.userPageData` (unwrapping a `_value` property when
 * present). A value that cannot be JSON-serialised is returned as `null`.
 *
 * @param page - Object exposing `evaluate(script)`.
 * @returns Whatever `page.evaluate` resolves to; the script itself produces
 *   `{ noteGroups, pageData }`.
 */
async function readUserSnapshot(page: any) {
  const snapshotScript = `
    (() => {
      const safeClone = (value) => {
        try {
          return JSON.parse(JSON.stringify(value ?? null));
        } catch {
          return null;
        }
      };

      const userStore = window.__INITIAL_STATE__?.user || {};
      return {
        noteGroups: safeClone(userStore.notes?._value || userStore.notes || []),
        pageData: safeClone(userStore.userPageData?._value || userStore.userPageData || {}),
      };
    })()
  `;

  return await page.evaluate(snapshotScript);
}
2
23
 
3
24
  cli({
4
25
  site: 'xiaohongshu',
5
26
  name: 'user',
6
- description: 'Get user notes from Xiaohongshu',
7
- domain: 'xiaohongshu.com',
8
- strategy: Strategy.INTERCEPT,
27
+ description: 'Get public notes from a Xiaohongshu user profile',
28
+ domain: 'www.xiaohongshu.com',
29
+ strategy: Strategy.COOKIE,
9
30
  browser: true,
10
31
  args: [
11
- { name: 'id', type: 'string', required: true },
12
- { name: 'limit', type: 'int', default: 15 },
32
+ { name: 'id', type: 'string', required: true, help: 'User id or profile URL' },
33
+ { name: 'limit', type: 'int', default: 15, help: 'Number of notes to return' },
13
34
  ],
14
35
  columns: ['id', 'title', 'type', 'likes', 'url'],
15
36
  func: async (page, kwargs) => {
16
- await page.goto(`https://www.xiaohongshu.com/user/profile/${kwargs.id}`);
17
- await page.wait(5);
18
-
19
- await page.installInterceptor('v1/user/posted');
20
-
21
- // Trigger API by scrolling
22
- await page.autoScroll({ times: 2, delayMs: 2000 });
23
-
24
- // Retrieve data
25
- const requests = await page.getInterceptedRequests();
26
- if (!requests || requests.length === 0) return [];
27
-
28
- let results: any[] = [];
29
- for (const req of requests) {
30
- if (req.data && req.data.data && req.data.data.notes) {
31
- for (const note of req.data.data.notes) {
32
- results.push({
33
- id: note.note_id || note.id,
34
- title: note.display_title || '',
35
- type: note.type || '',
36
- likes: note.interact_info?.liked_count || '0',
37
- url: `https://www.xiaohongshu.com/explore/${note.note_id || note.id}`
38
- });
39
- }
40
- }
37
+ const userId = normalizeXhsUserId(String(kwargs.id));
38
+ const limit = Math.max(1, Number(kwargs.limit ?? 15));
39
+
40
+ await page.goto(`https://www.xiaohongshu.com/user/profile/${userId}`);
41
+ await page.wait(3);
42
+
43
+ let snapshot = await readUserSnapshot(page);
44
+ let results = extractXhsUserNotes(snapshot ?? {}, userId);
45
+ let previousCount = results.length;
46
+
47
+ for (let i = 0; results.length < limit && i < 4; i += 1) {
48
+ await page.autoScroll({ times: 1, delayMs: 1500 });
49
+ await page.wait(1);
50
+
51
+ snapshot = await readUserSnapshot(page);
52
+ const nextResults = extractXhsUserNotes(snapshot ?? {}, userId);
53
+ if (nextResults.length <= previousCount) break;
54
+
55
+ results = nextResults;
56
+ previousCount = nextResults.length;
57
+ }
58
+
59
+ if (results.length === 0) {
60
+ throw new Error('No public notes found for this Xiaohongshu user.');
41
61
  }
42
62
 
43
- return results.slice(0, kwargs.limit);
44
- }
63
+ return results.slice(0, limit);
64
+ },
45
65
  });
@@ -0,0 +1,12 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { htmlToMarkdown } from './download.js';
3
+
4
// Regression test for ordered-list conversion in htmlToMarkdown.
describe('htmlToMarkdown', () => {
  it('renders ordered lists with the original list item content', () => {
    const markdown = htmlToMarkdown('<ol><li>First item</li><li>Second item</li></ol>');

    expect(markdown).toContain('1. First item');
    expect(markdown).toContain('2. Second item');
    // A literal "$1" would mean a replacement placeholder leaked into the output.
    expect(markdown).not.toContain('$1');
  });
});
@@ -0,0 +1,223 @@
1
+ /**
2
+ * Zhihu download — export articles to Markdown format.
3
+ *
4
+ * Usage:
5
+ * opencli zhihu download --url "https://zhuanlan.zhihu.com/p/xxx" --output ./zhihu
6
+ */
7
+
8
+ import * as fs from 'node:fs';
9
+ import * as path from 'node:path';
10
+ import { cli, Strategy } from '../../registry.js';
11
+ import { sanitizeFilename, httpDownload } from '../../download/index.js';
12
+ import { formatBytes } from '../../download/progress.js';
13
+
14
/**
 * Convert an HTML fragment to Markdown.
 *
 * This is a simplified, regex-based converter for article content, not a
 * general HTML parser. It handles headings (h1-h6), paragraphs, links, images,
 * lists, bold/italic, code, blockquotes and line breaks; every other tag is
 * stripped and a small set of entities is decoded.
 *
 * Only double-quoted attributes are recognised for links and images.
 *
 * @param html - HTML fragment to convert.
 * @returns Markdown text with runs of blank lines collapsed and outer
 *   whitespace trimmed.
 */
export function htmlToMarkdown(html: string): string {
  let md = html;

  // Every tag pattern below ends the tag name with `\b`, so that e.g. `<b`
  // does not also match `<br>`/`<blockquote>` and `<p` does not match `<pre>`.

  // Remove script and style elements together with their content.
  md = md.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, '');
  md = md.replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, '');

  // Headings: the level digit becomes the number of leading '#'.
  md = md.replace(
    /<h([1-6])\b[^>]*>(.*?)<\/h\1>/gi,
    (_match: string, level: string, text: string) => `${'#'.repeat(Number(level))} ${text}\n\n`,
  );

  // Paragraphs.
  md = md.replace(/<p\b[^>]*>([\s\S]*?)<\/p>/gi, '$1\n\n');

  // Links.
  md = md.replace(/<a\b[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, '[$2]($1)');

  // Images: read attributes independently of their order. The lazy-load
  // attributes are preferred over `src`, matching the preference the download
  // command uses when it collects image URLs. The leading `\s` keeps `src`
  // from matching inside names such as `data-src`.
  md = md.replace(/<img\b[^>]*>/gi, (tag: string) => {
    const attr = (name: string): string =>
      tag.match(new RegExp(`\\s${name}="([^"]*)"`, 'i'))?.[1] ?? '';
    const src = attr('data-original') || attr('data-actualsrc') || attr('src');
    return src ? `![${attr('alt')}](${src})` : '';
  });

  // Lists.
  md = md.replace(/<ul\b[^>]*>([\s\S]*?)<\/ul>/gi, (_match: string, items: string) => {
    return items.replace(/<li\b[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n') + '\n';
  });
  md = md.replace(/<ol\b[^>]*>([\s\S]*?)<\/ol>/gi, (_match: string, items: string) => {
    let index = 0;
    return items.replace(
      /<li\b[^>]*>([\s\S]*?)<\/li>/gi,
      (_itemMatch: string, itemContent: string) => `${++index}. ${itemContent}\n`,
    ) + '\n';
  });

  // Bold and italic.
  md = md.replace(/<strong\b[^>]*>(.*?)<\/strong>/gi, '**$1**');
  md = md.replace(/<b\b[^>]*>(.*?)<\/b>/gi, '**$1**');
  md = md.replace(/<em\b[^>]*>(.*?)<\/em>/gi, '*$1*');
  md = md.replace(/<i\b[^>]*>(.*?)<\/i>/gi, '*$1*');

  // Code blocks first, then any remaining inline code.
  md = md.replace(/<pre\b[^>]*><code\b[^>]*>([\s\S]*?)<\/code><\/pre>/gi, '```\n$1\n```\n\n');
  md = md.replace(/<code\b[^>]*>(.*?)<\/code>/gi, '`$1`');

  // Line breaks are converted before blockquotes so that every quoted line,
  // including those separated by <br>, receives the '> ' prefix.
  md = md.replace(/<br\s*\/?>/gi, '\n');

  // Blockquotes: trim first so inner paragraphs do not leave empty '> ' lines.
  md = md.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, (_match: string, quoted: string) => {
    return quoted.trim().split('\n').map((line: string) => `> ${line}`).join('\n') + '\n\n';
  });

  // Remove remaining HTML tags.
  md = md.replace(/<[^>]+>/g, '');

  // Decode entities in a single pass so the output of one decoding is never
  // decoded again (e.g. "&amp;quot;" must become "&quot;", not a quote mark).
  const entities: Record<string, string> = {
    nbsp: ' ',
    lt: '<',
    gt: '>',
    amp: '&',
    quot: '"',
    '#39': "'",
  };
  md = md.replace(/&(nbsp|lt|gt|amp|quot|#39);/g, (_match: string, name: string) => entities[name]);

  // Collapse runs of blank lines and trim.
  md = md.replace(/\n{3,}/g, '\n\n');
  md = md.trim();

  return md;
}
87
+
88
/**
 * `zhihu download`: save one article as a Markdown file with YAML front matter.
 *
 * Flow: open the article, extract title/author/time/content/image URLs in the
 * page, convert the content with `htmlToMarkdown`, optionally download images
 * into `<output>/images` and point the Markdown at the local copies, then
 * write `<output>/<sanitised title>.md`.
 *
 * Returns a single row. When no content could be extracted the row has
 * status 'failed' and carries the reason in the `size` column.
 */
cli({
  site: 'zhihu',
  name: 'download',
  description: '导出知乎文章为 Markdown 格式',
  domain: 'zhuanlan.zhihu.com',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'url', required: true, help: 'Article URL (zhuanlan.zhihu.com/p/xxx)' },
    { name: 'output', default: './zhihu-articles', help: 'Output directory' },
    { name: 'download-images', type: 'boolean', default: false, help: 'Download images locally' },
  ],
  columns: ['title', 'author', 'status', 'size'],
  func: async (page, kwargs) => {
    const url = kwargs.url;
    const output = kwargs.output;
    const downloadImages = kwargs['download-images'];

    // Navigate to article page
    await page.goto(url);
    await page.wait(3);

    // Extract article content inside the page
    const data = await page.evaluate(`
      (() => {
        const result = {
          title: '',
          author: '',
          content: '',
          publishTime: '',
          images: []
        };

        // Get title
        const titleEl = document.querySelector('.Post-Title, h1.ContentItem-title, .ArticleTitle');
        result.title = titleEl?.textContent?.trim() || 'untitled';

        // Get author
        const authorEl = document.querySelector('.AuthorInfo-name, .UserLink-link');
        result.author = authorEl?.textContent?.trim() || 'unknown';

        // Get publish time
        const timeEl = document.querySelector('.ContentItem-time, .Post-Time');
        result.publishTime = timeEl?.textContent?.trim() || '';

        // Get content HTML
        const contentEl = document.querySelector('.Post-RichTextContainer, .RichText, .ArticleContent');
        if (contentEl) {
          result.content = contentEl.innerHTML;

          // Extract image URLs
          contentEl.querySelectorAll('img').forEach(img => {
            const src = img.getAttribute('data-original') || img.getAttribute('data-actualsrc') || img.src;
            if (src && !src.includes('data:image')) {
              result.images.push(src);
            }
          });
        }

        return result;
      })()
    `);

    if (!data || !data.content) {
      return [{
        title: 'Error',
        author: '-',
        status: 'failed',
        size: 'Could not extract article content',
      }];
    }

    // Create output directory
    fs.mkdirSync(output, { recursive: true });

    // Convert HTML to Markdown
    let markdown = htmlToMarkdown(data.content);

    // Front matter values are emitted as JSON string literals, which are valid
    // YAML double-quoted scalars. This escapes quotes, backslashes and line
    // breaks in every field instead of only quotes in title/author.
    const yamlString = (value: unknown): string => JSON.stringify(String(value ?? ''));
    const frontmatter = [
      '---',
      `title: ${yamlString(data.title)}`,
      `author: ${yamlString(data.author)}`,
      `source: ${yamlString(url)}`,
      data.publishTime ? `date: ${yamlString(data.publishTime)}` : '',
      '---',
      '',
    ].filter(Boolean).join('\n');

    // Download images if requested
    if (downloadImages && data.images && data.images.length > 0) {
      const imagesDir = path.join(output, 'images');
      fs.mkdirSync(imagesDir, { recursive: true });

      const cookies = await page.evaluate(`(() => document.cookie)()`);

      for (let i = 0; i < data.images.length; i++) {
        const imgUrl = data.images[i];
        const ext = imgUrl.match(/\.(jpg|jpeg|png|gif|webp)/i)?.[1] || 'jpg';
        const imgFilename = `img_${i + 1}.${ext}`;
        const imgPath = path.join(imagesDir, imgFilename);

        try {
          await httpDownload(imgUrl, imgPath, {
            cookies: typeof cookies === 'string' ? cookies : '',
            timeout: 30000,
          });

          // Replace every literal occurrence of the URL with the local path.
          markdown = markdown.split(imgUrl).join(`./images/${imgFilename}`);
        } catch {
          // Best effort: keep the original URL if the download fails
        }
      }
    }

    // Write markdown file
    const safeTitle = sanitizeFilename(data.title, 100);
    const filename = `${safeTitle}.md`;
    const filePath = path.join(output, filename);

    const fullContent = frontmatter + '\n' + markdown;
    fs.writeFileSync(filePath, fullContent, 'utf-8');

    const size = Buffer.byteLength(fullContent, 'utf-8');

    return [{
      title: data.title,
      author: data.author,
      status: 'success',
      size: formatBytes(size),
    }];
  },
});