@jackwener/opencli 0.9.6 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.yml +83 -0
  2. package/.github/ISSUE_TEMPLATE/config.yml +8 -0
  3. package/.github/ISSUE_TEMPLATE/feature_request.yml +42 -0
  4. package/.github/ISSUE_TEMPLATE/new_site_adapter.yml +57 -0
  5. package/.github/dependabot.yml +27 -0
  6. package/.github/pull_request_template.md +24 -0
  7. package/.github/workflows/ci.yml +14 -8
  8. package/.github/workflows/e2e-headed.yml +6 -2
  9. package/.github/workflows/pkg-pr-new.yml +2 -2
  10. package/.github/workflows/release-please.yml +25 -0
  11. package/.github/workflows/release.yml +2 -2
  12. package/.github/workflows/security.yml +36 -0
  13. package/CLI-ELECTRON.md +89 -36
  14. package/CONTRIBUTING.md +167 -0
  15. package/README.md +98 -32
  16. package/README.zh-CN.md +99 -33
  17. package/dist/browser/discover.js +22 -7
  18. package/dist/browser.test.js +23 -0
  19. package/dist/build-manifest.d.ts +26 -0
  20. package/dist/build-manifest.js +132 -60
  21. package/dist/build-manifest.test.d.ts +1 -0
  22. package/dist/build-manifest.test.js +26 -0
  23. package/dist/cli-manifest.json +1415 -29
  24. package/dist/clis/bilibili/download.d.ts +10 -0
  25. package/dist/clis/bilibili/download.js +135 -0
  26. package/dist/clis/chatwise/ask.d.ts +1 -0
  27. package/dist/clis/chatwise/ask.js +76 -0
  28. package/dist/clis/chatwise/export.d.ts +1 -0
  29. package/dist/clis/chatwise/export.js +46 -0
  30. package/dist/clis/chatwise/history.d.ts +1 -0
  31. package/dist/clis/chatwise/history.js +43 -0
  32. package/dist/clis/chatwise/model.d.ts +1 -0
  33. package/dist/clis/chatwise/model.js +81 -0
  34. package/dist/clis/chatwise/new.d.ts +1 -0
  35. package/dist/clis/chatwise/new.js +18 -0
  36. package/dist/clis/chatwise/read.d.ts +1 -0
  37. package/dist/clis/chatwise/read.js +39 -0
  38. package/dist/clis/chatwise/screenshot.d.ts +1 -0
  39. package/dist/clis/chatwise/screenshot.js +27 -0
  40. package/dist/clis/chatwise/send.d.ts +1 -0
  41. package/dist/clis/chatwise/send.js +45 -0
  42. package/dist/clis/chatwise/status.d.ts +1 -0
  43. package/dist/clis/chatwise/status.js +22 -0
  44. package/dist/clis/discord-app/channels.d.ts +1 -0
  45. package/dist/clis/discord-app/channels.js +45 -0
  46. package/dist/clis/discord-app/members.d.ts +1 -0
  47. package/dist/clis/discord-app/members.js +38 -0
  48. package/dist/clis/discord-app/read.d.ts +1 -0
  49. package/dist/clis/discord-app/read.js +45 -0
  50. package/dist/clis/discord-app/search.d.ts +1 -0
  51. package/dist/clis/discord-app/search.js +56 -0
  52. package/dist/clis/discord-app/send.d.ts +1 -0
  53. package/dist/clis/discord-app/send.js +27 -0
  54. package/dist/clis/discord-app/servers.d.ts +1 -0
  55. package/dist/clis/discord-app/servers.js +36 -0
  56. package/dist/clis/discord-app/status.d.ts +1 -0
  57. package/dist/clis/discord-app/status.js +16 -0
  58. package/dist/clis/feishu/new.d.ts +1 -0
  59. package/dist/clis/feishu/new.js +27 -0
  60. package/dist/clis/feishu/read.d.ts +1 -0
  61. package/dist/clis/feishu/read.js +40 -0
  62. package/dist/clis/feishu/search.d.ts +1 -0
  63. package/dist/clis/feishu/search.js +30 -0
  64. package/dist/clis/feishu/send.d.ts +1 -0
  65. package/dist/clis/feishu/send.js +39 -0
  66. package/dist/clis/feishu/status.d.ts +1 -0
  67. package/dist/clis/feishu/status.js +28 -0
  68. package/dist/clis/grok/ask.d.ts +1 -0
  69. package/dist/clis/grok/ask.js +82 -0
  70. package/dist/clis/grok/debug.d.ts +1 -0
  71. package/dist/clis/grok/debug.js +45 -0
  72. package/dist/clis/jimeng/generate.yaml +84 -0
  73. package/dist/clis/jimeng/history.yaml +47 -0
  74. package/dist/clis/linux-do/categories.yaml +41 -0
  75. package/dist/clis/linux-do/category.yaml +49 -0
  76. package/dist/clis/linux-do/hot.yaml +50 -0
  77. package/dist/clis/linux-do/latest.yaml +40 -0
  78. package/dist/clis/linux-do/search.yaml +45 -0
  79. package/dist/clis/linux-do/topic.yaml +38 -0
  80. package/dist/clis/notion/export.d.ts +1 -0
  81. package/dist/clis/notion/export.js +31 -0
  82. package/dist/clis/notion/favorites.d.ts +1 -0
  83. package/dist/clis/notion/favorites.js +84 -0
  84. package/dist/clis/notion/new.d.ts +1 -0
  85. package/dist/clis/notion/new.js +34 -0
  86. package/dist/clis/notion/read.d.ts +1 -0
  87. package/dist/clis/notion/read.js +30 -0
  88. package/dist/clis/notion/search.d.ts +1 -0
  89. package/dist/clis/notion/search.js +46 -0
  90. package/dist/clis/notion/sidebar.d.ts +1 -0
  91. package/dist/clis/notion/sidebar.js +41 -0
  92. package/dist/clis/notion/status.d.ts +1 -0
  93. package/dist/clis/notion/status.js +16 -0
  94. package/dist/clis/notion/write.d.ts +1 -0
  95. package/dist/clis/notion/write.js +40 -0
  96. package/dist/clis/twitter/download.d.ts +8 -0
  97. package/dist/clis/twitter/download.js +204 -0
  98. package/dist/clis/wechat/chats.d.ts +1 -0
  99. package/dist/clis/wechat/chats.js +28 -0
  100. package/dist/clis/wechat/contacts.d.ts +1 -0
  101. package/dist/clis/wechat/contacts.js +28 -0
  102. package/dist/clis/wechat/read.d.ts +1 -0
  103. package/dist/clis/wechat/read.js +58 -0
  104. package/dist/clis/wechat/search.d.ts +1 -0
  105. package/dist/clis/wechat/search.js +31 -0
  106. package/dist/clis/wechat/send.d.ts +1 -0
  107. package/dist/clis/wechat/send.js +42 -0
  108. package/dist/clis/wechat/status.d.ts +1 -0
  109. package/dist/clis/wechat/status.js +29 -0
  110. package/dist/clis/xiaohongshu/creator-note-detail.d.ts +10 -0
  111. package/dist/clis/xiaohongshu/creator-note-detail.js +88 -0
  112. package/dist/clis/xiaohongshu/creator-notes.d.ts +11 -0
  113. package/dist/clis/xiaohongshu/creator-notes.js +109 -0
  114. package/dist/clis/xiaohongshu/creator-profile.d.ts +10 -0
  115. package/dist/clis/xiaohongshu/creator-profile.js +54 -0
  116. package/dist/clis/xiaohongshu/creator-stats.d.ts +10 -0
  117. package/dist/clis/xiaohongshu/creator-stats.js +74 -0
  118. package/dist/clis/xiaohongshu/download.d.ts +7 -0
  119. package/dist/clis/xiaohongshu/download.js +155 -0
  120. package/dist/clis/xiaohongshu/search.js +1 -1
  121. package/dist/clis/xiaohongshu/user-helpers.d.ts +15 -0
  122. package/dist/clis/xiaohongshu/user-helpers.js +67 -0
  123. package/dist/clis/xiaohongshu/user-helpers.test.d.ts +1 -0
  124. package/dist/clis/xiaohongshu/user-helpers.test.js +81 -0
  125. package/dist/clis/xiaohongshu/user.js +46 -29
  126. package/dist/clis/zhihu/download.d.ts +11 -0
  127. package/dist/clis/zhihu/download.js +186 -0
  128. package/dist/clis/zhihu/download.test.d.ts +1 -0
  129. package/dist/clis/zhihu/download.test.js +10 -0
  130. package/dist/download/index.d.ts +79 -0
  131. package/dist/download/index.js +325 -0
  132. package/dist/download/progress.d.ts +36 -0
  133. package/dist/download/progress.js +111 -0
  134. package/dist/engine.test.js +15 -0
  135. package/dist/main.js +16 -3
  136. package/dist/pipeline/registry.js +2 -0
  137. package/dist/pipeline/steps/download.d.ts +34 -0
  138. package/dist/pipeline/steps/download.js +251 -0
  139. package/dist/pipeline/template.js +28 -0
  140. package/package.json +4 -3
  141. package/scripts/test-site.mjs +70 -0
  142. package/src/browser/discover.ts +23 -7
  143. package/src/browser.test.ts +23 -0
  144. package/src/build-manifest.test.ts +28 -0
  145. package/src/build-manifest.ts +147 -57
  146. package/src/clis/bilibili/download.ts +161 -0
  147. package/src/clis/chatwise/README.md +38 -0
  148. package/src/clis/chatwise/README.zh-CN.md +38 -0
  149. package/src/clis/chatwise/ask.ts +87 -0
  150. package/src/clis/chatwise/export.ts +51 -0
  151. package/src/clis/chatwise/history.ts +47 -0
  152. package/src/clis/chatwise/model.ts +87 -0
  153. package/src/clis/chatwise/new.ts +21 -0
  154. package/src/clis/chatwise/read.ts +42 -0
  155. package/src/clis/chatwise/screenshot.ts +33 -0
  156. package/src/clis/chatwise/send.ts +50 -0
  157. package/src/clis/chatwise/status.ts +25 -0
  158. package/src/clis/discord-app/README.md +28 -0
  159. package/src/clis/discord-app/README.zh-CN.md +28 -0
  160. package/src/clis/discord-app/channels.ts +48 -0
  161. package/src/clis/discord-app/members.ts +41 -0
  162. package/src/clis/discord-app/read.ts +49 -0
  163. package/src/clis/discord-app/search.ts +64 -0
  164. package/src/clis/discord-app/send.ts +32 -0
  165. package/src/clis/discord-app/servers.ts +39 -0
  166. package/src/clis/discord-app/status.ts +18 -0
  167. package/src/clis/feishu/README.md +20 -0
  168. package/src/clis/feishu/README.zh-CN.md +20 -0
  169. package/src/clis/feishu/new.ts +32 -0
  170. package/src/clis/feishu/read.ts +48 -0
  171. package/src/clis/feishu/search.ts +35 -0
  172. package/src/clis/feishu/send.ts +46 -0
  173. package/src/clis/feishu/status.ts +34 -0
  174. package/src/clis/grok/ask.ts +90 -0
  175. package/src/clis/grok/debug.ts +49 -0
  176. package/src/clis/jimeng/generate.yaml +84 -0
  177. package/src/clis/jimeng/history.yaml +47 -0
  178. package/src/clis/linux-do/categories.yaml +41 -0
  179. package/src/clis/linux-do/category.yaml +49 -0
  180. package/src/clis/linux-do/hot.yaml +50 -0
  181. package/src/clis/linux-do/latest.yaml +40 -0
  182. package/src/clis/linux-do/search.yaml +45 -0
  183. package/src/clis/linux-do/topic.yaml +38 -0
  184. package/src/clis/notion/README.md +29 -0
  185. package/src/clis/notion/README.zh-CN.md +29 -0
  186. package/src/clis/notion/export.ts +36 -0
  187. package/src/clis/notion/favorites.ts +87 -0
  188. package/src/clis/notion/new.ts +39 -0
  189. package/src/clis/notion/read.ts +33 -0
  190. package/src/clis/notion/search.ts +54 -0
  191. package/src/clis/notion/sidebar.ts +44 -0
  192. package/src/clis/notion/status.ts +18 -0
  193. package/src/clis/notion/write.ts +45 -0
  194. package/src/clis/twitter/download.ts +227 -0
  195. package/src/clis/wechat/README.md +28 -0
  196. package/src/clis/wechat/README.zh-CN.md +28 -0
  197. package/src/clis/wechat/chats.ts +33 -0
  198. package/src/clis/wechat/contacts.ts +33 -0
  199. package/src/clis/wechat/read.ts +72 -0
  200. package/src/clis/wechat/search.ts +36 -0
  201. package/src/clis/wechat/send.ts +49 -0
  202. package/src/clis/wechat/status.ts +35 -0
  203. package/src/clis/xiaohongshu/creator-note-detail.ts +95 -0
  204. package/src/clis/xiaohongshu/creator-notes.ts +116 -0
  205. package/src/clis/xiaohongshu/creator-profile.ts +60 -0
  206. package/src/clis/xiaohongshu/creator-stats.ts +81 -0
  207. package/src/clis/xiaohongshu/download.ts +173 -0
  208. package/src/clis/xiaohongshu/search.ts +1 -1
  209. package/src/clis/xiaohongshu/user-helpers.test.ts +106 -0
  210. package/src/clis/xiaohongshu/user-helpers.ts +85 -0
  211. package/src/clis/xiaohongshu/user.ts +52 -32
  212. package/src/clis/zhihu/download.test.ts +12 -0
  213. package/src/clis/zhihu/download.ts +223 -0
  214. package/src/download/index.ts +395 -0
  215. package/src/download/progress.ts +125 -0
  216. package/src/engine.test.ts +17 -0
  217. package/src/main.ts +12 -3
  218. package/src/pipeline/registry.ts +2 -0
  219. package/src/pipeline/steps/download.ts +310 -0
  220. package/src/pipeline/template.ts +26 -0
  221. package/tests/e2e/browser-auth.test.ts +25 -0
@@ -1,40 +1,57 @@
1
1
  import { cli, Strategy } from '../../registry.js';
2
+ import { extractXhsUserNotes, normalizeXhsUserId } from './user-helpers.js';
3
/**
 * Evaluate a script in the page that reads `window.__INITIAL_STATE__.user` and
 * returns `{ noteGroups, pageData }`.
 *
 * - `noteGroups` comes from `user.notes` and `pageData` from `user.userPageData`;
 *   for each, the `_value` field is used when it is truthy, otherwise the value itself
 *   (presumably `_value` is a reactive-ref wrapper — confirm against the site).
 * - Both values go through a JSON stringify/parse round-trip; anything that fails to
 *   serialize is returned as `null`.
 * - When the store is missing, the defaults are `[]` and `{}`.
 *
 * The text between the backticks is executed in the page, so it must stay free of
 * template-literal interpolation.
 */
+ async function readUserSnapshot(page) {
4
+ return await page.evaluate(`
5
+ (() => {
6
+ const safeClone = (value) => {
7
+ try {
8
+ return JSON.parse(JSON.stringify(value ?? null));
9
+ } catch {
10
+ return null;
11
+ }
12
+ };
13
+
14
+ const userStore = window.__INITIAL_STATE__?.user || {};
15
+ return {
16
+ noteGroups: safeClone(userStore.notes?._value || userStore.notes || []),
17
+ pageData: safeClone(userStore.userPageData?._value || userStore.userPageData || {}),
18
+ };
19
+ })()
20
+ `);
21
+ }
2
22
  // Registers the `user` command for the `xiaohongshu` site.
  // This hunk interleaves two versions: lines marked `-` are the 0.9.6 implementation
  // (install an interceptor for 'v1/user/posted', scroll, collect intercepted responses);
  // lines marked `+` are the 0.9.8 implementation (read the page's embedded state via
  // readUserSnapshot and scroll only while more notes keep appearing).
  cli({
3
23
  site: 'xiaohongshu',
4
24
  name: 'user',
5
- description: 'Get user notes from Xiaohongshu',
6
- domain: 'xiaohongshu.com',
7
- strategy: Strategy.INTERCEPT,
25
+ description: 'Get public notes from a Xiaohongshu user profile',
26
+ domain: 'www.xiaohongshu.com',
27
+ strategy: Strategy.COOKIE,
8
28
  browser: true,
9
29
  args: [
10
- { name: 'id', type: 'string', required: true },
11
- { name: 'limit', type: 'int', default: 15 },
30
+ { name: 'id', type: 'string', required: true, help: 'User id or profile URL' },
31
+ { name: 'limit', type: 'int', default: 15, help: 'Number of notes to return' },
12
32
  ],
13
33
  columns: ['id', 'title', 'type', 'likes', 'url'],
14
34
  func: async (page, kwargs) => {
15
- await page.goto(`https://www.xiaohongshu.com/user/profile/${kwargs.id}`);
16
- await page.wait(5);
17
- await page.installInterceptor('v1/user/posted');
18
- // Trigger API by scrolling
19
- await page.autoScroll({ times: 2, delayMs: 2000 });
20
- // Retrieve data
21
- const requests = await page.getInterceptedRequests();
22
- if (!requests || requests.length === 0)
23
- return [];
24
- let results = [];
25
- for (const req of requests) {
26
- if (req.data && req.data.data && req.data.data.notes) {
27
- for (const note of req.data.data.notes) {
28
- results.push({
29
- id: note.note_id || note.id,
30
- title: note.display_title || '',
31
- type: note.type || '',
32
- likes: note.interact_info?.liked_count || '0',
33
- url: `https://www.xiaohongshu.com/explore/${note.note_id || note.id}`
34
- });
35
- }
36
- }
35
// The id argument is passed through normalizeXhsUserId before being placed in the profile URL
// (the help text says it may be a user id or a profile URL; the helper itself is defined elsewhere).
+ const userId = normalizeXhsUserId(String(kwargs.id));
36
// NOTE(review): Math.max(1, NaN) is NaN, so a non-numeric limit skips the scroll loop and
// makes results.slice(0, NaN) return an empty array — confirm the arg parser always yields a number.
+ const limit = Math.max(1, Number(kwargs.limit ?? 15));
37
+ await page.goto(`https://www.xiaohongshu.com/user/profile/${userId}`);
38
+ await page.wait(3);
39
+ let snapshot = await readUserSnapshot(page);
40
+ let results = extractXhsUserNotes(snapshot ?? {}, userId);
41
+ let previousCount = results.length;
42
// Scroll at most 4 more times, re-reading the snapshot after each scroll, until `limit` notes are available.
+ for (let i = 0; results.length < limit && i < 4; i += 1) {
43
+ await page.autoScroll({ times: 1, delayMs: 1500 });
44
+ await page.wait(1);
45
+ snapshot = await readUserSnapshot(page);
46
+ const nextResults = extractXhsUserNotes(snapshot ?? {}, userId);
47
// Stop early when the last scroll did not surface any additional notes.
+ if (nextResults.length <= previousCount)
48
+ break;
49
+ results = nextResults;
50
+ previousCount = nextResults.length;
51
+ }
52
// Unlike the 0.9.6 code, which returned [] when nothing was captured, the new code raises an error.
+ if (results.length === 0) {
53
+ throw new Error('No public notes found for this Xiaohongshu user.');
37
54
  }
38
- return results.slice(0, kwargs.limit);
39
- }
55
+ return results.slice(0, limit);
56
+ },
40
57
  });
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Zhihu download — export articles to Markdown format.
3
+ *
4
+ * Usage:
5
+ * opencli zhihu download --url "https://zhuanlan.zhihu.com/p/xxx" --output ./zhihu
6
+ */
7
+ /**
8
+ * Convert an HTML fragment to Markdown.
9
+ * This is a simplified converter for Zhihu article content.
+ *
+ * @param html HTML markup to convert.
+ * @returns The Markdown rendering of `html`.
10
+ */
11
+ export declare function htmlToMarkdown(html: string): string;
@@ -0,0 +1,186 @@
1
+ /**
2
+ * Zhihu download — export articles to Markdown format.
3
+ *
4
+ * Usage:
5
+ * opencli zhihu download --url "https://zhuanlan.zhihu.com/p/xxx" --output ./zhihu
6
+ */
7
+ import * as fs from 'node:fs';
8
+ import * as path from 'node:path';
9
+ import { cli, Strategy } from '../../registry.js';
10
+ import { sanitizeFilename, httpDownload } from '../../download/index.js';
11
+ import { formatBytes } from '../../download/progress.js';
12
/**
 * Convert an HTML fragment to Markdown.
 *
 * This is a simplified, regex-based converter for Zhihu article content. It handles
 * headings (h1-h4), paragraphs, links, images, lists, bold/italic, code, blockquotes
 * and line breaks; every other tag is stripped and a small set of named entities
 * (`&nbsp;`, `&lt;`, `&gt;`, `&amp;`, `&quot;`) is decoded.
 *
 * @param html HTML markup to convert.
 * @returns Markdown text, trimmed, with runs of three or more newlines collapsed
 *          to a single blank line.
 */
export function htmlToMarkdown(html) {
    // Reads a double-quoted attribute from an opening tag. The leading `\s` keeps
    // `src` from matching the tail of `data-actualsrc`.
    const readAttr = (tag, name) => new RegExp(`\\s${name}="([^"]*)"`, 'i').exec(tag)?.[1];
    const namedEntities = { nbsp: ' ', lt: '<', gt: '>', amp: '&', quot: '"' };
    let md = html;
    // Every opening-tag pattern below ends in `(?:\s[^>]*)?>`, i.e. the tag name must be
    // followed by whitespace or `>`. Without that, `<p` also matches `<pre>`, `<b` matches
    // `<blockquote>`/`<br>`, `<i` matches `<iframe>`, and so on, which corrupts the output.
    // Remove script and style tags
    md = md.replace(/<script(?:\s[^>]*)?>[\s\S]*?<\/script>/gi, '');
    md = md.replace(/<style(?:\s[^>]*)?>[\s\S]*?<\/style>/gi, '');
    // Convert headers (h1-h4); the closing tag must match the opening level
    md = md.replace(/<h([1-4])(?:\s[^>]*)?>(.*?)<\/h\1>/gi, (_match, level, text) => `${'#'.repeat(Number(level))} ${text}\n\n`);
    // Convert paragraphs
    md = md.replace(/<p(?:\s[^>]*)?>([\s\S]*?)<\/p>/gi, '$1\n\n');
    // Convert links
    md = md.replace(/<a\s[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, '[$2]($1)');
    // Convert images. Attributes are read independently so their order does not matter,
    // and the source preference (data-original, data-actualsrc, src) mirrors the image
    // extraction done by the download command. The tag pattern tolerates `>` inside
    // quoted attribute values.
    md = md.replace(/<img\s(?:[^>"']|"[^"]*"|'[^']*')*>/gi, (tag) => {
        const src = readAttr(tag, 'data-original') || readAttr(tag, 'data-actualsrc') || readAttr(tag, 'src');
        if (!src) {
            // No usable source: leave the tag for the generic tag stripper below.
            return tag;
        }
        return `![${readAttr(tag, 'alt') ?? ''}](${src})`;
    });
    // Convert lists
    md = md.replace(/<ul(?:\s[^>]*)?>([\s\S]*?)<\/ul>/gi, (_match, content) => {
        return content.replace(/<li(?:\s[^>]*)?>([\s\S]*?)<\/li>/gi, '- $1\n') + '\n';
    });
    md = md.replace(/<ol(?:\s[^>]*)?>([\s\S]*?)<\/ol>/gi, (_match, content) => {
        let index = 0;
        // A replacer function is required here: the number differs per item.
        return content.replace(/<li(?:\s[^>]*)?>([\s\S]*?)<\/li>/gi, (_itemMatch, itemContent) => `${++index}. ${itemContent}\n`) + '\n';
    });
    // Convert bold and italic
    md = md.replace(/<strong(?:\s[^>]*)?>(.*?)<\/strong>/gi, '**$1**');
    md = md.replace(/<b(?:\s[^>]*)?>(.*?)<\/b>/gi, '**$1**');
    md = md.replace(/<em(?:\s[^>]*)?>(.*?)<\/em>/gi, '*$1*');
    md = md.replace(/<i(?:\s[^>]*)?>(.*?)<\/i>/gi, '*$1*');
    // Convert code blocks (fenced) before inline code
    md = md.replace(/<pre(?:\s[^>]*)?><code(?:\s[^>]*)?>([\s\S]*?)<\/code><\/pre>/gi, '```\n$1\n```\n\n');
    md = md.replace(/<code(?:\s[^>]*)?>(.*?)<\/code>/gi, '`$1`');
    // Convert blockquotes: prefix every line of the quoted content
    md = md.replace(/<blockquote(?:\s[^>]*)?>([\s\S]*?)<\/blockquote>/gi, (_match, content) => {
        return content.split('\n').map((line) => `> ${line}`).join('\n') + '\n\n';
    });
    // Convert line breaks
    md = md.replace(/<br\s*\/?>/gi, '\n');
    // Remove remaining HTML tags
    md = md.replace(/<[^>]+>/g, '');
    // Decode HTML entities in a single pass so already-decoded text is never decoded
    // again (e.g. `&amp;quot;` must become `&quot;`, not `"`).
    md = md.replace(/&(nbsp|lt|gt|amp|quot);/g, (_match, name) => namedEntities[name]);
    // Clean up extra whitespace
    md = md.replace(/\n{3,}/g, '\n\n');
    md = md.trim();
    return md;
}
68
/**
 * `zhihu download` command: opens an article page, extracts title, author, publish
 * time, content HTML and image URLs from the DOM, converts the content to Markdown
 * and writes `<sanitized title>.md` (with YAML front matter) into the output directory.
 * With `--download-images`, images are saved under `<output>/images` and their URLs in
 * the Markdown are rewritten to the local copies.
 *
 * Returns a single row `{ title, author, status, size }`; when no content can be
 * extracted the row has status `failed` and the message in `size`.
 */
cli({
    site: 'zhihu',
    name: 'download',
    description: '导出知乎文章为 Markdown 格式',
    domain: 'zhuanlan.zhihu.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'url', required: true, help: 'Article URL (zhuanlan.zhihu.com/p/xxx)' },
        { name: 'output', default: './zhihu-articles', help: 'Output directory' },
        { name: 'download-images', type: 'boolean', default: false, help: 'Download images locally' },
    ],
    columns: ['title', 'author', 'status', 'size'],
    func: async (page, kwargs) => {
        const url = kwargs.url;
        const output = kwargs.output;
        const downloadImages = kwargs['download-images'];
        // Render a value as a YAML double-quoted scalar. Backslashes must be escaped
        // before quotes, otherwise a title containing `\` produces invalid front matter.
        const yamlQuote = (value) => `"${String(value).replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"`;
        // Navigate to article page
        await page.goto(url);
        await page.wait(3);
        // Extract article content
        const data = await page.evaluate(`
      (() => {
        const result = {
          title: '',
          author: '',
          content: '',
          publishTime: '',
          images: []
        };

        // Get title
        const titleEl = document.querySelector('.Post-Title, h1.ContentItem-title, .ArticleTitle');
        result.title = titleEl?.textContent?.trim() || 'untitled';

        // Get author
        const authorEl = document.querySelector('.AuthorInfo-name, .UserLink-link');
        result.author = authorEl?.textContent?.trim() || 'unknown';

        // Get publish time
        const timeEl = document.querySelector('.ContentItem-time, .Post-Time');
        result.publishTime = timeEl?.textContent?.trim() || '';

        // Get content HTML
        const contentEl = document.querySelector('.Post-RichTextContainer, .RichText, .ArticleContent');
        if (contentEl) {
          result.content = contentEl.innerHTML;

          // Extract image URLs
          contentEl.querySelectorAll('img').forEach(img => {
            const src = img.getAttribute('data-original') || img.getAttribute('data-actualsrc') || img.src;
            if (src && !src.includes('data:image')) {
              result.images.push(src);
            }
          });
        }

        return result;
      })()
    `);
        if (!data || !data.content) {
            return [{
                    title: 'Error',
                    author: '-',
                    status: 'failed',
                    size: 'Could not extract article content',
                }];
        }
        // Create output directory
        fs.mkdirSync(output, { recursive: true });
        // Convert HTML to Markdown
        let markdown = htmlToMarkdown(data.content);
        // Create frontmatter; filter(Boolean) drops the date line when no publish time was found
        const frontmatter = [
            '---',
            `title: ${yamlQuote(data.title)}`,
            `author: ${yamlQuote(data.author)}`,
            `source: ${yamlQuote(url)}`,
            data.publishTime ? `date: ${yamlQuote(data.publishTime)}` : '',
            '---',
            '',
        ].filter(Boolean).join('\n');
        // Download images if requested
        if (downloadImages && data.images && data.images.length > 0) {
            const imagesDir = path.join(output, 'images');
            fs.mkdirSync(imagesDir, { recursive: true });
            const cookies = await page.evaluate(`(() => document.cookie)()`);
            for (let i = 0; i < data.images.length; i++) {
                const imgUrl = data.images[i];
                const ext = imgUrl.match(/\.(jpg|jpeg|png|gif|webp)/i)?.[1] || 'jpg';
                const imgFilename = `img_${i + 1}.${ext}`;
                const imgPath = path.join(imagesDir, imgFilename);
                try {
                    const result = await httpDownload(imgUrl, imgPath, {
                        cookies: typeof cookies === 'string' ? cookies : '',
                        timeout: 30000,
                    });
                    // httpDownload resolves to { success, size, error? }, so a failed download
                    // is not necessarily a rejection. Only point the Markdown at the local
                    // file when it was actually written.
                    if (result?.success) {
                        // Literal (non-regex) replacement of every occurrence of the URL
                        markdown = markdown.split(imgUrl).join(`./images/${imgFilename}`);
                    }
                }
                catch {
                    // Keep original URL if download fails
                }
            }
        }
        // Write markdown file
        const safeTitle = sanitizeFilename(data.title, 100);
        const filename = `${safeTitle}.md`;
        const filePath = path.join(output, filename);
        const fullContent = frontmatter + '\n' + markdown;
        fs.writeFileSync(filePath, fullContent, 'utf-8');
        const size = Buffer.byteLength(fullContent, 'utf-8');
        return [{
                title: data.title,
                author: data.author,
                status: 'success',
                size: formatBytes(size),
            }];
    },
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,10 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { htmlToMarkdown } from './download.js';
3
// Regression test for ordered-list conversion: each <li> must be rendered as
// "<n>. <item text>", and the output must not contain a literal "$1" placeholder
// in place of the item text.
+ describe('htmlToMarkdown', () => {
4
+ it('renders ordered lists with the original list item content', () => {
5
+ const html = '<ol><li>First item</li><li>Second item</li></ol>';
6
+ expect(htmlToMarkdown(html)).toContain('1. First item');
7
+ expect(htmlToMarkdown(html)).toContain('2. Second item');
8
+ expect(htmlToMarkdown(html)).not.toContain('$1');
9
+ });
10
+ });
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Download utilities: HTTP downloads, yt-dlp wrapper, format conversion.
3
+ */
4
/** Optional settings for httpDownload (its third parameter); every field may be omitted. */
+ export interface DownloadOptions {
5
+ cookies?: string; // cookie string; the zhihu download command passes the page's document.cookie
6
+ headers?: Record<string, string>; // presumably extra request headers — confirm in the implementation
7
+ timeout?: number; // NOTE(review): unit is not stated; the zhihu command passes 30000, presumably milliseconds
8
+ onProgress?: (received: number, total: number) => void; // progress callback; presumably byte counts — confirm
9
+ }
10
/** Optional settings for ytdlpDownload (its third parameter); every field may be omitted. */
+ export interface YtdlpOptions {
11
+ cookies?: string;
12
+ cookiesFile?: string; // presumably a path to a Netscape-format cookie file (see exportCookiesToNetscape) — confirm
13
+ format?: string; // presumably a yt-dlp format selector — confirm
14
+ extraArgs?: string[]; // presumably extra command-line arguments for yt-dlp — confirm
15
+ onProgress?: (percent: number) => void; // progress callback receiving a percentage
16
+ }
17
+ /**
18
+ * Check if yt-dlp is available in PATH.
+ *
+ * @returns A boolean availability flag (presumably `true` when yt-dlp is found — confirm in the implementation).
19
+ */
20
+ export declare function checkYtdlp(): boolean;
21
+ /**
22
+ * Check if ffmpeg is available in PATH.
+ *
+ * @returns A boolean availability flag (presumably `true` when ffmpeg is found — confirm in the implementation).
23
+ */
24
+ export declare function checkFfmpeg(): boolean;
25
+ /**
26
+ * Detect content type from URL and optional headers.
+ *
+ * @param url URL to classify.
+ * @param contentType Optional content-type string (presumably a Content-Type header value — confirm).
+ * @returns One of `'image'`, `'video'`, `'document'` or `'binary'`.
27
+ */
28
+ export declare function detectContentType(url: string, contentType?: string): 'image' | 'video' | 'document' | 'binary';
29
+ /**
30
+ * Check if URL requires yt-dlp for download.
+ *
+ * @param url URL to test.
+ * @returns A boolean (presumably `true` when the URL should be fetched with yt-dlp — confirm in the implementation).
31
+ */
32
+ export declare function requiresYtdlp(url: string): boolean;
33
+ /**
34
+ * HTTP download with progress callback.
+ *
+ * @param url URL to download.
+ * @param destPath Destination path for the downloaded data.
+ * @param options Optional cookies, headers, timeout and progress callback (see DownloadOptions).
+ * @returns A promise of `{ success, size, error? }`.
+ * NOTE(review): the result shape suggests failures may be reported through
+ * `success: false` / `error` rather than a rejected promise, so callers should
+ * check `success` — confirm against the implementation.
35
+ */
36
+ export declare function httpDownload(url: string, destPath: string, options?: DownloadOptions): Promise<{
37
+ success: boolean;
38
+ size: number;
39
+ error?: string;
40
+ }>;
41
+ /**
42
+ * Export cookies to Netscape format for yt-dlp.
+ *
+ * @param cookies Cookie records; `name`, `value` and `domain` are required,
+ * `path`, `secure` and `httpOnly` are optional.
+ * @param filePath Path of the cookie file (presumably the file that gets written — confirm).
43
+ */
44
+ export declare function exportCookiesToNetscape(cookies: Array<{
45
+ name: string;
46
+ value: string;
47
+ domain: string;
48
+ path?: string;
49
+ secure?: boolean;
50
+ httpOnly?: boolean;
51
+ }>, filePath: string): void;
52
+ /**
53
+ * Download video using yt-dlp.
+ *
+ * @param url URL of the video to download.
+ * @param destPath Destination path for the download.
+ * @param options Optional cookies, format, extra arguments and progress callback (see YtdlpOptions).
+ * @returns A promise of `{ success, size, error? }` — the same result shape as httpDownload.
54
+ */
55
+ export declare function ytdlpDownload(url: string, destPath: string, options?: YtdlpOptions): Promise<{
56
+ success: boolean;
57
+ size: number;
58
+ error?: string;
59
+ }>;
60
+ /**
61
+ * Save document content to file.
+ *
+ * @param content Document content as a string.
+ * @param destPath Destination file path.
+ * @param format Optional output format: `'json'`, `'markdown'`, `'html'` or `'text'` (the default is not visible in this declaration).
+ * @param metadata Optional key/value metadata (how it is used is not visible in this declaration).
+ * @returns A promise of `{ success, size, error? }`.
62
+ */
63
+ export declare function saveDocument(content: string, destPath: string, format?: 'json' | 'markdown' | 'html' | 'text', metadata?: Record<string, any>): Promise<{
64
+ success: boolean;
65
+ size: number;
66
+ error?: string;
67
+ }>;
68
+ /**
69
+ * Sanitize filename by removing invalid characters.
+ *
+ * @param name Raw name to sanitize.
+ * @param maxLength Optional length limit (the zhihu download command passes 100; the default is not visible in this declaration).
+ * @returns The sanitized file name.
70
+ */
71
+ export declare function sanitizeFilename(name: string, maxLength?: number): string;
72
+ /**
73
+ * Generate filename from URL if not provided.
+ *
+ * @param url URL the name is derived from.
+ * @param index Numeric index (presumably used to keep generated names distinct — confirm).
+ * @param extension Optional file extension.
+ * @returns The generated file name.
74
+ */
75
+ export declare function generateFilename(url: string, index: number, extension?: string): string;
76
+ /**
77
+ * Get temp directory for cookie files.
+ *
+ * @returns The directory path as a string (whether the directory is created on demand is not visible in this declaration).
78
+ */
79
+ export declare function getTempDir(): string;