@jackwener/opencli 1.7.6 → 1.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +17 -8
  2. package/README.zh-CN.md +14 -8
  3. package/cli-manifest.json +325 -11
  4. package/clis/51job/company.js +125 -0
  5. package/clis/51job/detail.js +108 -0
  6. package/clis/51job/hot.js +55 -0
  7. package/clis/51job/search.js +79 -0
  8. package/clis/51job/utils.js +302 -0
  9. package/clis/51job/utils.test.js +69 -0
  10. package/clis/bilibili/video.js +11 -4
  11. package/clis/bilibili/video.test.js +51 -0
  12. package/clis/chatgpt/image.js +1 -1
  13. package/clis/deepseek/ask.js +19 -13
  14. package/clis/deepseek/ask.test.js +93 -1
  15. package/clis/deepseek/utils.js +108 -23
  16. package/clis/deepseek/utils.test.js +109 -1
  17. package/clis/gemini/image.js +1 -1
  18. package/clis/instagram/download.js +1 -1
  19. package/clis/twitter/likes.js +3 -2
  20. package/clis/twitter/search.js +4 -2
  21. package/clis/twitter/search.test.js +4 -0
  22. package/clis/twitter/shared.js +28 -0
  23. package/clis/twitter/shared.test.js +96 -0
  24. package/clis/twitter/thread.js +3 -1
  25. package/clis/twitter/timeline.js +3 -2
  26. package/clis/twitter/tweets.js +3 -2
  27. package/clis/twitter/tweets.test.js +1 -1
  28. package/clis/web/read.js +25 -5
  29. package/clis/web/read.test.js +76 -0
  30. package/clis/weread/ai-outline.js +170 -0
  31. package/clis/weread/ai-outline.test.js +83 -0
  32. package/clis/weread/book.js +57 -44
  33. package/clis/weread/commands.test.js +24 -0
  34. package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
  35. package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
  36. package/dist/src/browser/analyze.d.ts +103 -0
  37. package/dist/src/browser/analyze.js +230 -0
  38. package/dist/src/browser/analyze.test.d.ts +1 -0
  39. package/dist/src/browser/analyze.test.js +164 -0
  40. package/dist/src/browser/article-extract.d.ts +57 -0
  41. package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
  42. package/dist/src/browser/article-extract.e2e.test.js +105 -0
  43. package/dist/src/browser/article-extract.js +169 -0
  44. package/dist/src/browser/article-extract.test.d.ts +1 -0
  45. package/dist/src/browser/article-extract.test.js +94 -0
  46. package/dist/src/browser/cdp.js +11 -2
  47. package/dist/src/browser/verify-fixture.d.ts +59 -0
  48. package/dist/src/browser/verify-fixture.js +213 -0
  49. package/dist/src/browser/verify-fixture.test.d.ts +1 -0
  50. package/dist/src/browser/verify-fixture.test.js +161 -0
  51. package/dist/src/cli.d.ts +32 -0
  52. package/dist/src/cli.js +333 -43
  53. package/dist/src/cli.test.js +257 -1
  54. package/dist/src/daemon.d.ts +3 -2
  55. package/dist/src/daemon.js +16 -4
  56. package/dist/src/daemon.test.d.ts +1 -0
  57. package/dist/src/daemon.test.js +19 -0
  58. package/dist/src/download/article-download.d.ts +12 -0
  59. package/dist/src/download/article-download.js +141 -17
  60. package/dist/src/download/article-download.test.js +196 -0
  61. package/dist/src/download/index.js +73 -86
  62. package/dist/src/errors.js +4 -2
  63. package/dist/src/errors.test.js +13 -0
  64. package/dist/src/launcher.d.ts +1 -1
  65. package/dist/src/launcher.js +3 -3
  66. package/dist/src/output.js +1 -1
  67. package/dist/src/output.test.js +6 -0
  68. package/package.json +5 -1
@@ -1,6 +1,6 @@
1
1
  import { cli, Strategy } from '@jackwener/opencli/registry';
2
2
  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
3
- import { resolveTwitterQueryId, sanitizeQueryId } from './shared.js';
3
+ import { resolveTwitterQueryId, sanitizeQueryId, extractMedia } from './shared.js';
4
4
  const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
5
5
  const LIKES_QUERY_ID = 'RozQdCp4CilQzrcuU0NY5w';
6
6
  const USER_BY_SCREEN_NAME_QUERY_ID = 'qRednkZG-rn1P6b48NINmQ';
@@ -99,6 +99,7 @@ function extractLikedTweet(result, seen) {
99
99
  retweets: legacy.retweet_count || 0,
100
100
  created_at: legacy.created_at || '',
101
101
  url: `https://x.com/${screenName}/status/${tw.rest_id}`,
102
+ ...extractMedia(legacy),
102
103
  };
103
104
  }
104
105
  function parseLikes(data, seen) {
@@ -144,7 +145,7 @@ cli({
144
145
  { name: 'username', type: 'string', positional: true, help: 'Twitter screen name (without @). Defaults to logged-in user.' },
145
146
  { name: 'limit', type: 'int', default: 20 },
146
147
  ],
147
- columns: ['author', 'name', 'text', 'likes', 'url'],
148
+ columns: ['author', 'name', 'text', 'likes', 'url', 'has_media', 'media_urls'],
148
149
  func: async (page, kwargs) => {
149
150
  const limit = kwargs.limit || 20;
150
151
  let username = (kwargs.username || '').replace(/^@/, '');
@@ -1,5 +1,6 @@
1
1
  import { CommandExecutionError } from '@jackwener/opencli/errors';
2
2
  import { cli, Strategy } from '@jackwener/opencli/registry';
3
+ import { extractMedia } from './shared.js';
3
4
  /**
4
5
  * Trigger Twitter search SPA navigation with fallback strategies.
5
6
  *
@@ -102,7 +103,7 @@ cli({
102
103
  { name: 'filter', type: 'string', default: 'top', choices: ['top', 'live'] },
103
104
  { name: 'limit', type: 'int', default: 15 },
104
105
  ],
105
- columns: ['id', 'author', 'text', 'created_at', 'likes', 'views', 'url'],
106
+ columns: ['id', 'author', 'text', 'created_at', 'likes', 'views', 'url', 'has_media', 'media_urls'],
106
107
  func: async (page, kwargs) => {
107
108
  const query = kwargs.query;
108
109
  const filter = kwargs.filter === 'live' ? 'live' : 'top';
@@ -156,7 +157,8 @@ cli({
156
157
  created_at: tweet.legacy?.created_at || '',
157
158
  likes: tweet.legacy?.favorite_count || 0,
158
159
  views: tweet.views?.count || '0',
159
- url: `https://x.com/i/status/${tweet.rest_id}`
160
+ url: `https://x.com/i/status/${tweet.rest_id}`,
161
+ ...extractMedia(tweet.legacy),
160
162
  });
161
163
  }
162
164
  }
@@ -75,6 +75,8 @@ describe('twitter search command', () => {
75
75
  likes: 7,
76
76
  views: '12',
77
77
  url: 'https://x.com/i/status/1',
78
+ has_media: false,
79
+ media_urls: [],
78
80
  },
79
81
  ]);
80
82
  expect(page.installInterceptor).toHaveBeenCalledWith('SearchTimeline');
@@ -203,6 +205,8 @@ describe('twitter search command', () => {
203
205
  likes: 3,
204
206
  views: '5',
205
207
  url: 'https://x.com/i/status/99',
208
+ has_media: false,
209
+ media_urls: [],
206
210
  },
207
211
  ]);
208
212
  // 6 evaluate calls: 2x pushState + 2x pathname check + 1x fallback + 1x pathname check
@@ -35,6 +35,34 @@ export async function resolveTwitterQueryId(page, operationName, fallbackId) {
35
35
  }`);
36
36
  return sanitizeQueryId(resolved, fallbackId);
37
37
  }
38
/**
 * Pick the best direct-download URL from a list of video variants.
 *
 * Only `video/mp4` variants that carry a URL are considered. Among those the
 * one with the highest `bitrate` wins; a missing bitrate counts as 0 and ties
 * keep the earliest entry, so lists without bitrate data resolve to the first
 * mp4 variant.
 *
 * @param {unknown} variants - Value of `video_info.variants` (may be absent).
 * @returns {string|undefined} URL of the chosen variant, if any.
 */
function pickBestMp4Url(variants) {
  if (!Array.isArray(variants)) return undefined;
  let best;
  for (const variant of variants) {
    if (variant?.content_type !== 'video/mp4' || !variant.url) continue;
    if (!best || (variant.bitrate ?? 0) > (best.bitrate ?? 0)) {
      best = variant;
    }
  }
  return best?.url;
}

/**
 * Extract media flags and URLs from a tweet's `legacy` object.
 *
 * Prefers `extended_entities.media` and falls back to `entities.media` when
 * the extended form is missing or empty. For videos and animated GIFs the
 * highest-bitrate mp4 variant URL is returned, falling back to
 * `media_url_https` when no usable mp4 variant exists; for every other media
 * type `media_url_https` is returned.
 *
 * @param {object|null|undefined} legacy - The tweet's `legacy` payload.
 * @returns {{ has_media: boolean, media_urls: string[] }} `has_media` is true
 *   only when at least one URL could be extracted.
 */
export function extractMedia(legacy) {
  const extended = legacy?.extended_entities?.media;
  // An empty array is truthy, so `||` alone would never reach the fallback.
  const media = Array.isArray(extended) && extended.length > 0
    ? extended
    : legacy?.entities?.media;
  if (!Array.isArray(media) || media.length === 0) {
    return { has_media: false, media_urls: [] };
  }
  const urls = [];
  for (const m of media) {
    if (!m) continue;
    let url = m.media_url_https;
    if (m.type === 'video' || m.type === 'animated_gif') {
      url = pickBestMp4Url(m.video_info?.variants) || url;
    }
    if (url) urls.push(url);
  }
  return { has_media: urls.length > 0, media_urls: urls };
}
38
65
  export const __test__ = {
39
66
  sanitizeQueryId,
67
+ extractMedia,
40
68
  };
@@ -0,0 +1,96 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { __test__ } from './shared.js';
3
+
4
const extractMedia = __test__.extractMedia;

// Unit tests for the shared media extractor used by the twitter commands.
describe('twitter extractMedia', () => {
  const NO_MEDIA = { has_media: false, media_urls: [] };

  // Small fixture builders keep the individual cases short.
  const photo = (file) => ({
    type: 'photo',
    media_url_https: `https://pbs.twimg.com/media/${file}`,
  });
  const withExtended = (media) => ({ extended_entities: { media } });

  it('returns false + empty list when legacy has no media', () => {
    const emptyInputs = [{}, undefined, withExtended([])];
    for (const legacy of emptyInputs) {
      expect(extractMedia(legacy)).toEqual(NO_MEDIA);
    }
  });

  it('extracts photo urls from extended_entities', () => {
    const { has_media: hasMedia, media_urls: mediaUrls } = extractMedia(
      withExtended([photo('a.jpg'), photo('b.jpg')]),
    );
    expect(hasMedia).toBe(true);
    expect(mediaUrls).toEqual([
      'https://pbs.twimg.com/media/a.jpg',
      'https://pbs.twimg.com/media/b.jpg',
    ]);
  });

  it('prefers mp4 variant for video and animated_gif', () => {
    const video = {
      type: 'video',
      media_url_https: 'https://pbs.twimg.com/media/thumb.jpg',
      video_info: {
        variants: [
          { content_type: 'application/x-mpegURL', url: 'https://video.twimg.com/x.m3u8' },
          { content_type: 'video/mp4', url: 'https://video.twimg.com/x.mp4' },
        ],
      },
    };
    const gif = {
      type: 'animated_gif',
      media_url_https: 'https://pbs.twimg.com/tweet_video_thumb/g.jpg',
      video_info: {
        variants: [
          { content_type: 'video/mp4', url: 'https://video.twimg.com/g.mp4' },
        ],
      },
    };

    const extracted = extractMedia(withExtended([video, gif]));

    expect(extracted.has_media).toBe(true);
    expect(extracted.media_urls).toEqual([
      'https://video.twimg.com/x.mp4',
      'https://video.twimg.com/g.mp4',
    ]);
  });

  it('falls back to media_url_https when no mp4 variant is available', () => {
    const videoWithoutVariants = {
      type: 'video',
      media_url_https: 'https://pbs.twimg.com/media/thumb.jpg',
      video_info: { variants: [] },
    };
    expect(extractMedia(withExtended([videoWithoutVariants]))).toEqual({
      has_media: true,
      media_urls: ['https://pbs.twimg.com/media/thumb.jpg'],
    });
  });

  it('falls back to entities.media when extended_entities is missing', () => {
    const legacy = { entities: { media: [photo('c.jpg')] } };
    expect(extractMedia(legacy)).toEqual({
      has_media: true,
      media_urls: ['https://pbs.twimg.com/media/c.jpg'],
    });
  });
});
@@ -1,5 +1,6 @@
1
1
  import { cli, Strategy } from '@jackwener/opencli/registry';
2
2
  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
3
+ import { extractMedia } from './shared.js';
3
4
  // ── Twitter GraphQL constants ──────────────────────────────────────────
4
5
  const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
5
6
  const TWEET_DETAIL_QUERY_ID = 'nBS-WpgA6ZG0CyNHD517JQ';
@@ -54,6 +55,7 @@ function extractTweet(r, seen) {
54
55
  in_reply_to: l.in_reply_to_status_id_str || undefined,
55
56
  created_at: l.created_at,
56
57
  url: `https://x.com/${screenName}/status/${tw.rest_id}`,
58
+ ...extractMedia(l),
57
59
  };
58
60
  }
59
61
  function parseTweetDetail(data, seen) {
@@ -101,7 +103,7 @@ cli({
101
103
  { name: 'tweet-id', positional: true, type: 'string', required: true },
102
104
  { name: 'limit', type: 'int', default: 50 },
103
105
  ],
104
- columns: ['id', 'author', 'text', 'likes', 'retweets', 'url'],
106
+ columns: ['id', 'author', 'text', 'likes', 'retweets', 'url', 'has_media', 'media_urls'],
105
107
  func: async (page, kwargs) => {
106
108
  let tweetId = kwargs['tweet-id'];
107
109
  const urlMatch = tweetId.match(/\/status\/(\d+)/);
@@ -1,6 +1,6 @@
1
1
  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
2
2
  import { cli, Strategy } from '@jackwener/opencli/registry';
3
- import { resolveTwitterQueryId } from './shared.js';
3
+ import { resolveTwitterQueryId, extractMedia } from './shared.js';
4
4
  // ── Twitter GraphQL constants ──────────────────────────────────────────
5
5
  const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
6
6
  const HOME_TIMELINE_QUERY_ID = 'c-CzHF1LboFilMpsx4ZCrQ';
@@ -85,6 +85,7 @@ function extractTweet(result, seen) {
85
85
  views,
86
86
  created_at: l.created_at || '',
87
87
  url: `https://x.com/${screenName}/status/${tw.rest_id}`,
88
+ ...extractMedia(l),
88
89
  };
89
90
  }
90
91
  function parseHomeTimeline(data, seen) {
@@ -148,7 +149,7 @@ cli({
148
149
  },
149
150
  { name: 'limit', type: 'int', default: 20 },
150
151
  ],
151
- columns: ['id', 'author', 'text', 'likes', 'retweets', 'replies', 'views', 'created_at', 'url'],
152
+ columns: ['id', 'author', 'text', 'likes', 'retweets', 'replies', 'views', 'created_at', 'url', 'has_media', 'media_urls'],
152
153
  func: async (page, kwargs) => {
153
154
  const limit = kwargs.limit || 20;
154
155
  const timelineType = kwargs.type === 'following' ? 'following' : 'for-you';
@@ -1,6 +1,6 @@
1
1
  import { cli, Strategy } from '@jackwener/opencli/registry';
2
2
  import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
3
- import { resolveTwitterQueryId, sanitizeQueryId } from './shared.js';
3
+ import { resolveTwitterQueryId, sanitizeQueryId, extractMedia } from './shared.js';
4
4
 
5
5
  const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
6
6
  const USER_TWEETS_QUERY_ID = '6fWQaBPK51aGyC_VC7t9GQ';
@@ -105,6 +105,7 @@ function extractTweet(result, seen) {
105
105
  is_retweet: isRetweet,
106
106
  created_at: legacy.created_at || '',
107
107
  url: `https://x.com/${screenName}/status/${tw.rest_id}`,
108
+ ...extractMedia(legacy),
108
109
  };
109
110
  }
110
111
 
@@ -151,7 +152,7 @@ cli({
151
152
  { name: 'username', type: 'string', positional: true, required: true, help: 'Twitter screen name (with or without @)' },
152
153
  { name: 'limit', type: 'int', default: 20, help: 'Max tweets to return' },
153
154
  ],
154
- columns: ['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url'],
155
+ columns: ['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url', 'has_media', 'media_urls'],
155
156
  func: async (page, kwargs) => {
156
157
  const limit = Math.max(1, Math.min(200, kwargs.limit || 20));
157
158
  const username = String(kwargs.username || '').replace(/^@/, '').trim();
@@ -5,7 +5,7 @@ import { __test__ } from './tweets.js';
5
5
  describe('twitter tweets helpers', () => {
6
6
  it('registers is_retweet in the default columns', () => {
7
7
  const cmd = getRegistry().get('twitter/tweets');
8
- expect(cmd?.columns).toEqual(['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url']);
8
+ expect(cmd?.columns).toEqual(['author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url', 'has_media', 'media_urls']);
9
9
  });
10
10
 
11
11
  it('falls back when queryId contains unsafe characters', () => {
package/clis/web/read.js CHANGED
@@ -15,7 +15,7 @@
15
15
  */
16
16
  import { cli, Strategy } from '@jackwener/opencli/registry';
17
17
  import { downloadArticle } from '@jackwener/opencli/download/article-download';
18
- cli({
18
+ const command = cli({
19
19
  site: 'web',
20
20
  name: 'read',
21
21
  description: 'Fetch any web page and export as Markdown',
@@ -26,6 +26,7 @@ cli({
26
26
  { name: 'output', default: './web-articles', help: 'Output directory' },
27
27
  { name: 'download-images', type: 'boolean', default: true, help: 'Download images locally' },
28
28
  { name: 'wait', type: 'int', default: 3, help: 'Seconds to wait after page load' },
29
+ { name: 'stdout', type: 'boolean', default: false, help: 'Print markdown to stdout instead of saving to a file' },
29
30
  ],
30
31
  columns: ['title', 'author', 'publish_time', 'status', 'size', 'saved'],
31
32
  func: async (page, kwargs) => {
@@ -162,14 +163,26 @@ cli({
162
163
  if (el.children && el.children.length > 2) dedup(el);
163
164
  });
164
165
 
166
+ // --- Lazy-load image src rewrite ---
167
+ // Many sites render <img src="placeholder.gif" data-src="real.jpg">.
168
+ // Promote the real URL onto src so both the markdown body and the
169
+ // image download list reference the same URL.
170
+ clone.querySelectorAll('img').forEach(img => {
171
+ const srcset = img.getAttribute('data-srcset') || '';
172
+ const srcsetFirst = srcset.split(',')[0]?.trim().split(' ')[0] || '';
173
+ const real = img.getAttribute('data-src')
174
+ || img.getAttribute('data-original')
175
+ || img.getAttribute('data-lazy-src')
176
+ || srcsetFirst;
177
+ if (real) img.setAttribute('src', real);
178
+ });
179
+
165
180
  result.contentHtml = clone.innerHTML;
166
181
 
167
182
  // --- Image extraction ---
168
183
  const seen = new Set();
169
184
  clone.querySelectorAll('img').forEach(img => {
170
- const src = img.getAttribute('data-src')
171
- || img.getAttribute('data-original')
172
- || img.getAttribute('src');
185
+ const src = img.getAttribute('src') || '';
173
186
  if (src && !src.startsWith('data:') && !seen.has(src)) {
174
187
  seen.add(src);
175
188
  result.imageUrls.push(src);
@@ -186,7 +199,7 @@ cli({
186
199
  referer = parsed.origin + '/';
187
200
  }
188
201
  catch { /* ignore */ }
189
- return downloadArticle({
202
+ const result = await downloadArticle({
190
203
  title: data?.title || 'untitled',
191
204
  author: data?.author,
192
205
  publishTime: data?.publishTime,
@@ -197,6 +210,13 @@ cli({
197
210
  output: kwargs.output,
198
211
  downloadImages: kwargs['download-images'],
199
212
  imageHeaders: referer ? { Referer: referer } : undefined,
213
+ stdout: kwargs.stdout,
200
214
  });
215
+ // `--stdout` is a content-streaming mode. The markdown body already went
216
+ // to process.stdout inside downloadArticle(), so returning rows here
217
+ // would make Commander append table/JSON output to the same stdout
218
+ // stream and break piping.
219
+ return kwargs.stdout ? null : result;
201
220
  },
202
221
  });
222
+ export const __test__ = { command };
@@ -0,0 +1,76 @@
1
+ import { beforeEach, describe, expect, it, vi } from 'vitest';
2
+
3
// The mock must be created via vi.hoisted so it exists when vi.mock's factory runs.
const { mockDownloadArticle } = vi.hoisted(() => ({
  mockDownloadArticle: vi.fn(),
}));

vi.mock('@jackwener/opencli/download/article-download', () => ({
  downloadArticle: mockDownloadArticle,
}));

// Import after the mock is registered so read.js picks up the stubbed downloader.
const { __test__ } = await import('./read.js');

describe('web/read stdout behavior', () => {
  const read = __test__.command;

  // Payload the in-page extraction script is pretended to have produced.
  const extracted = {
    title: 'Example Article',
    author: 'Author',
    publishTime: '2026-04-22',
    contentHtml: '<p>hello</p>',
    imageUrls: ['https://example.com/a.jpg'],
  };
  const page = {
    goto: vi.fn().mockResolvedValue(undefined),
    wait: vi.fn().mockResolvedValue(undefined),
    evaluate: vi.fn().mockResolvedValue(extracted),
  };

  // Invoke the command with the shared arguments, varying only `stdout`.
  const runRead = (stdout) => read.func(page, {
    url: 'https://example.com/article',
    output: '/tmp/out',
    'download-images': false,
    stdout,
  });

  beforeEach(() => {
    mockDownloadArticle.mockReset();
    mockDownloadArticle.mockResolvedValue([
      {
        title: 'Example Article',
        author: 'Author',
        publish_time: '2026-04-22',
        status: 'success',
        size: '1 KB',
        saved: '-',
      },
    ]);
    for (const stub of [page.goto, page.wait, page.evaluate]) {
      stub.mockClear();
    }
  });

  it('returns null in --stdout mode so the CLI does not append result rows to stdout', async () => {
    const outcome = await runRead(true);

    expect(outcome).toBeNull();

    const expectedArticle = expect.objectContaining({
      title: 'Example Article',
      sourceUrl: 'https://example.com/article',
    });
    const expectedOptions = expect.objectContaining({
      output: '/tmp/out',
      stdout: true,
    });
    expect(mockDownloadArticle).toHaveBeenCalledWith(expectedArticle, expectedOptions);
  });

  it('still returns the saved-row payload when writing to disk', async () => {
    const savedRows = [
      { title: 'Example Article', saved: '/tmp/out/Example Article/example.md' },
    ];
    mockDownloadArticle.mockResolvedValue(savedRows);

    const outcome = await runRead(false);

    expect(outcome).toBe(savedRows);
  });
});
@@ -0,0 +1,170 @@
1
+ import { cli, Strategy } from '@jackwener/opencli/registry';
2
+ import { CliError } from '@jackwener/opencli/errors';
3
+ import { WEREAD_UA, WEREAD_WEB_ORIGIN, WEREAD_DOMAIN } from './utils.js';
4
+
5
+ const WEB_API = `${WEREAD_WEB_ORIGIN}/web`;
6
+
7
/**
 * Serialize cookie records into the value of a `Cookie` request header.
 *
 * @param {Array<{name: string, value: string}>} cookies - Cookies to send.
 * @returns {string} `name=value` pairs joined with `; ` (empty string for no cookies).
 */
function buildCookieHeader(cookies) {
  const toPair = ({ name, value }) => `${name}=${value}`;
  return cookies.map(toPair).join('; ');
}
10
+
11
/**
 * POST a JSON body to a WeRead web API path with the browser session's cookies attached.
 *
 * Cookies are looked up both for the exact request URL and for the WeRead
 * domain; when both lookups return a cookie with the same name, the
 * URL-scoped one is used.
 *
 * @param {object} page - Browser page handle exposing `getCookies(filter)`.
 * @param {string} path - API path appended to the web API base URL.
 * @param {unknown} body - Request payload; serialized with `JSON.stringify`.
 * @returns {Promise<any>} The parsed JSON response.
 * @throws {CliError} `AUTH_REQUIRED` on HTTP 401 or errcode -2010/-2012,
 *   `PARSE_ERROR` when the body is not JSON, `FETCH_ERROR` on any other
 *   non-OK status.
 */
async function postWebApiWithCookies(page, path, body) {
  const endpoint = `${WEB_API}${path}`;
  const [urlScoped, domainScoped] = await Promise.all([
    page.getCookies({ url: endpoint }),
    page.getCookies({ domain: WEREAD_DOMAIN }),
  ]);

  // Domain-wide cookies are inserted first so a URL-scoped cookie of the same name overrides them.
  const cookieJar = new Map(
    [...domainScoped, ...urlScoped].map((cookie) => [cookie.name, cookie]),
  );
  const cookieHeader = buildCookieHeader([...cookieJar.values()]);

  const headers = {
    'User-Agent': WEREAD_UA,
    'Content-Type': 'application/json',
    'Origin': WEREAD_WEB_ORIGIN,
    'Referer': `${WEREAD_WEB_ORIGIN}/`,
  };
  if (cookieHeader) {
    headers['Cookie'] = cookieHeader;
  }

  const response = await fetch(endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify(body),
  });

  const notLoggedIn = () => new CliError(
    'AUTH_REQUIRED',
    'Not logged in to WeRead',
    'Please log in to weread.qq.com in Chrome first',
  );

  if (response.status === 401) {
    throw notLoggedIn();
  }

  let payload;
  try {
    payload = await response.json();
  } catch {
    throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
  }

  // The body's errcode is inspected before the HTTP status, so an auth errcode wins over a generic fetch error.
  const authErrcodes = [-2010, -2012];
  if (authErrcodes.includes(payload?.errcode)) {
    throw notLoggedIn();
  }
  if (!response.ok) {
    throw new CliError('FETCH_ERROR', `HTTP ${response.status} for ${path}`, 'WeRead API may be temporarily unavailable');
  }
  return payload;
}
53
+
54
/**
 * POST a JSON body to a WeRead web API path without sending any cookies.
 *
 * @param {string} path - API path appended to the web API base URL.
 * @param {unknown} body - Request payload; serialized with `JSON.stringify`.
 * @returns {Promise<any>} The parsed JSON response.
 * @throws {CliError} `FETCH_ERROR` on a non-OK status, `PARSE_ERROR` when the
 *   body is not valid JSON.
 */
async function postWebApi(path, body) {
  const endpoint = `${WEB_API}${path}`;
  const requestInit = {
    method: 'POST',
    headers: {
      'User-Agent': WEREAD_UA,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(body),
  };

  const response = await fetch(endpoint, requestInit);
  if (!response.ok) {
    throw new CliError('FETCH_ERROR', `HTTP ${response.status} for ${path}`, 'WeRead API may be temporarily unavailable');
  }

  let payload;
  try {
    payload = await response.json();
  } catch {
    throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
  }
  return payload;
}
73
+
74
// Registers `weread ai-outline`: fetches a book's chapter list, requests the
// AI outline for those chapters, and returns it either as flat rows (--raw)
// or as one formatted text block per chapter.
cli({
  site: 'weread',
  name: 'ai-outline',
  description: 'Get AI-generated outline for a book',
  domain: 'weread.qq.com',
  strategy: Strategy.COOKIE,
  defaultFormat: 'plain',
  args: [
    { name: 'book-id', positional: true, required: true, help: 'Book ID (from shelf or search results)' },
    { name: 'limit', type: 'int', default: 200, help: 'Max outline items to return' },
    { name: 'depth', type: 'int', default: 4, help: 'Max outline depth (2=topics, 3=key points, 4=details)' },
    { name: 'raw', type: 'boolean', default: false, help: 'Output structured rows (chapter/idx/level/text) for programmatic use' },
  ],
  columns: undefined,
  func: async (page, args) => {
    const bookId = String(args['book-id'] || '').trim();
    const wantsRawRows = Boolean(args.raw);

    // Step 1: chapter list (sent with the session cookies).
    const chapterResponse = await postWebApiWithCookies(page, '/book/chapterInfos', {
      bookIds: [bookId],
      sinces: [0],
    });
    const chapters = chapterResponse?.data?.[0]?.updated ?? [];
    if (chapters.length === 0) {
      throw new CliError('NOT_FOUND', 'No chapters found for this book', 'Check that the book ID is correct');
    }

    const chapterUids = chapters.map((chapter) => chapter.chapterUid);
    const titleByUid = new Map(
      chapters.map((chapter) => [chapter.chapterUid, chapter.title ?? '']),
    );

    // Step 2: outline for every chapter (sent without cookies).
    const outlineResponse = await postWebApi('/book/outline', {
      bookId,
      chapterUids,
    });
    const outlineEntries = outlineResponse?.itemsArray ?? [];
    const maxDepth = Number(args.depth);

    // Step 3: flatten into rows, skipping level-1 items and anything deeper than --depth.
    const flatRows = [];
    for (const entry of outlineEntries) {
      const outlineItems = entry.items;
      if (!Array.isArray(outlineItems) || outlineItems.length === 0) {
        continue;
      }

      const chapterName = titleByUid.get(entry.chapterUid) ?? `Chapter ${entry.chapterUid}`;
      // Level-4 items without their own index are numbered under the latest indexed level-3 item.
      let parentIdx = '';
      let childSeq = 0;

      for (const item of outlineItems) {
        const level = item.level ?? 1;
        if (level <= 1 || level > maxDepth) {
          continue;
        }

        let idx = item.uiIdx ?? '';
        if (level === 3 && idx) {
          parentIdx = idx;
          childSeq = 0;
        } else if (level === 4 && !idx && parentIdx) {
          childSeq += 1;
          idx = `${parentIdx}.${childSeq}`;
        }

        flatRows.push({ chapter: chapterName, idx, level, text: item.text ?? '' });
      }
    }

    if (flatRows.length === 0) {
      throw new CliError('NOT_FOUND', 'No AI outline available for this book', 'AI outlines may not be generated for all books');
    }

    if (wantsRawRows) {
      return flatRows.slice(0, Number(args.limit));
    }

    // Step 4: plain mode — group rows by chapter name, preserving first-seen order.
    const rowsByChapter = new Map();
    for (const row of flatRows) {
      let bucket = rowsByChapter.get(row.chapter);
      if (!bucket) {
        bucket = [];
        rowsByChapter.set(row.chapter, bucket);
      }
      bucket.push(row);
    }

    const outlines = Array.from(rowsByChapter, ([chapter, rows]) => {
      const body = rows.map(({ level, idx, text }) => {
        const indent = ' '.repeat(level - 2);
        const prefix = level === 2 ? `${idx}. ` : `${idx} `;
        return `${indent}${prefix}${text}`;
      });
      return { outline: [`📖 ${chapter}`, ...body].join('\n') };
    });

    // In plain mode the limit is applied to the per-chapter blocks.
    return outlines.slice(0, Number(args.limit));
  },
});