@jackwener/opencli 1.7.5 → 1.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/README.md +22 -10
  2. package/README.zh-CN.md +18 -9
  3. package/cli-manifest.json +401 -11
  4. package/clis/51job/company.js +125 -0
  5. package/clis/51job/detail.js +108 -0
  6. package/clis/51job/hot.js +55 -0
  7. package/clis/51job/search.js +79 -0
  8. package/clis/51job/utils.js +302 -0
  9. package/clis/51job/utils.test.js +69 -0
  10. package/clis/bilibili/video.js +68 -0
  11. package/clis/bilibili/video.test.js +132 -0
  12. package/clis/chatgpt/image.js +1 -1
  13. package/clis/deepseek/ask.js +37 -11
  14. package/clis/deepseek/ask.test.js +165 -0
  15. package/clis/deepseek/utils.js +192 -24
  16. package/clis/deepseek/utils.test.js +145 -0
  17. package/clis/gemini/image.js +1 -1
  18. package/clis/instagram/download.js +1 -1
  19. package/clis/jianyu/search.js +139 -3
  20. package/clis/jianyu/search.test.js +25 -0
  21. package/clis/jianyu/shared/procurement-detail.js +15 -0
  22. package/clis/jianyu/shared/procurement-detail.test.js +12 -0
  23. package/clis/twitter/likes.js +3 -2
  24. package/clis/twitter/search.js +4 -2
  25. package/clis/twitter/search.test.js +4 -0
  26. package/clis/twitter/shared.js +35 -2
  27. package/clis/twitter/shared.test.js +96 -0
  28. package/clis/twitter/thread.js +3 -1
  29. package/clis/twitter/timeline.js +3 -2
  30. package/clis/twitter/tweets.js +219 -0
  31. package/clis/twitter/tweets.test.js +125 -0
  32. package/clis/web/read.js +25 -5
  33. package/clis/web/read.test.js +76 -0
  34. package/clis/weread/ai-outline.js +170 -0
  35. package/clis/weread/ai-outline.test.js +83 -0
  36. package/clis/weread/book.js +57 -44
  37. package/clis/weread/commands.test.js +24 -0
  38. package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
  39. package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
  40. package/clis/youtube/channel.js +35 -0
  41. package/dist/src/browser/analyze.d.ts +103 -0
  42. package/dist/src/browser/analyze.js +230 -0
  43. package/dist/src/browser/analyze.test.d.ts +1 -0
  44. package/dist/src/browser/analyze.test.js +164 -0
  45. package/dist/src/browser/article-extract.d.ts +57 -0
  46. package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
  47. package/dist/src/browser/article-extract.e2e.test.js +105 -0
  48. package/dist/src/browser/article-extract.js +169 -0
  49. package/dist/src/browser/article-extract.test.d.ts +1 -0
  50. package/dist/src/browser/article-extract.test.js +94 -0
  51. package/dist/src/browser/base-page.d.ts +13 -3
  52. package/dist/src/browser/base-page.js +35 -25
  53. package/dist/src/browser/cdp.d.ts +1 -0
  54. package/dist/src/browser/cdp.js +23 -5
  55. package/dist/src/browser/compound.d.ts +59 -0
  56. package/dist/src/browser/compound.js +112 -0
  57. package/dist/src/browser/compound.test.d.ts +1 -0
  58. package/dist/src/browser/compound.test.js +175 -0
  59. package/dist/src/browser/dom-snapshot.d.ts +7 -0
  60. package/dist/src/browser/dom-snapshot.js +76 -3
  61. package/dist/src/browser/dom-snapshot.test.js +65 -0
  62. package/dist/src/browser/extract.d.ts +69 -0
  63. package/dist/src/browser/extract.js +132 -0
  64. package/dist/src/browser/extract.test.d.ts +1 -0
  65. package/dist/src/browser/extract.test.js +129 -0
  66. package/dist/src/browser/find.d.ts +76 -0
  67. package/dist/src/browser/find.js +179 -0
  68. package/dist/src/browser/find.test.d.ts +1 -0
  69. package/dist/src/browser/find.test.js +120 -0
  70. package/dist/src/browser/html-tree.d.ts +75 -0
  71. package/dist/src/browser/html-tree.js +112 -0
  72. package/dist/src/browser/html-tree.test.d.ts +1 -0
  73. package/dist/src/browser/html-tree.test.js +181 -0
  74. package/dist/src/browser/network-cache.d.ts +48 -0
  75. package/dist/src/browser/network-cache.js +66 -0
  76. package/dist/src/browser/network-cache.test.d.ts +1 -0
  77. package/dist/src/browser/network-cache.test.js +58 -0
  78. package/dist/src/browser/network-key.d.ts +22 -0
  79. package/dist/src/browser/network-key.js +66 -0
  80. package/dist/src/browser/network-key.test.d.ts +1 -0
  81. package/dist/src/browser/network-key.test.js +49 -0
  82. package/dist/src/browser/shape-filter.d.ts +52 -0
  83. package/dist/src/browser/shape-filter.js +101 -0
  84. package/dist/src/browser/shape-filter.test.d.ts +1 -0
  85. package/dist/src/browser/shape-filter.test.js +101 -0
  86. package/dist/src/browser/shape.d.ts +23 -0
  87. package/dist/src/browser/shape.js +95 -0
  88. package/dist/src/browser/shape.test.d.ts +1 -0
  89. package/dist/src/browser/shape.test.js +82 -0
  90. package/dist/src/browser/target-errors.d.ts +14 -1
  91. package/dist/src/browser/target-errors.js +13 -0
  92. package/dist/src/browser/target-errors.test.js +39 -6
  93. package/dist/src/browser/target-resolver.d.ts +57 -10
  94. package/dist/src/browser/target-resolver.js +195 -75
  95. package/dist/src/browser/target-resolver.test.js +80 -5
  96. package/dist/src/browser/verify-fixture.d.ts +59 -0
  97. package/dist/src/browser/verify-fixture.js +213 -0
  98. package/dist/src/browser/verify-fixture.test.d.ts +1 -0
  99. package/dist/src/browser/verify-fixture.test.js +161 -0
  100. package/dist/src/cli.d.ts +32 -0
  101. package/dist/src/cli.js +936 -141
  102. package/dist/src/cli.test.js +1051 -1
  103. package/dist/src/daemon.d.ts +3 -2
  104. package/dist/src/daemon.js +16 -4
  105. package/dist/src/daemon.test.d.ts +1 -0
  106. package/dist/src/daemon.test.js +19 -0
  107. package/dist/src/download/article-download.d.ts +12 -0
  108. package/dist/src/download/article-download.js +141 -17
  109. package/dist/src/download/article-download.test.js +196 -0
  110. package/dist/src/download/index.js +73 -86
  111. package/dist/src/errors.js +4 -2
  112. package/dist/src/errors.test.js +13 -0
  113. package/dist/src/execution.js +7 -2
  114. package/dist/src/execution.test.js +54 -0
  115. package/dist/src/launcher.d.ts +1 -1
  116. package/dist/src/launcher.js +3 -3
  117. package/dist/src/main.js +16 -0
  118. package/dist/src/output.js +1 -1
  119. package/dist/src/output.test.js +6 -0
  120. package/dist/src/types.d.ts +18 -3
  121. package/package.json +5 -1
package/clis/web/read.js CHANGED
@@ -15,7 +15,7 @@
15
15
  */
16
16
  import { cli, Strategy } from '@jackwener/opencli/registry';
17
17
  import { downloadArticle } from '@jackwener/opencli/download/article-download';
18
- cli({
18
+ const command = cli({
19
19
  site: 'web',
20
20
  name: 'read',
21
21
  description: 'Fetch any web page and export as Markdown',
@@ -26,6 +26,7 @@ cli({
26
26
  { name: 'output', default: './web-articles', help: 'Output directory' },
27
27
  { name: 'download-images', type: 'boolean', default: true, help: 'Download images locally' },
28
28
  { name: 'wait', type: 'int', default: 3, help: 'Seconds to wait after page load' },
29
+ { name: 'stdout', type: 'boolean', default: false, help: 'Print markdown to stdout instead of saving to a file' },
29
30
  ],
30
31
  columns: ['title', 'author', 'publish_time', 'status', 'size', 'saved'],
31
32
  func: async (page, kwargs) => {
@@ -162,14 +163,26 @@ cli({
162
163
  if (el.children && el.children.length > 2) dedup(el);
163
164
  });
164
165
 
166
+ // --- Lazy-load image src rewrite ---
167
+ // Many sites render <img src="placeholder.gif" data-src="real.jpg">.
168
+ // Promote the real URL onto src so both the markdown body and the
169
+ // image download list reference the same URL.
170
+ clone.querySelectorAll('img').forEach(img => {
171
+ const srcset = img.getAttribute('data-srcset') || '';
172
+ const srcsetFirst = srcset.split(',')[0]?.trim().split(' ')[0] || '';
173
+ const real = img.getAttribute('data-src')
174
+ || img.getAttribute('data-original')
175
+ || img.getAttribute('data-lazy-src')
176
+ || srcsetFirst;
177
+ if (real) img.setAttribute('src', real);
178
+ });
179
+
165
180
  result.contentHtml = clone.innerHTML;
166
181
 
167
182
  // --- Image extraction ---
168
183
  const seen = new Set();
169
184
  clone.querySelectorAll('img').forEach(img => {
170
- const src = img.getAttribute('data-src')
171
- || img.getAttribute('data-original')
172
- || img.getAttribute('src');
185
+ const src = img.getAttribute('src') || '';
173
186
  if (src && !src.startsWith('data:') && !seen.has(src)) {
174
187
  seen.add(src);
175
188
  result.imageUrls.push(src);
@@ -186,7 +199,7 @@ cli({
186
199
  referer = parsed.origin + '/';
187
200
  }
188
201
  catch { /* ignore */ }
189
- return downloadArticle({
202
+ const result = await downloadArticle({
190
203
  title: data?.title || 'untitled',
191
204
  author: data?.author,
192
205
  publishTime: data?.publishTime,
@@ -197,6 +210,13 @@ cli({
197
210
  output: kwargs.output,
198
211
  downloadImages: kwargs['download-images'],
199
212
  imageHeaders: referer ? { Referer: referer } : undefined,
213
+ stdout: kwargs.stdout,
200
214
  });
215
+ // `--stdout` is a content-streaming mode. The markdown body already went
216
+ // to process.stdout inside downloadArticle(), so returning rows here
217
+ // would make Commander append table/JSON output to the same stdout
218
+ // stream and break piping.
219
+ return kwargs.stdout ? null : result;
201
220
  },
202
221
  });
222
+ export const __test__ = { command };
package/clis/web/read.test.js ADDED
@@ -0,0 +1,76 @@
1
+ import { beforeEach, describe, expect, it, vi } from 'vitest';
2
+
3
// Created through vi.hoisted() so the vi.mock() factory below can reference
// the very same spy instance.
const { mockDownloadArticle } = vi.hoisted(() => {
  return { mockDownloadArticle: vi.fn() };
});

vi.mock('@jackwener/opencli/download/article-download', () => {
  return { downloadArticle: mockDownloadArticle };
});

// Loaded after the mock registration so read.js receives the stubbed downloader.
const { __test__ } = await import('./read.js');

describe('web/read stdout behavior', () => {
  const { command: read } = __test__;

  // Payload the fake page hands back from its in-page extraction script.
  const extractedArticle = {
    title: 'Example Article',
    author: 'Author',
    publishTime: '2026-04-22',
    contentHtml: '<p>hello</p>',
    imageUrls: ['https://example.com/a.jpg'],
  };

  const page = {
    goto: vi.fn().mockResolvedValue(undefined),
    wait: vi.fn().mockResolvedValue(undefined),
    evaluate: vi.fn().mockResolvedValue(extractedArticle),
  };

  // CLI arguments shared by every case; each test adds its own `stdout` flag.
  const sharedArgs = {
    url: 'https://example.com/article',
    output: '/tmp/out',
    'download-images': false,
  };

  beforeEach(() => {
    mockDownloadArticle.mockReset();
    mockDownloadArticle.mockResolvedValue([
      {
        title: 'Example Article',
        author: 'Author',
        publish_time: '2026-04-22',
        status: 'success',
        size: '1 KB',
        saved: '-',
      },
    ]);
    for (const spy of [page.goto, page.wait, page.evaluate]) {
      spy.mockClear();
    }
  });

  it('returns null in --stdout mode so the CLI does not append result rows to stdout', async () => {
    const outcome = await read.func(page, { ...sharedArgs, stdout: true });

    expect(outcome).toBeNull();

    const expectedArticle = expect.objectContaining({
      title: 'Example Article',
      sourceUrl: 'https://example.com/article',
    });
    const expectedOptions = expect.objectContaining({
      output: '/tmp/out',
      stdout: true,
    });
    expect(mockDownloadArticle).toHaveBeenCalledWith(expectedArticle, expectedOptions);
  });

  it('still returns the saved-row payload when writing to disk', async () => {
    const savedRows = [{ title: 'Example Article', saved: '/tmp/out/Example Article/example.md' }];
    mockDownloadArticle.mockResolvedValue(savedRows);

    const outcome = await read.func(page, { ...sharedArgs, stdout: false });

    // Identity check: the command must pass the downloader's rows through untouched.
    expect(outcome).toBe(savedRows);
  });
});
package/clis/weread/ai-outline.js ADDED
@@ -0,0 +1,170 @@
1
+ import { cli, Strategy } from '@jackwener/opencli/registry';
2
+ import { CliError } from '@jackwener/opencli/errors';
3
+ import { WEREAD_UA, WEREAD_WEB_ORIGIN, WEREAD_DOMAIN } from './utils.js';
4
+
5
+ const WEB_API = `${WEREAD_WEB_ORIGIN}/web`;
6
+
7
/**
 * Serialize cookie records into the value of a `Cookie` request header.
 *
 * @param {Array<{name: string, value: string}>|null|undefined} cookies
 *   Cookie records; anything that is not an array is treated as "no cookies".
 * @returns {string} `name=value` pairs joined with `; `, or an empty string
 *   when there are no cookies.
 */
function buildCookieHeader(cookies) {
  // A missing cookie list means there is nothing to send, not a crash.
  if (!Array.isArray(cookies)) return '';
  return cookies.map(({ name, value }) => `${name}=${value}`).join('; ');
}
10
+
11
/**
 * POST a JSON payload to a WeRead web API endpoint, authenticating with the
 * cookies held by the browser page.
 *
 * Cookies are looked up twice, once scoped to the request URL and once to the
 * WeRead domain, then merged by name with the URL-scoped value winning.
 *
 * @param {object} page - Page object exposing `getCookies({ url })` and
 *   `getCookies({ domain })`.
 * @param {string} path - API path appended to the web API base URL.
 * @param {unknown} body - JSON-serializable request payload.
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {CliError} `AUTH_REQUIRED` on HTTP 401 or errcode -2010 / -2012;
 *   `FETCH_ERROR` on any other non-OK status; `PARSE_ERROR` when an OK
 *   response does not contain valid JSON.
 */
async function postWebApiWithCookies(page, path, body) {
  const url = `${WEB_API}${path}`;
  const notLoggedIn = () =>
    new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first');
  const httpFailure = (status) =>
    new CliError('FETCH_ERROR', `HTTP ${status} for ${path}`, 'WeRead API may be temporarily unavailable');

  const [apiCookies, domainCookies] = await Promise.all([
    page.getCookies({ url }),
    page.getCookies({ domain: WEREAD_DOMAIN }),
  ]);

  // Domain cookies are inserted first so URL-scoped cookies override them by
  // name; a missing list is treated as empty instead of crashing the merge.
  const merged = new Map();
  for (const cookie of [...(domainCookies ?? []), ...(apiCookies ?? [])]) {
    merged.set(cookie.name, cookie);
  }
  const cookieHeader = buildCookieHeader([...merged.values()]);

  const resp = await fetch(url, {
    method: 'POST',
    headers: {
      'User-Agent': WEREAD_UA,
      'Content-Type': 'application/json',
      'Origin': WEREAD_WEB_ORIGIN,
      'Referer': `${WEREAD_WEB_ORIGIN}/`,
      ...(cookieHeader ? { 'Cookie': cookieHeader } : {}),
    },
    body: JSON.stringify(body),
  });

  if (resp.status === 401) {
    throw notLoggedIn();
  }

  let data;
  try {
    data = await resp.json();
  } catch {
    // A failed request with a non-JSON body (e.g. an HTML 5xx page) is an
    // HTTP failure first; report the status instead of hiding it behind a
    // parse error.
    if (!resp.ok) {
      throw httpFailure(resp.status);
    }
    throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
  }

  // The body is inspected before the status so an auth errcode is recognized
  // even when it arrives on a non-OK response.
  if (data?.errcode === -2010 || data?.errcode === -2012) {
    throw notLoggedIn();
  }
  if (!resp.ok) {
    throw httpFailure(resp.status);
  }
  return data;
}
53
+
54
/**
 * POST a JSON payload to a WeRead web API endpoint without sending cookies.
 *
 * @param {string} path - API path appended to the web API base URL.
 * @param {unknown} body - JSON-serializable request payload.
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {CliError} `FETCH_ERROR` when the response status is not OK,
 *   `PARSE_ERROR` when the body is not valid JSON.
 */
async function postWebApi(path, body) {
  const endpoint = `${WEB_API}${path}`;
  const requestInit = {
    method: 'POST',
    headers: {
      'User-Agent': WEREAD_UA,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(body),
  };

  const response = await fetch(endpoint, requestInit);
  if (response.ok) {
    try {
      return await response.json();
    } catch {
      throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
    }
  }
  throw new CliError('FETCH_ERROR', `HTTP ${response.status} for ${path}`, 'WeRead API may be temporarily unavailable');
}
73
+
74
/**
 * Read an optional integer argument, falling back to its default when absent.
 *
 * @param {object} args - Parsed CLI arguments.
 * @param {string} name - Argument name to read.
 * @param {number} fallback - Value used when the argument is null/undefined.
 * @param {number} min - Smallest accepted value.
 * @returns {number} The validated integer.
 * @throws {CliError} `INVALID_ARGUMENT` when a supplied value is not an
 *   integer or is below `min`.
 */
function parseOutlineIntArg(args, name, fallback, min) {
  const value = args[name];
  if (value === undefined || value === null) return fallback;
  const parsed = Number(value);
  if (!Number.isInteger(parsed) || parsed < min) {
    throw new CliError('INVALID_ARGUMENT', `${name} must be an integer >= ${min}`, `Example: --${name} ${fallback}`);
  }
  return parsed;
}

/**
 * `weread ai-outline <book-id>`: fetch the AI-generated outline of a book.
 *
 * Flow: look up the book's chapters (cookie-authenticated), request the
 * outline for those chapter UIDs, flatten the outline items into rows, then
 * either return the rows (`--raw`) or one formatted text block per chapter.
 */
cli({
  site: 'weread',
  name: 'ai-outline',
  description: 'Get AI-generated outline for a book',
  domain: 'weread.qq.com',
  strategy: Strategy.COOKIE,
  defaultFormat: 'plain',
  args: [
    { name: 'book-id', positional: true, required: true, help: 'Book ID (from shelf or search results)' },
    { name: 'limit', type: 'int', default: 200, help: 'Max outline items to return' },
    { name: 'depth', type: 'int', default: 4, help: 'Max outline depth (2=topics, 3=key points, 4=details)' },
    { name: 'raw', type: 'boolean', default: false, help: 'Output structured rows (chapter/idx/level/text) for programmatic use' },
  ],
  columns: undefined,
  func: async (page, args) => {
    // Validate every argument before touching the network. Without this, a
    // missing limit became NaN and `slice(0, NaN)` silently returned nothing.
    const bookId = String(args['book-id'] || '').trim();
    if (!bookId) {
      throw new CliError('INVALID_ARGUMENT', 'book-id is required', 'Pass the book ID from shelf or search results');
    }
    const limit = parseOutlineIntArg(args, 'limit', 200, 1);
    // Level-1 items are always skipped below, so a depth under 2 can never
    // yield any output.
    const maxDepth = parseOutlineIntArg(args, 'depth', 4, 2);
    const rawMode = Boolean(args.raw);

    const chapterData = await postWebApiWithCookies(page, '/book/chapterInfos', {
      bookIds: [bookId],
      sinces: [0],
    });
    const chapters = chapterData?.data?.[0]?.updated ?? [];
    if (chapters.length === 0) {
      throw new CliError('NOT_FOUND', 'No chapters found for this book', 'Check that the book ID is correct');
    }

    const chapterUids = chapters.map((c) => c.chapterUid);
    const chapterNameMap = new Map(chapters.map((c) => [c.chapterUid, c.title ?? '']));

    const outlineData = await postWebApi('/book/outline', {
      bookId,
      chapterUids,
    });

    const itemsArray = outlineData?.itemsArray ?? [];
    const rawRows = [];

    for (const entry of itemsArray) {
      const items = entry.items;
      if (!Array.isArray(items) || items.length === 0) continue;

      const chapterName = chapterNameMap.get(entry.chapterUid) ?? `Chapter ${entry.chapterUid}`;
      // Level-4 items may arrive without an index; number them under the most
      // recent indexed level-3 item as "<l3>.<n>".
      let lastL3Idx = '';
      let l4Counter = 0;

      for (const item of items) {
        const level = item.level ?? 1;
        if (level <= 1 || level > maxDepth) continue;

        let idx = item.uiIdx ?? '';
        if (level === 3 && idx) {
          lastL3Idx = idx;
          l4Counter = 0;
        }
        if (level === 4 && !idx && lastL3Idx) {
          l4Counter++;
          idx = `${lastL3Idx}.${l4Counter}`;
        }

        rawRows.push({ chapter: chapterName, idx, level, text: item.text ?? '' });
      }
    }

    if (rawRows.length === 0) {
      throw new CliError('NOT_FOUND', 'No AI outline available for this book', 'AI outlines may not be generated for all books');
    }

    if (rawMode) {
      return rawRows.slice(0, limit);
    }

    // Plain mode: one text block per chapter name, in first-seen order.
    const grouped = new Map();
    for (const row of rawRows) {
      if (!grouped.has(row.chapter)) grouped.set(row.chapter, []);
      grouped.get(row.chapter).push(row);
    }

    const results = [];
    for (const [chapter, rows] of grouped) {
      const lines = [`📖 ${chapter}`];
      for (const row of rows) {
        const indent = ' '.repeat(row.level - 2);
        const prefix = row.level === 2 ? `${row.idx}. ` : `${row.idx} `;
        lines.push(`${indent}${prefix}${row.text}`);
      }
      results.push({ outline: lines.join('\n') });
    }

    // NOTE(review): here `limit` caps the number of chapter blocks, whereas in
    // --raw mode it caps individual rows; the help text says "outline items".
    // Kept as-is to avoid truncating existing plain output. Confirm intent.
    return results.slice(0, limit);
  },
});
package/clis/weread/ai-outline.test.js ADDED
@@ -0,0 +1,83 @@
1
+ import { beforeEach, describe, expect, it, vi } from 'vitest';
2
+ import { getRegistry } from '@jackwener/opencli/registry';
3
+ import './ai-outline.js';
4
+
5
describe('weread ai-outline', () => {
  const command = getRegistry().get('weread/ai-outline');

  // Fake page whose two getCookies() calls resolve to one cookie each, in
  // call order.
  const makePage = () => {
    const getCookies = vi.fn();
    getCookies.mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid123', domain: '.weread.qq.com' }]);
    getCookies.mockResolvedValueOnce([{ name: 'wr_name', value: 'alice', domain: '.weread.qq.com' }]);
    return { getCookies };
  };

  // Minimal fetch Response stand-in: HTTP 200 with a JSON payload.
  const okJson = (payload) => ({
    ok: true,
    status: 200,
    json: () => Promise.resolve(payload),
  });

  beforeEach(() => {
    vi.restoreAllMocks();
  });

  it('registers ai-outline with plain default output', () => {
    expect(command?.defaultFormat).toBe('plain');
  });

  it('maps chapterInfos auth-expired responses to AUTH_REQUIRED', async () => {
    expect(command?.func).toBeTypeOf('function');
    const page = makePage();
    const expiredSession = okJson({ errcode: -2012, errmsg: '登录超时' });
    vi.stubGlobal('fetch', vi.fn().mockResolvedValue(expiredSession));

    const pending = command.func(page, { 'book-id': 'book-1' });

    await expect(pending).rejects.toMatchObject({
      code: 'AUTH_REQUIRED',
      message: 'Not logged in to WeRead',
    });
  });

  it('returns structured rows for --raw and respects depth filtering', async () => {
    expect(command?.func).toBeTypeOf('function');
    const page = makePage();

    const chapterInfos = okJson({
      data: [{
        updated: [
          { chapterUid: 'c1', title: '第一章' },
        ],
      }],
    });
    const outline = okJson({
      itemsArray: [{
        chapterUid: 'c1',
        items: [
          { level: 2, uiIdx: '1', text: '主题一' },
          { level: 3, uiIdx: '1.1', text: '要点一' },
          { level: 4, text: '细节一' },
        ],
      }],
    });
    const fetchMock = vi.fn();
    fetchMock.mockResolvedValueOnce(chapterInfos);
    fetchMock.mockResolvedValueOnce(outline);
    vi.stubGlobal('fetch', fetchMock);

    const rows = await command.func(page, { 'book-id': 'book-1', raw: true, depth: 3, limit: 10 });

    // depth: 3 drops the level-4 detail row.
    expect(rows).toEqual([
      { chapter: '第一章', idx: '1', level: 2, text: '主题一' },
      { chapter: '第一章', idx: '1.1', level: 3, text: '要点一' },
    ]);

    const chapterRequest = expect.objectContaining({
      method: 'POST',
      headers: expect.objectContaining({
        Cookie: 'wr_name=alice; wr_vid=vid123',
      }),
    });
    expect(fetchMock).toHaveBeenNthCalledWith(1, 'https://weread.qq.com/web/book/chapterInfos', chapterRequest);

    const outlineRequest = expect.objectContaining({
      method: 'POST',
    });
    expect(fetchMock).toHaveBeenNthCalledWith(2, 'https://weread.qq.com/web/book/outline', outlineRequest);
  });
});
package/clis/weread/book.js CHANGED
@@ -37,6 +37,61 @@ function countSearchIdentities(entries) {
37
37
  }
38
38
  return counts;
39
39
  }
40
/**
 * Derive a book title from a WeRead `document.title` value.
 *
 * The title is accepted only when it has the exact "<title> - 微信读书" shape.
 * If the part before the suffix still contains a " - " separator, the page
 * title is ambiguous and nothing is returned.
 *
 * @param {unknown} rawTitle - The page's document title.
 * @returns {string} The book title, or an empty string when it cannot be
 *   determined unambiguously.
 */
export function strictTitleFromWereadDocumentTitle(rawTitle) {
  const wereadSuffix = ' - 微信读书';
  const trimmed = String(rawTitle || '').trim();
  if (trimmed.endsWith(wereadSuffix)) {
    const candidate = trimmed.slice(0, trimmed.length - wereadSuffix.length).trim();
    const isAmbiguous = candidate.includes(' - ');
    return isAmbiguous ? '' : candidate;
  }
  return '';
}
50
/**
 * Scrape book metadata from a WeRead reader page.
 *
 * This function is also serialized with `toString()` and evaluated inside the
 * browser page, so it must be fully self-contained: it may only use its `doc`
 * argument and language built-ins, never module-scope bindings.
 *
 * @param {Document} doc - The document (or a compatible stand-in) to read.
 * @returns {{title: string, author: string, publisher: string, intro: string,
 *   category: string, rating: string, metadataReady: boolean}}
 *   Extracted fields; each is an empty string when not found. `metadataReady`
 *   is true when a title or an author was found.
 */
export function extractReaderFallbackMetadata(doc) {
  const text = (node) => node?.textContent?.trim() || '';

  // First non-empty text among the given selectors, tried in order.
  const firstText = (...selectors) => {
    for (const selector of selectors) {
      const value = text(doc.querySelector(selector));
      if (value) return value;
    }
    return '';
  };

  // Local copy of the strict "<title> - 微信读书" rule. It lives here rather
  // than calling a module-level helper because that helper does not exist in
  // the page context where the serialized function runs.
  const strictTitleFromDocumentTitle = (rawTitle) => {
    const suffix = ' - 微信读书';
    const normalized = String(rawTitle || '').trim();
    if (!normalized.endsWith(suffix)) return '';
    const base = normalized.slice(0, -suffix.length).trim();
    // Extra separators mean the title is ambiguous (e.g. it embeds an author).
    return base.includes(' - ') ? '' : base;
  };

  const bodyText = doc.body?.innerText?.replace(/\s+/g, ' ').trim() || '';

  const extractRating = () => {
    const match = bodyText.match(/微信读书推荐值\s*([0-9.]+%)/);
    return match ? match[1] : '';
  };

  const extractPublisher = () => {
    const direct = text(doc.querySelector('.introDialog_content_pub_line'));
    return direct.startsWith('出版社') ? direct.replace(/^出版社\s*/, '').trim() : '';
  };

  const extractIntro = () =>
    firstText(
      '.horizontalReaderCoverPage_content_bookInfo_intro',
      '.wr_flyleaf_page_bookIntro_content',
      '.introDialog_content_intro_para',
    );

  // The category is read from the first inline script that mentions it.
  const categorySource = Array.from(doc.scripts || [])
    .map((script) => script.textContent || '')
    .find((scriptText) => scriptText.includes('"category"')) || '';
  const categoryMatch = categorySource.match(/"category"\s*:\s*"([^"]+)"/);

  const title = firstText(
    '.horizontalReaderCoverPage_content_bookTitle',
    '.wr_flyleaf_page_bookInfo_bookTitle',
    '.outline_book_detail_header_title',
    '.readerTopBar_title_link',
  ) || strictTitleFromDocumentTitle(doc.title || '');
  const author = firstText(
    '.horizontalReaderCoverPage_content_author',
    '.wr_flyleaf_page_bookInfo_author',
    '.outline_book_detail_header_author',
  );

  return {
    title,
    author,
    publisher: extractPublisher(),
    intro: extractIntro(),
    category: categoryMatch ? categoryMatch[1].trim() : '',
    rating: extractRating(),
    metadataReady: Boolean(title || author),
  };
}
40
95
  /**
41
96
  * Reuse the public search page as a last-resort reader URL source when the
42
97
  * cached shelf page cannot provide a trustworthy bookId-to-reader mapping.
@@ -108,51 +163,9 @@ async function resolveSearchReaderUrl(title, author) {
108
163
  */
109
164
  async function loadReaderFallbackResult(page, readerUrl) {
110
165
  await page.goto(readerUrl);
111
- await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle', timeout: 10 });
166
+ await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle, .readerTopBar_title_link', timeout: 10 });
112
167
  const result = await page.evaluate(`
113
- (() => {
114
- const text = (node) => node?.textContent?.trim() || '';
115
- const bodyText = document.body?.innerText?.replace(/\\s+/g, ' ').trim() || '';
116
- const titleSelector = '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle';
117
- const authorSelector = '.horizontalReaderCoverPage_content_author, .wr_flyleaf_page_bookInfo_author';
118
- const extractRating = () => {
119
- const match = bodyText.match(/微信读书推荐值\\s*([0-9.]+%)/);
120
- return match ? match[1] : '';
121
- };
122
- const extractPublisher = () => {
123
- const direct = text(document.querySelector('.introDialog_content_pub_line'));
124
- return direct.startsWith('出版社') ? direct.replace(/^出版社\\s*/, '').trim() : '';
125
- };
126
- const extractIntro = () => {
127
- const selectors = [
128
- '.horizontalReaderCoverPage_content_bookInfo_intro',
129
- '.wr_flyleaf_page_bookIntro_content',
130
- '.introDialog_content_intro_para',
131
- ];
132
- for (const selector of selectors) {
133
- const value = text(document.querySelector(selector));
134
- if (value) return value;
135
- }
136
- return '';
137
- };
138
-
139
- const categorySource = Array.from(document.scripts)
140
- .map((script) => script.textContent || '')
141
- .find((scriptText) => scriptText.includes('"category"')) || '';
142
- const categoryMatch = categorySource.match(/"category"\\s*:\\s*"([^"]+)"/);
143
- const title = text(document.querySelector(titleSelector));
144
- const author = text(document.querySelector(authorSelector));
145
-
146
- return {
147
- title,
148
- author,
149
- publisher: extractPublisher(),
150
- intro: extractIntro(),
151
- category: categoryMatch ? categoryMatch[1].trim() : '',
152
- rating: extractRating(),
153
- metadataReady: Boolean(title || author),
154
- };
155
- })()
168
+ (${extractReaderFallbackMetadata.toString()})(document)
156
169
  `);
157
170
  return {
158
171
  title: String(result?.title || '').trim(),
package/clis/weread/commands.test.js CHANGED
@@ -14,6 +14,7 @@ import { getRegistry } from '@jackwener/opencli/registry';
14
14
  import './book.js';
15
15
  import './highlights.js';
16
16
  import './notes.js';
17
+ import { extractReaderFallbackMetadata, strictTitleFromWereadDocumentTitle } from './book.js';
17
18
  describe('weread book-id positional args', () => {
18
19
  const book = getRegistry().get('weread/book');
19
20
  const highlights = getRegistry().get('weread/highlights');
@@ -356,6 +357,29 @@ describe('weread book-id positional args', () => {
356
357
  message: 'Not logged in to WeRead',
357
358
  });
358
359
  });
360
+ it('does not guess author from document.title when the reader page skips cover metadata', async () => {
361
+ const nodes = new Map([
362
+ ['.readerTopBar_title_link', { textContent: 'Part 1 - Part 2' }],
363
+ ['.introDialog_content_pub_line', { textContent: '出版社 测试出版社' }],
364
+ ['.introDialog_content_intro_para', { textContent: '测试简介。' }],
365
+ ]);
366
+ const mockDocument = {
367
+ title: 'Part 1 - Part 2 - 作者甲 - 微信读书',
368
+ body: { innerText: '微信读书推荐值 88.8%' },
369
+ scripts: [],
370
+ querySelector: (selector) => nodes.get(selector) || null,
371
+ };
372
+ expect(strictTitleFromWereadDocumentTitle(mockDocument.title)).toBe('');
373
+ expect(extractReaderFallbackMetadata(mockDocument)).toEqual({
374
+ title: 'Part 1 - Part 2',
375
+ author: '',
376
+ publisher: '测试出版社',
377
+ intro: '测试简介。',
378
+ category: '',
379
+ rating: '88.8%',
380
+ metadataReady: true,
381
+ });
382
+ });
359
383
  it('passes the positional book-id to highlights', async () => {
360
384
  mockFetchPrivateApi.mockResolvedValue({ updated: [] });
361
385
  await highlights.func({}, { 'book-id': 'abc', limit: 5 });
package/clis/xiaoyuzhou/podcast-episodes.js CHANGED
@@ -20,9 +20,9 @@ cli({
20
20
  throw new CliError('INVALID_ARGUMENT', 'limit must be a positive integer', 'Example: --limit 5');
21
21
  }
22
22
  const credentials = loadXiaoyuzhouCredentials();
23
- const response = await requestXiaoyuzhouJson('/v1/podcast/listEpisode', {
23
+ const response = await requestXiaoyuzhouJson('/v1/episode/list', {
24
24
  method: 'POST',
25
- body: { pid: args.id, limit: requestedLimit },
25
+ body: { pid: args.id, order: 'desc', limit: requestedLimit },
26
26
  credentials,
27
27
  });
28
28
  const episodes = response.data ?? [];
package/clis/xiaoyuzhou/podcast-episodes.test.js ADDED
@@ -0,0 +1,78 @@
1
+ import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
2
+ import { getRegistry } from '@jackwener/opencli/registry';
3
+
4
// Created through vi.hoisted() so the vi.mock() factory below can reference
// the very same spy instances.
const { mockRequestJson, mockLoadCredentials } = vi.hoisted(() => {
  return {
    mockRequestJson: vi.fn(),
    mockLoadCredentials: vi.fn(),
  };
});

// Keep the real auth module but swap the two functions that would otherwise
// hit the network or read stored credentials.
vi.mock('./auth.js', async () => {
  const realAuth = await vi.importActual('./auth.js');
  return {
    ...realAuth,
    requestXiaoyuzhouJson: mockRequestJson,
    loadXiaoyuzhouCredentials: mockLoadCredentials,
  };
});

// Loaded after the mock registration so the command binds to the stubs.
await import('./podcast-episodes.js');

let episodesCommand;

beforeAll(() => {
  episodesCommand = getRegistry().get('xiaoyuzhou/podcast-episodes');
  expect(episodesCommand?.func).toBeTypeOf('function');
});

describe('xiaoyuzhou podcast-episodes', () => {
  const PODCAST_ID = 'podcast-1';
  const makeCredentials = () => ({ access_token: 'access', refresh_token: 'refresh' });

  beforeEach(() => {
    mockRequestJson.mockReset();
    mockLoadCredentials.mockReset();
    mockLoadCredentials.mockReturnValue(makeCredentials());
  });

  it('calls the fixed episode list endpoint with desc ordering', async () => {
    const apiEpisode = {
      eid: 'ep-1',
      title: 'Episode 1',
      duration: 3661,
      playCount: 42,
      pubDate: '2026-04-20T10:00:00.000Z',
    };
    mockRequestJson.mockResolvedValue({ data: [apiEpisode] });

    const rows = await episodesCommand.func(null, { id: PODCAST_ID, limit: 3 });

    expect(mockRequestJson).toHaveBeenCalledWith('/v1/episode/list', {
      method: 'POST',
      body: { pid: PODCAST_ID, order: 'desc', limit: 3 },
      credentials: makeCredentials(),
    });
    // 3661 seconds is rendered as minutes:seconds; the date keeps only its day part.
    expect(rows).toEqual([
      {
        eid: 'ep-1',
        title: 'Episode 1',
        duration: '61:01',
        plays: 42,
        date: '2026-04-20',
      },
    ]);
  });

  it('rejects non-positive limits before hitting the API', async () => {
    const pending = episodesCommand.func(null, { id: PODCAST_ID, limit: 0 });

    await expect(pending).rejects.toMatchObject({
      code: 'INVALID_ARGUMENT',
      message: 'limit must be a positive integer',
    });
    expect(mockRequestJson).not.toHaveBeenCalled();
  });
});