@jackwener/opencli 1.7.5 → 1.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -10
- package/README.zh-CN.md +18 -9
- package/cli-manifest.json +401 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/bilibili/video.js +68 -0
- package/clis/bilibili/video.test.js +132 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/deepseek/ask.js +37 -11
- package/clis/deepseek/ask.test.js +165 -0
- package/clis/deepseek/utils.js +192 -24
- package/clis/deepseek/utils.test.js +145 -0
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/jianyu/search.js +139 -3
- package/clis/jianyu/search.test.js +25 -0
- package/clis/jianyu/shared/procurement-detail.js +15 -0
- package/clis/jianyu/shared/procurement-detail.test.js +12 -0
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +35 -2
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +219 -0
- package/clis/twitter/tweets.test.js +125 -0
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/clis/youtube/channel.js +35 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/base-page.d.ts +13 -3
- package/dist/src/browser/base-page.js +35 -25
- package/dist/src/browser/cdp.d.ts +1 -0
- package/dist/src/browser/cdp.js +23 -5
- package/dist/src/browser/compound.d.ts +59 -0
- package/dist/src/browser/compound.js +112 -0
- package/dist/src/browser/compound.test.d.ts +1 -0
- package/dist/src/browser/compound.test.js +175 -0
- package/dist/src/browser/dom-snapshot.d.ts +7 -0
- package/dist/src/browser/dom-snapshot.js +76 -3
- package/dist/src/browser/dom-snapshot.test.js +65 -0
- package/dist/src/browser/extract.d.ts +69 -0
- package/dist/src/browser/extract.js +132 -0
- package/dist/src/browser/extract.test.d.ts +1 -0
- package/dist/src/browser/extract.test.js +129 -0
- package/dist/src/browser/find.d.ts +76 -0
- package/dist/src/browser/find.js +179 -0
- package/dist/src/browser/find.test.d.ts +1 -0
- package/dist/src/browser/find.test.js +120 -0
- package/dist/src/browser/html-tree.d.ts +75 -0
- package/dist/src/browser/html-tree.js +112 -0
- package/dist/src/browser/html-tree.test.d.ts +1 -0
- package/dist/src/browser/html-tree.test.js +181 -0
- package/dist/src/browser/network-cache.d.ts +48 -0
- package/dist/src/browser/network-cache.js +66 -0
- package/dist/src/browser/network-cache.test.d.ts +1 -0
- package/dist/src/browser/network-cache.test.js +58 -0
- package/dist/src/browser/network-key.d.ts +22 -0
- package/dist/src/browser/network-key.js +66 -0
- package/dist/src/browser/network-key.test.d.ts +1 -0
- package/dist/src/browser/network-key.test.js +49 -0
- package/dist/src/browser/shape-filter.d.ts +52 -0
- package/dist/src/browser/shape-filter.js +101 -0
- package/dist/src/browser/shape-filter.test.d.ts +1 -0
- package/dist/src/browser/shape-filter.test.js +101 -0
- package/dist/src/browser/shape.d.ts +23 -0
- package/dist/src/browser/shape.js +95 -0
- package/dist/src/browser/shape.test.d.ts +1 -0
- package/dist/src/browser/shape.test.js +82 -0
- package/dist/src/browser/target-errors.d.ts +14 -1
- package/dist/src/browser/target-errors.js +13 -0
- package/dist/src/browser/target-errors.test.js +39 -6
- package/dist/src/browser/target-resolver.d.ts +57 -10
- package/dist/src/browser/target-resolver.js +195 -75
- package/dist/src/browser/target-resolver.test.js +80 -5
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +936 -141
- package/dist/src/cli.test.js +1051 -1
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/execution.js +7 -2
- package/dist/src/execution.test.js +54 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/main.js +16 -0
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/dist/src/types.d.ts +18 -3
- package/package.json +5 -1
package/clis/web/read.js
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
*/
|
|
16
16
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
17
17
|
import { downloadArticle } from '@jackwener/opencli/download/article-download';
|
|
18
|
-
cli({
|
|
18
|
+
const command = cli({
|
|
19
19
|
site: 'web',
|
|
20
20
|
name: 'read',
|
|
21
21
|
description: 'Fetch any web page and export as Markdown',
|
|
@@ -26,6 +26,7 @@ cli({
|
|
|
26
26
|
{ name: 'output', default: './web-articles', help: 'Output directory' },
|
|
27
27
|
{ name: 'download-images', type: 'boolean', default: true, help: 'Download images locally' },
|
|
28
28
|
{ name: 'wait', type: 'int', default: 3, help: 'Seconds to wait after page load' },
|
|
29
|
+
{ name: 'stdout', type: 'boolean', default: false, help: 'Print markdown to stdout instead of saving to a file' },
|
|
29
30
|
],
|
|
30
31
|
columns: ['title', 'author', 'publish_time', 'status', 'size', 'saved'],
|
|
31
32
|
func: async (page, kwargs) => {
|
|
@@ -162,14 +163,26 @@ cli({
|
|
|
162
163
|
if (el.children && el.children.length > 2) dedup(el);
|
|
163
164
|
});
|
|
164
165
|
|
|
166
|
+
// --- Lazy-load image src rewrite ---
|
|
167
|
+
// Many sites render <img src="placeholder.gif" data-src="real.jpg">.
|
|
168
|
+
// Promote the real URL onto src so both the markdown body and the
|
|
169
|
+
// image download list reference the same URL.
|
|
170
|
+
clone.querySelectorAll('img').forEach(img => {
|
|
171
|
+
const srcset = img.getAttribute('data-srcset') || '';
|
|
172
|
+
const srcsetFirst = srcset.split(',')[0]?.trim().split(' ')[0] || '';
|
|
173
|
+
const real = img.getAttribute('data-src')
|
|
174
|
+
|| img.getAttribute('data-original')
|
|
175
|
+
|| img.getAttribute('data-lazy-src')
|
|
176
|
+
|| srcsetFirst;
|
|
177
|
+
if (real) img.setAttribute('src', real);
|
|
178
|
+
});
|
|
179
|
+
|
|
165
180
|
result.contentHtml = clone.innerHTML;
|
|
166
181
|
|
|
167
182
|
// --- Image extraction ---
|
|
168
183
|
const seen = new Set();
|
|
169
184
|
clone.querySelectorAll('img').forEach(img => {
|
|
170
|
-
const src = img.getAttribute('data-src')
|
|
171
|
-
|| img.getAttribute('data-original')
|
|
172
|
-
|| img.getAttribute('src');
|
|
185
|
+
const src = img.getAttribute('src') || '';
|
|
173
186
|
if (src && !src.startsWith('data:') && !seen.has(src)) {
|
|
174
187
|
seen.add(src);
|
|
175
188
|
result.imageUrls.push(src);
|
|
@@ -186,7 +199,7 @@ cli({
|
|
|
186
199
|
referer = parsed.origin + '/';
|
|
187
200
|
}
|
|
188
201
|
catch { /* ignore */ }
|
|
189
|
-
|
|
202
|
+
const result = await downloadArticle({
|
|
190
203
|
title: data?.title || 'untitled',
|
|
191
204
|
author: data?.author,
|
|
192
205
|
publishTime: data?.publishTime,
|
|
@@ -197,6 +210,13 @@ cli({
|
|
|
197
210
|
output: kwargs.output,
|
|
198
211
|
downloadImages: kwargs['download-images'],
|
|
199
212
|
imageHeaders: referer ? { Referer: referer } : undefined,
|
|
213
|
+
stdout: kwargs.stdout,
|
|
200
214
|
});
|
|
215
|
+
// `--stdout` is a content-streaming mode. The markdown body already went
|
|
216
|
+
// to process.stdout inside downloadArticle(), so returning rows here
|
|
217
|
+
// would make Commander append table/JSON output to the same stdout
|
|
218
|
+
// stream and break piping.
|
|
219
|
+
return kwargs.stdout ? null : result;
|
|
201
220
|
},
|
|
202
221
|
});
|
|
222
|
+
export const __test__ = { command };
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
|
|
3
|
+
const { mockDownloadArticle } = vi.hoisted(() => ({
|
|
4
|
+
mockDownloadArticle: vi.fn(),
|
|
5
|
+
}));
|
|
6
|
+
|
|
7
|
+
vi.mock('@jackwener/opencli/download/article-download', () => ({
|
|
8
|
+
downloadArticle: mockDownloadArticle,
|
|
9
|
+
}));
|
|
10
|
+
|
|
11
|
+
const { __test__ } = await import('./read.js');
|
|
12
|
+
|
|
13
|
+
describe('web/read stdout behavior', () => {
|
|
14
|
+
const read = __test__.command;
|
|
15
|
+
const page = {
|
|
16
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
17
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
18
|
+
evaluate: vi.fn().mockResolvedValue({
|
|
19
|
+
title: 'Example Article',
|
|
20
|
+
author: 'Author',
|
|
21
|
+
publishTime: '2026-04-22',
|
|
22
|
+
contentHtml: '<p>hello</p>',
|
|
23
|
+
imageUrls: ['https://example.com/a.jpg'],
|
|
24
|
+
}),
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
beforeEach(() => {
|
|
28
|
+
mockDownloadArticle.mockReset();
|
|
29
|
+
mockDownloadArticle.mockResolvedValue([{
|
|
30
|
+
title: 'Example Article',
|
|
31
|
+
author: 'Author',
|
|
32
|
+
publish_time: '2026-04-22',
|
|
33
|
+
status: 'success',
|
|
34
|
+
size: '1 KB',
|
|
35
|
+
saved: '-',
|
|
36
|
+
}]);
|
|
37
|
+
page.goto.mockClear();
|
|
38
|
+
page.wait.mockClear();
|
|
39
|
+
page.evaluate.mockClear();
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it('returns null in --stdout mode so the CLI does not append result rows to stdout', async () => {
|
|
43
|
+
const result = await read.func(page, {
|
|
44
|
+
url: 'https://example.com/article',
|
|
45
|
+
output: '/tmp/out',
|
|
46
|
+
'download-images': false,
|
|
47
|
+
stdout: true,
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
expect(result).toBeNull();
|
|
51
|
+
expect(mockDownloadArticle).toHaveBeenCalledWith(
|
|
52
|
+
expect.objectContaining({
|
|
53
|
+
title: 'Example Article',
|
|
54
|
+
sourceUrl: 'https://example.com/article',
|
|
55
|
+
}),
|
|
56
|
+
expect.objectContaining({
|
|
57
|
+
output: '/tmp/out',
|
|
58
|
+
stdout: true,
|
|
59
|
+
}),
|
|
60
|
+
);
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it('still returns the saved-row payload when writing to disk', async () => {
|
|
64
|
+
const rows = [{ title: 'Example Article', saved: '/tmp/out/Example Article/example.md' }];
|
|
65
|
+
mockDownloadArticle.mockResolvedValue(rows);
|
|
66
|
+
|
|
67
|
+
const result = await read.func(page, {
|
|
68
|
+
url: 'https://example.com/article',
|
|
69
|
+
output: '/tmp/out',
|
|
70
|
+
'download-images': false,
|
|
71
|
+
stdout: false,
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
expect(result).toBe(rows);
|
|
75
|
+
});
|
|
76
|
+
});
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import { CliError } from '@jackwener/opencli/errors';
|
|
3
|
+
import { WEREAD_UA, WEREAD_WEB_ORIGIN, WEREAD_DOMAIN } from './utils.js';
|
|
4
|
+
|
|
5
|
+
const WEB_API = `${WEREAD_WEB_ORIGIN}/web`;
|
|
6
|
+
|
|
7
|
+
function buildCookieHeader(cookies) {
|
|
8
|
+
return cookies.map((c) => `${c.name}=${c.value}`).join('; ');
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
async function postWebApiWithCookies(page, path, body) {
|
|
12
|
+
const url = `${WEB_API}${path}`;
|
|
13
|
+
const [apiCookies, domainCookies] = await Promise.all([
|
|
14
|
+
page.getCookies({ url }),
|
|
15
|
+
page.getCookies({ domain: WEREAD_DOMAIN }),
|
|
16
|
+
]);
|
|
17
|
+
const merged = new Map();
|
|
18
|
+
for (const c of domainCookies) merged.set(c.name, c);
|
|
19
|
+
for (const c of apiCookies) merged.set(c.name, c);
|
|
20
|
+
const cookieHeader = buildCookieHeader(Array.from(merged.values()));
|
|
21
|
+
|
|
22
|
+
const resp = await fetch(url, {
|
|
23
|
+
method: 'POST',
|
|
24
|
+
headers: {
|
|
25
|
+
'User-Agent': WEREAD_UA,
|
|
26
|
+
'Content-Type': 'application/json',
|
|
27
|
+
'Origin': WEREAD_WEB_ORIGIN,
|
|
28
|
+
'Referer': `${WEREAD_WEB_ORIGIN}/`,
|
|
29
|
+
...(cookieHeader ? { 'Cookie': cookieHeader } : {}),
|
|
30
|
+
},
|
|
31
|
+
body: JSON.stringify(body),
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
if (resp.status === 401) {
|
|
35
|
+
throw new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first');
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
let data;
|
|
39
|
+
try {
|
|
40
|
+
data = await resp.json();
|
|
41
|
+
} catch {
|
|
42
|
+
throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
if (data?.errcode === -2010 || data?.errcode === -2012) {
|
|
46
|
+
throw new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first');
|
|
47
|
+
}
|
|
48
|
+
if (!resp.ok) {
|
|
49
|
+
throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
|
|
50
|
+
}
|
|
51
|
+
return data;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
async function postWebApi(path, body) {
|
|
55
|
+
const url = `${WEB_API}${path}`;
|
|
56
|
+
const resp = await fetch(url, {
|
|
57
|
+
method: 'POST',
|
|
58
|
+
headers: {
|
|
59
|
+
'User-Agent': WEREAD_UA,
|
|
60
|
+
'Content-Type': 'application/json',
|
|
61
|
+
},
|
|
62
|
+
body: JSON.stringify(body),
|
|
63
|
+
});
|
|
64
|
+
if (!resp.ok) {
|
|
65
|
+
throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
|
|
66
|
+
}
|
|
67
|
+
try {
|
|
68
|
+
return await resp.json();
|
|
69
|
+
} catch {
|
|
70
|
+
throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
cli({
|
|
75
|
+
site: 'weread',
|
|
76
|
+
name: 'ai-outline',
|
|
77
|
+
description: 'Get AI-generated outline for a book',
|
|
78
|
+
domain: 'weread.qq.com',
|
|
79
|
+
strategy: Strategy.COOKIE,
|
|
80
|
+
defaultFormat: 'plain',
|
|
81
|
+
args: [
|
|
82
|
+
{ name: 'book-id', positional: true, required: true, help: 'Book ID (from shelf or search results)' },
|
|
83
|
+
{ name: 'limit', type: 'int', default: 200, help: 'Max outline items to return' },
|
|
84
|
+
{ name: 'depth', type: 'int', default: 4, help: 'Max outline depth (2=topics, 3=key points, 4=details)' },
|
|
85
|
+
{ name: 'raw', type: 'boolean', default: false, help: 'Output structured rows (chapter/idx/level/text) for programmatic use' },
|
|
86
|
+
],
|
|
87
|
+
columns: undefined,
|
|
88
|
+
func: async (page, args) => {
|
|
89
|
+
const bookId = String(args['book-id'] || '').trim();
|
|
90
|
+
const rawMode = Boolean(args.raw);
|
|
91
|
+
|
|
92
|
+
const chapterData = await postWebApiWithCookies(page, '/book/chapterInfos', {
|
|
93
|
+
bookIds: [bookId],
|
|
94
|
+
sinces: [0],
|
|
95
|
+
});
|
|
96
|
+
const chapters = chapterData?.data?.[0]?.updated ?? [];
|
|
97
|
+
if (chapters.length === 0) {
|
|
98
|
+
throw new CliError('NOT_FOUND', 'No chapters found for this book', 'Check that the book ID is correct');
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const chapterUids = chapters.map((c) => c.chapterUid);
|
|
102
|
+
const chapterNameMap = new Map();
|
|
103
|
+
for (const c of chapters) {
|
|
104
|
+
chapterNameMap.set(c.chapterUid, c.title ?? '');
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
const outlineData = await postWebApi('/book/outline', {
|
|
108
|
+
bookId,
|
|
109
|
+
chapterUids,
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
const itemsArray = outlineData?.itemsArray ?? [];
|
|
113
|
+
const maxDepth = Number(args.depth);
|
|
114
|
+
const rawRows = [];
|
|
115
|
+
|
|
116
|
+
for (const entry of itemsArray) {
|
|
117
|
+
const items = entry.items;
|
|
118
|
+
if (!Array.isArray(items) || items.length === 0) continue;
|
|
119
|
+
|
|
120
|
+
const chapterName = chapterNameMap.get(entry.chapterUid) ?? `Chapter ${entry.chapterUid}`;
|
|
121
|
+
let lastL3Idx = '';
|
|
122
|
+
let l4Counter = 0;
|
|
123
|
+
|
|
124
|
+
for (const item of items) {
|
|
125
|
+
const level = item.level ?? 1;
|
|
126
|
+
if (level <= 1) continue;
|
|
127
|
+
if (level > maxDepth) continue;
|
|
128
|
+
|
|
129
|
+
let idx = item.uiIdx ?? '';
|
|
130
|
+
if (level === 3 && idx) {
|
|
131
|
+
lastL3Idx = idx;
|
|
132
|
+
l4Counter = 0;
|
|
133
|
+
}
|
|
134
|
+
if (level === 4 && !idx && lastL3Idx) {
|
|
135
|
+
l4Counter++;
|
|
136
|
+
idx = `${lastL3Idx}.${l4Counter}`;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
rawRows.push({ chapter: chapterName, idx, level, text: item.text ?? '' });
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
if (rawRows.length === 0) {
|
|
144
|
+
throw new CliError('NOT_FOUND', 'No AI outline available for this book', 'AI outlines may not be generated for all books');
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
if (rawMode) {
|
|
148
|
+
return rawRows.slice(0, Number(args.limit));
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
const grouped = new Map();
|
|
152
|
+
for (const row of rawRows) {
|
|
153
|
+
if (!grouped.has(row.chapter)) grouped.set(row.chapter, []);
|
|
154
|
+
grouped.get(row.chapter).push(row);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
const results = [];
|
|
158
|
+
for (const [chapter, rows] of grouped) {
|
|
159
|
+
const lines = [`📖 ${chapter}`];
|
|
160
|
+
for (const row of rows) {
|
|
161
|
+
const indent = ' '.repeat(row.level - 2);
|
|
162
|
+
const prefix = row.level === 2 ? `${row.idx}. ` : `${row.idx} `;
|
|
163
|
+
lines.push(`${indent}${prefix}${row.text}`);
|
|
164
|
+
}
|
|
165
|
+
results.push({ outline: lines.join('\n') });
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
return results.slice(0, Number(args.limit));
|
|
169
|
+
},
|
|
170
|
+
});
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
+
import './ai-outline.js';
|
|
4
|
+
|
|
5
|
+
describe('weread ai-outline', () => {
|
|
6
|
+
const command = getRegistry().get('weread/ai-outline');
|
|
7
|
+
|
|
8
|
+
beforeEach(() => {
|
|
9
|
+
vi.restoreAllMocks();
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
it('registers ai-outline with plain default output', () => {
|
|
13
|
+
expect(command?.defaultFormat).toBe('plain');
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
it('maps chapterInfos auth-expired responses to AUTH_REQUIRED', async () => {
|
|
17
|
+
expect(command?.func).toBeTypeOf('function');
|
|
18
|
+
const page = {
|
|
19
|
+
getCookies: vi.fn()
|
|
20
|
+
.mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid123', domain: '.weread.qq.com' }])
|
|
21
|
+
.mockResolvedValueOnce([{ name: 'wr_name', value: 'alice', domain: '.weread.qq.com' }]),
|
|
22
|
+
};
|
|
23
|
+
vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
|
|
24
|
+
ok: true,
|
|
25
|
+
status: 200,
|
|
26
|
+
json: () => Promise.resolve({ errcode: -2012, errmsg: '登录超时' }),
|
|
27
|
+
}));
|
|
28
|
+
await expect(command.func(page, { 'book-id': 'book-1' })).rejects.toMatchObject({
|
|
29
|
+
code: 'AUTH_REQUIRED',
|
|
30
|
+
message: 'Not logged in to WeRead',
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it('returns structured rows for --raw and respects depth filtering', async () => {
|
|
35
|
+
expect(command?.func).toBeTypeOf('function');
|
|
36
|
+
const page = {
|
|
37
|
+
getCookies: vi.fn()
|
|
38
|
+
.mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid123', domain: '.weread.qq.com' }])
|
|
39
|
+
.mockResolvedValueOnce([{ name: 'wr_name', value: 'alice', domain: '.weread.qq.com' }]),
|
|
40
|
+
};
|
|
41
|
+
const fetchMock = vi.fn()
|
|
42
|
+
.mockResolvedValueOnce({
|
|
43
|
+
ok: true,
|
|
44
|
+
status: 200,
|
|
45
|
+
json: () => Promise.resolve({
|
|
46
|
+
data: [{
|
|
47
|
+
updated: [
|
|
48
|
+
{ chapterUid: 'c1', title: '第一章' },
|
|
49
|
+
],
|
|
50
|
+
}],
|
|
51
|
+
}),
|
|
52
|
+
})
|
|
53
|
+
.mockResolvedValueOnce({
|
|
54
|
+
ok: true,
|
|
55
|
+
status: 200,
|
|
56
|
+
json: () => Promise.resolve({
|
|
57
|
+
itemsArray: [{
|
|
58
|
+
chapterUid: 'c1',
|
|
59
|
+
items: [
|
|
60
|
+
{ level: 2, uiIdx: '1', text: '主题一' },
|
|
61
|
+
{ level: 3, uiIdx: '1.1', text: '要点一' },
|
|
62
|
+
{ level: 4, text: '细节一' },
|
|
63
|
+
],
|
|
64
|
+
}],
|
|
65
|
+
}),
|
|
66
|
+
});
|
|
67
|
+
vi.stubGlobal('fetch', fetchMock);
|
|
68
|
+
const rows = await command.func(page, { 'book-id': 'book-1', raw: true, depth: 3, limit: 10 });
|
|
69
|
+
expect(rows).toEqual([
|
|
70
|
+
{ chapter: '第一章', idx: '1', level: 2, text: '主题一' },
|
|
71
|
+
{ chapter: '第一章', idx: '1.1', level: 3, text: '要点一' },
|
|
72
|
+
]);
|
|
73
|
+
expect(fetchMock).toHaveBeenNthCalledWith(1, 'https://weread.qq.com/web/book/chapterInfos', expect.objectContaining({
|
|
74
|
+
method: 'POST',
|
|
75
|
+
headers: expect.objectContaining({
|
|
76
|
+
Cookie: 'wr_name=alice; wr_vid=vid123',
|
|
77
|
+
}),
|
|
78
|
+
}));
|
|
79
|
+
expect(fetchMock).toHaveBeenNthCalledWith(2, 'https://weread.qq.com/web/book/outline', expect.objectContaining({
|
|
80
|
+
method: 'POST',
|
|
81
|
+
}));
|
|
82
|
+
});
|
|
83
|
+
});
|
package/clis/weread/book.js
CHANGED
|
@@ -37,6 +37,61 @@ function countSearchIdentities(entries) {
|
|
|
37
37
|
}
|
|
38
38
|
return counts;
|
|
39
39
|
}
|
|
40
|
+
export function strictTitleFromWereadDocumentTitle(rawTitle) {
|
|
41
|
+
const suffix = ' - 微信读书';
|
|
42
|
+
const normalized = String(rawTitle || '').trim();
|
|
43
|
+
if (!normalized.endsWith(suffix))
|
|
44
|
+
return '';
|
|
45
|
+
const base = normalized.slice(0, -suffix.length).trim();
|
|
46
|
+
// Only accept the title when WeRead exposes the strict "<title> - 微信读书"
|
|
47
|
+
// shape. If extra separators remain, the page title is ambiguous.
|
|
48
|
+
return base.includes(' - ') ? '' : base;
|
|
49
|
+
}
|
|
50
|
+
export function extractReaderFallbackMetadata(doc) {
|
|
51
|
+
const text = (node) => node?.textContent?.trim() || '';
|
|
52
|
+
const firstText = (...sels) => { for (const s of sels) {
|
|
53
|
+
const v = text(doc.querySelector(s));
|
|
54
|
+
if (v)
|
|
55
|
+
return v;
|
|
56
|
+
} return ''; };
|
|
57
|
+
const bodyText = doc.body?.innerText?.replace(/\s+/g, ' ').trim() || '';
|
|
58
|
+
const extractRating = () => {
|
|
59
|
+
const match = bodyText.match(/微信读书推荐值\s*([0-9.]+%)/);
|
|
60
|
+
return match ? match[1] : '';
|
|
61
|
+
};
|
|
62
|
+
const extractPublisher = () => {
|
|
63
|
+
const direct = text(doc.querySelector('.introDialog_content_pub_line'));
|
|
64
|
+
return direct.startsWith('出版社') ? direct.replace(/^出版社\s*/, '').trim() : '';
|
|
65
|
+
};
|
|
66
|
+
const extractIntro = () => {
|
|
67
|
+
const selectors = [
|
|
68
|
+
'.horizontalReaderCoverPage_content_bookInfo_intro',
|
|
69
|
+
'.wr_flyleaf_page_bookIntro_content',
|
|
70
|
+
'.introDialog_content_intro_para',
|
|
71
|
+
];
|
|
72
|
+
for (const selector of selectors) {
|
|
73
|
+
const value = text(doc.querySelector(selector));
|
|
74
|
+
if (value)
|
|
75
|
+
return value;
|
|
76
|
+
}
|
|
77
|
+
return '';
|
|
78
|
+
};
|
|
79
|
+
const categorySource = Array.from(doc.scripts || [])
|
|
80
|
+
.map((script) => script.textContent || '')
|
|
81
|
+
.find((scriptText) => scriptText.includes('"category"')) || '';
|
|
82
|
+
const categoryMatch = categorySource.match(/"category"\s*:\s*"([^"]+)"/);
|
|
83
|
+
const title = firstText('.horizontalReaderCoverPage_content_bookTitle', '.wr_flyleaf_page_bookInfo_bookTitle', '.outline_book_detail_header_title', '.readerTopBar_title_link') || strictTitleFromWereadDocumentTitle(doc.title || '');
|
|
84
|
+
const author = firstText('.horizontalReaderCoverPage_content_author', '.wr_flyleaf_page_bookInfo_author', '.outline_book_detail_header_author');
|
|
85
|
+
return {
|
|
86
|
+
title,
|
|
87
|
+
author,
|
|
88
|
+
publisher: extractPublisher(),
|
|
89
|
+
intro: extractIntro(),
|
|
90
|
+
category: categoryMatch ? categoryMatch[1].trim() : '',
|
|
91
|
+
rating: extractRating(),
|
|
92
|
+
metadataReady: Boolean(title || author),
|
|
93
|
+
};
|
|
94
|
+
}
|
|
40
95
|
/**
|
|
41
96
|
* Reuse the public search page as a last-resort reader URL source when the
|
|
42
97
|
* cached shelf page cannot provide a trustworthy bookId-to-reader mapping.
|
|
@@ -108,51 +163,9 @@ async function resolveSearchReaderUrl(title, author) {
|
|
|
108
163
|
*/
|
|
109
164
|
async function loadReaderFallbackResult(page, readerUrl) {
|
|
110
165
|
await page.goto(readerUrl);
|
|
111
|
-
await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle', timeout: 10 });
|
|
166
|
+
await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle, .readerTopBar_title_link', timeout: 10 });
|
|
112
167
|
const result = await page.evaluate(`
|
|
113
|
-
(() => {
|
|
114
|
-
const text = (node) => node?.textContent?.trim() || '';
|
|
115
|
-
const bodyText = document.body?.innerText?.replace(/\\s+/g, ' ').trim() || '';
|
|
116
|
-
const titleSelector = '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle';
|
|
117
|
-
const authorSelector = '.horizontalReaderCoverPage_content_author, .wr_flyleaf_page_bookInfo_author';
|
|
118
|
-
const extractRating = () => {
|
|
119
|
-
const match = bodyText.match(/微信读书推荐值\\s*([0-9.]+%)/);
|
|
120
|
-
return match ? match[1] : '';
|
|
121
|
-
};
|
|
122
|
-
const extractPublisher = () => {
|
|
123
|
-
const direct = text(document.querySelector('.introDialog_content_pub_line'));
|
|
124
|
-
return direct.startsWith('出版社') ? direct.replace(/^出版社\\s*/, '').trim() : '';
|
|
125
|
-
};
|
|
126
|
-
const extractIntro = () => {
|
|
127
|
-
const selectors = [
|
|
128
|
-
'.horizontalReaderCoverPage_content_bookInfo_intro',
|
|
129
|
-
'.wr_flyleaf_page_bookIntro_content',
|
|
130
|
-
'.introDialog_content_intro_para',
|
|
131
|
-
];
|
|
132
|
-
for (const selector of selectors) {
|
|
133
|
-
const value = text(document.querySelector(selector));
|
|
134
|
-
if (value) return value;
|
|
135
|
-
}
|
|
136
|
-
return '';
|
|
137
|
-
};
|
|
138
|
-
|
|
139
|
-
const categorySource = Array.from(document.scripts)
|
|
140
|
-
.map((script) => script.textContent || '')
|
|
141
|
-
.find((scriptText) => scriptText.includes('"category"')) || '';
|
|
142
|
-
const categoryMatch = categorySource.match(/"category"\\s*:\\s*"([^"]+)"/);
|
|
143
|
-
const title = text(document.querySelector(titleSelector));
|
|
144
|
-
const author = text(document.querySelector(authorSelector));
|
|
145
|
-
|
|
146
|
-
return {
|
|
147
|
-
title,
|
|
148
|
-
author,
|
|
149
|
-
publisher: extractPublisher(),
|
|
150
|
-
intro: extractIntro(),
|
|
151
|
-
category: categoryMatch ? categoryMatch[1].trim() : '',
|
|
152
|
-
rating: extractRating(),
|
|
153
|
-
metadataReady: Boolean(title || author),
|
|
154
|
-
};
|
|
155
|
-
})()
|
|
168
|
+
(${extractReaderFallbackMetadata.toString()})(document)
|
|
156
169
|
`);
|
|
157
170
|
return {
|
|
158
171
|
title: String(result?.title || '').trim(),
|
|
@@ -14,6 +14,7 @@ import { getRegistry } from '@jackwener/opencli/registry';
|
|
|
14
14
|
import './book.js';
|
|
15
15
|
import './highlights.js';
|
|
16
16
|
import './notes.js';
|
|
17
|
+
import { extractReaderFallbackMetadata, strictTitleFromWereadDocumentTitle } from './book.js';
|
|
17
18
|
describe('weread book-id positional args', () => {
|
|
18
19
|
const book = getRegistry().get('weread/book');
|
|
19
20
|
const highlights = getRegistry().get('weread/highlights');
|
|
@@ -356,6 +357,29 @@ describe('weread book-id positional args', () => {
|
|
|
356
357
|
message: 'Not logged in to WeRead',
|
|
357
358
|
});
|
|
358
359
|
});
|
|
360
|
+
it('does not guess author from document.title when the reader page skips cover metadata', async () => {
|
|
361
|
+
const nodes = new Map([
|
|
362
|
+
['.readerTopBar_title_link', { textContent: 'Part 1 - Part 2' }],
|
|
363
|
+
['.introDialog_content_pub_line', { textContent: '出版社 测试出版社' }],
|
|
364
|
+
['.introDialog_content_intro_para', { textContent: '测试简介。' }],
|
|
365
|
+
]);
|
|
366
|
+
const mockDocument = {
|
|
367
|
+
title: 'Part 1 - Part 2 - 作者甲 - 微信读书',
|
|
368
|
+
body: { innerText: '微信读书推荐值 88.8%' },
|
|
369
|
+
scripts: [],
|
|
370
|
+
querySelector: (selector) => nodes.get(selector) || null,
|
|
371
|
+
};
|
|
372
|
+
expect(strictTitleFromWereadDocumentTitle(mockDocument.title)).toBe('');
|
|
373
|
+
expect(extractReaderFallbackMetadata(mockDocument)).toEqual({
|
|
374
|
+
title: 'Part 1 - Part 2',
|
|
375
|
+
author: '',
|
|
376
|
+
publisher: '测试出版社',
|
|
377
|
+
intro: '测试简介。',
|
|
378
|
+
category: '',
|
|
379
|
+
rating: '88.8%',
|
|
380
|
+
metadataReady: true,
|
|
381
|
+
});
|
|
382
|
+
});
|
|
359
383
|
it('passes the positional book-id to highlights', async () => {
|
|
360
384
|
mockFetchPrivateApi.mockResolvedValue({ updated: [] });
|
|
361
385
|
await highlights.func({}, { 'book-id': 'abc', limit: 5 });
|
|
@@ -20,9 +20,9 @@ cli({
|
|
|
20
20
|
throw new CliError('INVALID_ARGUMENT', 'limit must be a positive integer', 'Example: --limit 5');
|
|
21
21
|
}
|
|
22
22
|
const credentials = loadXiaoyuzhouCredentials();
|
|
23
|
-
const response = await requestXiaoyuzhouJson('/v1/
|
|
23
|
+
const response = await requestXiaoyuzhouJson('/v1/episode/list', {
|
|
24
24
|
method: 'POST',
|
|
25
|
-
body: { pid: args.id, limit: requestedLimit },
|
|
25
|
+
body: { pid: args.id, order: 'desc', limit: requestedLimit },
|
|
26
26
|
credentials,
|
|
27
27
|
});
|
|
28
28
|
const episodes = response.data ?? [];
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
+
|
|
4
|
+
const { mockRequestJson, mockLoadCredentials } = vi.hoisted(() => ({
|
|
5
|
+
mockRequestJson: vi.fn(),
|
|
6
|
+
mockLoadCredentials: vi.fn(),
|
|
7
|
+
}));
|
|
8
|
+
|
|
9
|
+
vi.mock('./auth.js', async () => {
|
|
10
|
+
const actual = await vi.importActual('./auth.js');
|
|
11
|
+
return {
|
|
12
|
+
...actual,
|
|
13
|
+
requestXiaoyuzhouJson: mockRequestJson,
|
|
14
|
+
loadXiaoyuzhouCredentials: mockLoadCredentials,
|
|
15
|
+
};
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
await import('./podcast-episodes.js');
|
|
19
|
+
|
|
20
|
+
let cmd;
|
|
21
|
+
|
|
22
|
+
beforeAll(() => {
|
|
23
|
+
cmd = getRegistry().get('xiaoyuzhou/podcast-episodes');
|
|
24
|
+
expect(cmd?.func).toBeTypeOf('function');
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
describe('xiaoyuzhou podcast-episodes', () => {
|
|
28
|
+
beforeEach(() => {
|
|
29
|
+
mockRequestJson.mockReset();
|
|
30
|
+
mockLoadCredentials.mockReset();
|
|
31
|
+
mockLoadCredentials.mockReturnValue({ access_token: 'access', refresh_token: 'refresh' });
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
it('calls the fixed episode list endpoint with desc ordering', async () => {
|
|
35
|
+
mockRequestJson.mockResolvedValue({
|
|
36
|
+
data: [
|
|
37
|
+
{
|
|
38
|
+
eid: 'ep-1',
|
|
39
|
+
title: 'Episode 1',
|
|
40
|
+
duration: 3661,
|
|
41
|
+
playCount: 42,
|
|
42
|
+
pubDate: '2026-04-20T10:00:00.000Z',
|
|
43
|
+
},
|
|
44
|
+
],
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
const result = await cmd.func(null, {
|
|
48
|
+
id: 'podcast-1',
|
|
49
|
+
limit: 3,
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
expect(mockRequestJson).toHaveBeenCalledWith('/v1/episode/list', {
|
|
53
|
+
method: 'POST',
|
|
54
|
+
body: { pid: 'podcast-1', order: 'desc', limit: 3 },
|
|
55
|
+
credentials: { access_token: 'access', refresh_token: 'refresh' },
|
|
56
|
+
});
|
|
57
|
+
expect(result).toEqual([
|
|
58
|
+
{
|
|
59
|
+
eid: 'ep-1',
|
|
60
|
+
title: 'Episode 1',
|
|
61
|
+
duration: '61:01',
|
|
62
|
+
plays: 42,
|
|
63
|
+
date: '2026-04-20',
|
|
64
|
+
},
|
|
65
|
+
]);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
it('rejects non-positive limits before hitting the API', async () => {
|
|
69
|
+
await expect(cmd.func(null, {
|
|
70
|
+
id: 'podcast-1',
|
|
71
|
+
limit: 0,
|
|
72
|
+
})).rejects.toMatchObject({
|
|
73
|
+
code: 'INVALID_ARGUMENT',
|
|
74
|
+
message: 'limit must be a positive integer',
|
|
75
|
+
});
|
|
76
|
+
expect(mockRequestJson).not.toHaveBeenCalled();
|
|
77
|
+
});
|
|
78
|
+
});
|