@jackwener/opencli 1.7.6 → 1.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -8
- package/README.zh-CN.md +14 -8
- package/cli-manifest.json +469 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/amazon/discussion.js +37 -6
- package/clis/amazon/discussion.test.js +147 -32
- package/clis/bilibili/video.js +11 -4
- package/clis/bilibili/video.test.js +51 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/chatgpt-app/ask.js +3 -19
- package/clis/chatgpt-app/ax.js +132 -1
- package/clis/chatgpt-app/ax.test.js +23 -0
- package/clis/chatgpt-app/send.js +2 -21
- package/clis/deepseek/ask.js +50 -18
- package/clis/deepseek/ask.test.js +195 -2
- package/clis/deepseek/utils.js +113 -29
- package/clis/deepseek/utils.test.js +109 -1
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/powerchina/search.js +250 -0
- package/clis/powerchina/search.test.js +67 -0
- package/clis/sinafinance/stock.js +5 -2
- package/clis/sinafinance/stock.test.js +59 -0
- package/clis/toutiao/articles.js +81 -0
- package/clis/toutiao/articles.test.js +23 -0
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +28 -0
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +3 -2
- package/clis/twitter/tweets.test.js +1 -1
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weixin/create-draft.js +225 -0
- package/clis/weixin/drafts.js +65 -0
- package/clis/weixin/drafts.test.js +65 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/cdp.js +11 -2
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +333 -43
- package/dist/src/cli.test.js +257 -1
- package/dist/src/commanderAdapter.js +12 -0
- package/dist/src/commanderAdapter.test.js +11 -0
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/package.json +5 -1
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { AuthRequiredError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
3
|
+
import { getRegistry } from '@jackwener/opencli/registry';
|
|
4
|
+
import './create-draft.js';
|
|
5
|
+
import './drafts.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Build a minimal page double for the weixin adapter tests.
 *
 * Every method is a vitest mock that resolves to `undefined`; callers may
 * supply their own `evaluate` mock through `overrides.evaluate`.
 */
function createPageMock(overrides = {}) {
    const resolvesToUndefined = () => vi.fn().mockResolvedValue(undefined);
    const page = {
        goto: resolvesToUndefined(),
        wait: resolvesToUndefined(),
        evaluate: overrides.evaluate ?? resolvesToUndefined(),
        setFileInput: resolvesToUndefined(),
    };
    return page;
}
|
|
15
|
+
|
|
16
|
+
describe('weixin command registration', () => {
    it('registers create-draft and drafts commands', () => {
        // Look the commands up by site + name rather than by registry key.
        const registered = Array.from(getRegistry().values());
        const findWeixinCommand = (name) => registered.find((cmd) => cmd.site === 'weixin' && cmd.name === name);

        expect(findWeixinCommand('create-draft')).toBeDefined();
        expect(findWeixinCommand('drafts')).toBeDefined();
    });
});
|
|
24
|
+
|
|
25
|
+
describe('weixin drafts command', () => {
    // Resolve the command at call time so every test reads the live registry.
    const runDrafts = (page, limit) => getRegistry().get('weixin/drafts').func(page, { limit });

    it('throws AuthRequiredError when no session token is available', async () => {
        // The first (and only) evaluate call resolves to undefined.
        const evaluate = vi.fn().mockResolvedValueOnce(undefined);
        const page = createPageMock({ evaluate });

        await expect(runDrafts(page, 10)).rejects.toBeInstanceOf(AuthRequiredError);
    });

    it('fails instead of scraping arbitrary body text when structured selectors miss', async () => {
        // Second evaluate call: inspect the injected script, then report no rows.
        const inspectScriptAndReturnNoRows = async (script) => {
            expect(script).not.toContain('document.body.innerText');
            return [];
        };
        const evaluate = vi.fn()
            .mockResolvedValueOnce('123456')
            .mockImplementationOnce(inspectScriptAndReturnNoRows);
        const page = createPageMock({ evaluate });

        await expect(runDrafts(page, 10)).rejects.toBeInstanceOf(EmptyResultError);
    });

    it('returns structured drafts and respects the requested limit', async () => {
        const scrapedRows = [
            { Index: 1, Title: '第一篇草稿', Time: '2026-04-24 10:00' },
            { Index: 2, Title: '第二篇草稿', Time: '2026-04-24 11:00' },
        ];
        const evaluate = vi.fn()
            .mockResolvedValueOnce('123456')
            .mockResolvedValueOnce(scrapedRows);
        const page = createPageMock({ evaluate });

        const result = await runDrafts(page, 1);

        // Expected rows are spelled out separately so the assertion does not share
        // object identity with the mocked input.
        expect(result).toEqual([
            { Index: 1, Title: '第一篇草稿', Time: '2026-04-24 10:00' },
        ]);
    });
});
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import { CliError } from '@jackwener/opencli/errors';
|
|
3
|
+
import { WEREAD_UA, WEREAD_WEB_ORIGIN, WEREAD_DOMAIN } from './utils.js';
|
|
4
|
+
|
|
5
|
+
// Base URL of the WeRead web API; request paths are appended directly to it.
const WEB_API = `${WEREAD_WEB_ORIGIN}/web`;
|
|
6
|
+
|
|
7
|
+
/**
 * Serialize cookie objects into the value of a `Cookie` request header.
 *
 * @param cookies Array of objects exposing `name` and `value`.
 * @returns `name=value` pairs joined with `'; '` (empty string for no cookies).
 */
function buildCookieHeader(cookies) {
    const pairs = cookies.map(({ name, value }) => `${name}=${value}`);
    return pairs.join('; ');
}
|
|
10
|
+
|
|
11
|
+
/**
 * POST a JSON body to the WeRead web API, forwarding the browser's cookies.
 *
 * Cookies are read twice from `page` (scoped to the request URL and to the
 * WeRead domain) and merged by name before being sent as a `Cookie` header.
 *
 * @param page Object exposing `getCookies({ url } | { domain })`.
 * @param path API path appended to `WEB_API` (e.g. `/book/chapterInfos`).
 * @param body JSON-serializable request payload.
 * @returns The parsed JSON response.
 * @throws CliError `AUTH_REQUIRED` on HTTP 401 or errcode -2010 / -2012,
 *   `FETCH_ERROR` on any other non-OK status, and `PARSE_ERROR` when a
 *   successful response is not valid JSON.
 */
async function postWebApiWithCookies(page, path, body) {
    const url = `${WEB_API}${path}`;
    const [apiCookies, domainCookies] = await Promise.all([
        page.getCookies({ url }),
        page.getCookies({ domain: WEREAD_DOMAIN }),
    ]);

    // Domain-wide cookies go in first; URL-scoped cookies with the same name
    // overwrite them. A nullish lookup result is treated as "no cookies".
    const merged = new Map();
    for (const c of domainCookies ?? []) merged.set(c.name, c);
    for (const c of apiCookies ?? []) merged.set(c.name, c);
    const cookieHeader = buildCookieHeader(Array.from(merged.values()));

    const resp = await fetch(url, {
        method: 'POST',
        headers: {
            'User-Agent': WEREAD_UA,
            'Content-Type': 'application/json',
            'Origin': WEREAD_WEB_ORIGIN,
            'Referer': `${WEREAD_WEB_ORIGIN}/`,
            ...(cookieHeader ? { 'Cookie': cookieHeader } : {}),
        },
        body: JSON.stringify(body),
    });

    if (resp.status === 401) {
        throw new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first');
    }

    let data;
    try {
        data = await resp.json();
    } catch {
        // A failed request with a non-JSON body (e.g. an HTML error page) is an
        // HTTP failure, not a parsing problem: report the status code instead of
        // hiding it behind PARSE_ERROR.
        if (!resp.ok) {
            throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
        }
        throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
    }

    // The errcode check runs before the status check so that these two codes map
    // to AUTH_REQUIRED regardless of the HTTP status they arrive with.
    if (data?.errcode === -2010 || data?.errcode === -2012) {
        throw new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first');
    }
    if (!resp.ok) {
        throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
    }
    return data;
}
|
|
53
|
+
|
|
54
|
+
/**
 * POST a JSON body to the WeRead web API without any cookies.
 *
 * @param path API path appended to `WEB_API`.
 * @param body JSON-serializable request payload.
 * @returns The parsed JSON response.
 * @throws CliError `FETCH_ERROR` on a non-OK status, `PARSE_ERROR` when the
 *   body of an OK response is not valid JSON.
 */
async function postWebApi(path, body) {
    const headers = {
        'User-Agent': WEREAD_UA,
        'Content-Type': 'application/json',
    };
    const resp = await fetch(`${WEB_API}${path}`, {
        method: 'POST',
        headers,
        body: JSON.stringify(body),
    });

    if (resp.ok) {
        try {
            return await resp.json();
        } catch {
            throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
        }
    }
    throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
}
|
|
73
|
+
|
|
74
|
+
/**
 * Read an integer option for the ai-outline command.
 *
 * Uses `fallback` when the option is null/undefined (e.g. `func` called
 * without the declared defaults applied) and throws `INVALID_ARGUMENT` when
 * the value is not an integer or is smaller than `min`.
 */
function readOutlineIntArg(value, fallback, min, message, hint) {
    const parsed = Number(value ?? fallback);
    if (!Number.isInteger(parsed) || parsed < min) {
        throw new CliError('INVALID_ARGUMENT', message, hint);
    }
    return parsed;
}

/**
 * Flatten the outline API payload into `{ chapter, idx, level, text }` rows.
 *
 * Items at level <= 1 or deeper than `maxDepth` are dropped. Level-4 items
 * that arrive without a `uiIdx` are numbered `<last level-3 idx>.<n>`.
 */
function collectOutlineRows(itemsArray, chapterNameMap, maxDepth) {
    const rows = [];
    for (const entry of itemsArray) {
        const items = entry.items;
        if (!Array.isArray(items) || items.length === 0) continue;

        const chapterName = chapterNameMap.get(entry.chapterUid) ?? `Chapter ${entry.chapterUid}`;
        let lastL3Idx = '';
        let l4Counter = 0;

        for (const item of items) {
            const level = item.level ?? 1;
            if (level <= 1) continue;
            if (level > maxDepth) continue;

            let idx = item.uiIdx ?? '';
            if (level === 3 && idx) {
                // A new level-3 heading restarts the numbering of its children.
                lastL3Idx = idx;
                l4Counter = 0;
            }
            if (level === 4 && !idx && lastL3Idx) {
                l4Counter++;
                idx = `${lastL3Idx}.${l4Counter}`;
            }

            rows.push({ chapter: chapterName, idx, level, text: item.text ?? '' });
        }
    }
    return rows;
}

/** Render outline rows as one `{ outline }` text block per chapter, in first-seen order. */
function renderOutlineByChapter(rows) {
    const grouped = new Map();
    for (const row of rows) {
        if (!grouped.has(row.chapter)) grouped.set(row.chapter, []);
        grouped.get(row.chapter).push(row);
    }

    const results = [];
    for (const [chapter, chapterRows] of grouped) {
        const lines = [`📖 ${chapter}`];
        for (const row of chapterRows) {
            // Level 2 is flush left; each deeper level adds one indent unit.
            const indent = ' '.repeat(row.level - 2);
            const prefix = row.level === 2 ? `${row.idx}. ` : `${row.idx} `;
            lines.push(`${indent}${prefix}${row.text}`);
        }
        results.push({ outline: lines.join('\n') });
    }
    return results;
}

/**
 * `weread ai-outline <book-id>`: fetch the chapter list (with cookies), then
 * the AI outline for those chapters. Returns flat rows with `--raw`, or one
 * text block per chapter otherwise. `--limit` caps rows in raw mode and
 * chapter blocks in plain mode.
 */
cli({
    site: 'weread',
    name: 'ai-outline',
    description: 'Get AI-generated outline for a book',
    domain: 'weread.qq.com',
    strategy: Strategy.COOKIE,
    defaultFormat: 'plain',
    args: [
        { name: 'book-id', positional: true, required: true, help: 'Book ID (from shelf or search results)' },
        { name: 'limit', type: 'int', default: 200, help: 'Max outline items to return' },
        { name: 'depth', type: 'int', default: 4, help: 'Max outline depth (2=topics, 3=key points, 4=details)' },
        { name: 'raw', type: 'boolean', default: false, help: 'Output structured rows (chapter/idx/level/text) for programmatic use' },
    ],
    columns: undefined,
    func: async (page, args) => {
        const bookId = String(args['book-id'] || '').trim();
        const rawMode = Boolean(args.raw);

        // Validate everything before touching the network so bad input fails fast
        // with INVALID_ARGUMENT instead of an empty result or a misleading NOT_FOUND.
        if (!bookId) {
            throw new CliError('INVALID_ARGUMENT', 'book-id is required', 'Example: opencli weread ai-outline <book-id>');
        }
        const limit = readOutlineIntArg(args.limit, 200, 1, 'limit must be a positive integer', 'Example: --limit 5');
        const maxDepth = readOutlineIntArg(args.depth, 4, 2, 'depth must be an integer >= 2', 'Example: --depth 3');

        const chapterData = await postWebApiWithCookies(page, '/book/chapterInfos', {
            bookIds: [bookId],
            sinces: [0],
        });
        const chapters = chapterData?.data?.[0]?.updated ?? [];
        if (chapters.length === 0) {
            throw new CliError('NOT_FOUND', 'No chapters found for this book', 'Check that the book ID is correct');
        }

        const chapterUids = chapters.map((c) => c.chapterUid);
        const chapterNameMap = new Map();
        for (const c of chapters) {
            chapterNameMap.set(c.chapterUid, c.title ?? '');
        }

        const outlineData = await postWebApi('/book/outline', {
            bookId,
            chapterUids,
        });

        const rawRows = collectOutlineRows(outlineData?.itemsArray ?? [], chapterNameMap, maxDepth);
        if (rawRows.length === 0) {
            throw new CliError('NOT_FOUND', 'No AI outline available for this book', 'AI outlines may not be generated for all books');
        }

        if (rawMode) {
            return rawRows.slice(0, limit);
        }
        return renderOutlineByChapter(rawRows).slice(0, limit);
    },
});
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
+
import './ai-outline.js';
|
|
4
|
+
|
|
5
|
+
describe('weread ai-outline', () => {
    const command = getRegistry().get('weread/ai-outline');

    beforeEach(() => {
        vi.restoreAllMocks();
        // restoreAllMocks does not undo vi.stubGlobal, so explicitly drop the
        // `fetch` stub installed by the previous test before the next one runs.
        vi.unstubAllGlobals();
    });

    it('registers ai-outline with plain default output', () => {
        expect(command?.defaultFormat).toBe('plain');
    });

    it('maps chapterInfos auth-expired responses to AUTH_REQUIRED', async () => {
        expect(command?.func).toBeTypeOf('function');
        const page = {
            getCookies: vi.fn()
                .mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid123', domain: '.weread.qq.com' }])
                .mockResolvedValueOnce([{ name: 'wr_name', value: 'alice', domain: '.weread.qq.com' }]),
        };
        // HTTP 200 whose JSON body carries the "login timed out" errcode.
        vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
            ok: true,
            status: 200,
            json: () => Promise.resolve({ errcode: -2012, errmsg: '登录超时' }),
        }));
        await expect(command.func(page, { 'book-id': 'book-1' })).rejects.toMatchObject({
            code: 'AUTH_REQUIRED',
            message: 'Not logged in to WeRead',
        });
    });

    it('returns structured rows for --raw and respects depth filtering', async () => {
        expect(command?.func).toBeTypeOf('function');
        const page = {
            getCookies: vi.fn()
                .mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid123', domain: '.weread.qq.com' }])
                .mockResolvedValueOnce([{ name: 'wr_name', value: 'alice', domain: '.weread.qq.com' }]),
        };
        // First response: chapter list. Second response: outline items for that chapter.
        const fetchMock = vi.fn()
            .mockResolvedValueOnce({
                ok: true,
                status: 200,
                json: () => Promise.resolve({
                    data: [{
                        updated: [
                            { chapterUid: 'c1', title: '第一章' },
                        ],
                    }],
                }),
            })
            .mockResolvedValueOnce({
                ok: true,
                status: 200,
                json: () => Promise.resolve({
                    itemsArray: [{
                        chapterUid: 'c1',
                        items: [
                            { level: 2, uiIdx: '1', text: '主题一' },
                            { level: 3, uiIdx: '1.1', text: '要点一' },
                            { level: 4, text: '细节一' },
                        ],
                    }],
                }),
            });
        vi.stubGlobal('fetch', fetchMock);
        const rows = await command.func(page, { 'book-id': 'book-1', raw: true, depth: 3, limit: 10 });
        // depth: 3 drops the level-4 item.
        expect(rows).toEqual([
            { chapter: '第一章', idx: '1', level: 2, text: '主题一' },
            { chapter: '第一章', idx: '1.1', level: 3, text: '要点一' },
        ]);
        expect(fetchMock).toHaveBeenNthCalledWith(1, 'https://weread.qq.com/web/book/chapterInfos', expect.objectContaining({
            method: 'POST',
            headers: expect.objectContaining({
                Cookie: 'wr_name=alice; wr_vid=vid123',
            }),
        }));
        expect(fetchMock).toHaveBeenNthCalledWith(2, 'https://weread.qq.com/web/book/outline', expect.objectContaining({
            method: 'POST',
        }));
    });
});
|
package/clis/weread/book.js
CHANGED
|
@@ -37,6 +37,61 @@ function countSearchIdentities(entries) {
|
|
|
37
37
|
}
|
|
38
38
|
return counts;
|
|
39
39
|
}
|
|
40
|
+
/**
 * Extract the book title from a WeRead `document.title`.
 *
 * Only the strict "<title> - 微信读书" shape is accepted. If the title lacks
 * that suffix, or still contains a " - " separator once the suffix is removed
 * (making it ambiguous), an empty string is returned.
 *
 * @param rawTitle The page's document title; falsy values are treated as ''.
 * @returns The trimmed book title, or '' when it cannot be determined safely.
 */
export function strictTitleFromWereadDocumentTitle(rawTitle) {
    const siteSuffix = ' - 微信读书';
    const separator = ' - ';
    const pageTitle = String(rawTitle || '').trim();

    if (pageTitle.endsWith(siteSuffix)) {
        const candidate = pageTitle.slice(0, pageTitle.length - siteSuffix.length).trim();
        if (!candidate.includes(separator)) {
            return candidate;
        }
    }
    return '';
}
|
|
50
|
+
/**
 * Scrape book metadata from a WeRead reader page.
 *
 * IMPORTANT: this function is serialized with `.toString()` and evaluated
 * inside the browser page, so it must be fully self-contained. It may only
 * use its `doc` argument and language built-ins, never module-level helpers
 * or imports (they do not exist in the page and would throw ReferenceError).
 *
 * @param doc A `Document`-like object exposing `querySelector`, `title`,
 *   `body.innerText` and `scripts`.
 * @returns `{ title, author, publisher, intro, category, rating, metadataReady }`;
 *   missing values are empty strings, and `metadataReady` is true when a title
 *   or an author was found.
 */
export function extractReaderFallbackMetadata(doc) {
    const text = (node) => node?.textContent?.trim() || '';
    // Text of the first selector that matches a non-empty node.
    const firstText = (...selectors) => {
        for (const selector of selectors) {
            const value = text(doc.querySelector(selector));
            if (value)
                return value;
        }
        return '';
    };
    // Inlined copy of strictTitleFromWereadDocumentTitle (keep the two in sync):
    // the module-level function is not available once this code runs in the page.
    const strictDocumentTitle = (rawTitle) => {
        const suffix = ' - 微信读书';
        const normalized = String(rawTitle || '').trim();
        if (!normalized.endsWith(suffix))
            return '';
        const base = normalized.slice(0, -suffix.length).trim();
        // Extra separators mean the title is ambiguous; refuse to guess.
        return base.includes(' - ') ? '' : base;
    };
    const bodyText = doc.body?.innerText?.replace(/\s+/g, ' ').trim() || '';
    const extractRating = () => {
        const match = bodyText.match(/微信读书推荐值\s*([0-9.]+%)/);
        return match ? match[1] : '';
    };
    const extractPublisher = () => {
        const direct = text(doc.querySelector('.introDialog_content_pub_line'));
        return direct.startsWith('出版社') ? direct.replace(/^出版社\s*/, '').trim() : '';
    };
    const extractIntro = () => firstText(
        '.horizontalReaderCoverPage_content_bookInfo_intro',
        '.wr_flyleaf_page_bookIntro_content',
        '.introDialog_content_intro_para',
    );
    // The category is read from the first inline script that mentions a "category" key.
    const categorySource = Array.from(doc.scripts || [])
        .map((script) => script.textContent || '')
        .find((scriptText) => scriptText.includes('"category"')) || '';
    const categoryMatch = categorySource.match(/"category"\s*:\s*"([^"]+)"/);
    // Prefer structured DOM nodes; fall back to document.title only in its strict shape.
    const title = firstText(
        '.horizontalReaderCoverPage_content_bookTitle',
        '.wr_flyleaf_page_bookInfo_bookTitle',
        '.outline_book_detail_header_title',
        '.readerTopBar_title_link',
    ) || strictDocumentTitle(doc.title || '');
    const author = firstText(
        '.horizontalReaderCoverPage_content_author',
        '.wr_flyleaf_page_bookInfo_author',
        '.outline_book_detail_header_author',
    );
    return {
        title,
        author,
        publisher: extractPublisher(),
        intro: extractIntro(),
        category: categoryMatch ? categoryMatch[1].trim() : '',
        rating: extractRating(),
        metadataReady: Boolean(title || author),
    };
}
|
|
40
95
|
/**
|
|
41
96
|
* Reuse the public search page as a last-resort reader URL source when the
|
|
42
97
|
* cached shelf page cannot provide a trustworthy bookId-to-reader mapping.
|
|
@@ -108,51 +163,9 @@ async function resolveSearchReaderUrl(title, author) {
|
|
|
108
163
|
*/
|
|
109
164
|
async function loadReaderFallbackResult(page, readerUrl) {
|
|
110
165
|
await page.goto(readerUrl);
|
|
111
|
-
await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle', timeout: 10 });
|
|
166
|
+
await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle, .readerTopBar_title_link', timeout: 10 });
|
|
112
167
|
const result = await page.evaluate(`
|
|
113
|
-
(()
|
|
114
|
-
const text = (node) => node?.textContent?.trim() || '';
|
|
115
|
-
const bodyText = document.body?.innerText?.replace(/\\s+/g, ' ').trim() || '';
|
|
116
|
-
const titleSelector = '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle';
|
|
117
|
-
const authorSelector = '.horizontalReaderCoverPage_content_author, .wr_flyleaf_page_bookInfo_author';
|
|
118
|
-
const extractRating = () => {
|
|
119
|
-
const match = bodyText.match(/微信读书推荐值\\s*([0-9.]+%)/);
|
|
120
|
-
return match ? match[1] : '';
|
|
121
|
-
};
|
|
122
|
-
const extractPublisher = () => {
|
|
123
|
-
const direct = text(document.querySelector('.introDialog_content_pub_line'));
|
|
124
|
-
return direct.startsWith('出版社') ? direct.replace(/^出版社\\s*/, '').trim() : '';
|
|
125
|
-
};
|
|
126
|
-
const extractIntro = () => {
|
|
127
|
-
const selectors = [
|
|
128
|
-
'.horizontalReaderCoverPage_content_bookInfo_intro',
|
|
129
|
-
'.wr_flyleaf_page_bookIntro_content',
|
|
130
|
-
'.introDialog_content_intro_para',
|
|
131
|
-
];
|
|
132
|
-
for (const selector of selectors) {
|
|
133
|
-
const value = text(document.querySelector(selector));
|
|
134
|
-
if (value) return value;
|
|
135
|
-
}
|
|
136
|
-
return '';
|
|
137
|
-
};
|
|
138
|
-
|
|
139
|
-
const categorySource = Array.from(document.scripts)
|
|
140
|
-
.map((script) => script.textContent || '')
|
|
141
|
-
.find((scriptText) => scriptText.includes('"category"')) || '';
|
|
142
|
-
const categoryMatch = categorySource.match(/"category"\\s*:\\s*"([^"]+)"/);
|
|
143
|
-
const title = text(document.querySelector(titleSelector));
|
|
144
|
-
const author = text(document.querySelector(authorSelector));
|
|
145
|
-
|
|
146
|
-
return {
|
|
147
|
-
title,
|
|
148
|
-
author,
|
|
149
|
-
publisher: extractPublisher(),
|
|
150
|
-
intro: extractIntro(),
|
|
151
|
-
category: categoryMatch ? categoryMatch[1].trim() : '',
|
|
152
|
-
rating: extractRating(),
|
|
153
|
-
metadataReady: Boolean(title || author),
|
|
154
|
-
};
|
|
155
|
-
})()
|
|
168
|
+
(${extractReaderFallbackMetadata.toString()})(document)
|
|
156
169
|
`);
|
|
157
170
|
return {
|
|
158
171
|
title: String(result?.title || '').trim(),
|
|
@@ -14,6 +14,7 @@ import { getRegistry } from '@jackwener/opencli/registry';
|
|
|
14
14
|
import './book.js';
|
|
15
15
|
import './highlights.js';
|
|
16
16
|
import './notes.js';
|
|
17
|
+
import { extractReaderFallbackMetadata, strictTitleFromWereadDocumentTitle } from './book.js';
|
|
17
18
|
describe('weread book-id positional args', () => {
|
|
18
19
|
const book = getRegistry().get('weread/book');
|
|
19
20
|
const highlights = getRegistry().get('weread/highlights');
|
|
@@ -356,6 +357,29 @@ describe('weread book-id positional args', () => {
|
|
|
356
357
|
message: 'Not logged in to WeRead',
|
|
357
358
|
});
|
|
358
359
|
});
|
|
360
|
+
it('does not guess author from document.title when the reader page skips cover metadata', async () => {
    // Fake DOM: only the top-bar title, the publisher line and the intro paragraph exist.
    const nodes = new Map()
        .set('.readerTopBar_title_link', { textContent: 'Part 1 - Part 2' })
        .set('.introDialog_content_pub_line', { textContent: '出版社 测试出版社' })
        .set('.introDialog_content_intro_para', { textContent: '测试简介。' });
    const lookup = (selector) => nodes.get(selector) || null;
    const mockDocument = {
        title: 'Part 1 - Part 2 - 作者甲 - 微信读书',
        body: { innerText: '微信读书推荐值 88.8%' },
        scripts: [],
        querySelector: lookup,
    };

    // The document title has extra separators, so it must be rejected as ambiguous.
    expect(strictTitleFromWereadDocumentTitle(mockDocument.title)).toBe('');

    const expectedMetadata = {
        title: 'Part 1 - Part 2',
        author: '',
        publisher: '测试出版社',
        intro: '测试简介。',
        category: '',
        rating: '88.8%',
        metadataReady: true,
    };
    expect(extractReaderFallbackMetadata(mockDocument)).toEqual(expectedMetadata);
});
|
|
359
383
|
it('passes the positional book-id to highlights', async () => {
|
|
360
384
|
mockFetchPrivateApi.mockResolvedValue({ updated: [] });
|
|
361
385
|
await highlights.func({}, { 'book-id': 'abc', limit: 5 });
|
|
@@ -20,9 +20,9 @@ cli({
|
|
|
20
20
|
throw new CliError('INVALID_ARGUMENT', 'limit must be a positive integer', 'Example: --limit 5');
|
|
21
21
|
}
|
|
22
22
|
const credentials = loadXiaoyuzhouCredentials();
|
|
23
|
-
const response = await requestXiaoyuzhouJson('/v1/
|
|
23
|
+
const response = await requestXiaoyuzhouJson('/v1/episode/list', {
|
|
24
24
|
method: 'POST',
|
|
25
|
-
body: { pid: args.id, limit: requestedLimit },
|
|
25
|
+
body: { pid: args.id, order: 'desc', limit: requestedLimit },
|
|
26
26
|
credentials,
|
|
27
27
|
});
|
|
28
28
|
const episodes = response.data ?? [];
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
+
|
|
4
|
+
// Create the mock functions through vi.hoisted so they can be referenced from
// the vi.mock factory below.
const { mockRequestJson, mockLoadCredentials } = vi.hoisted(() => ({
    mockRequestJson: vi.fn(),
    mockLoadCredentials: vi.fn(),
}));

// Replace only the two auth helpers; every other './auth.js' export keeps its
// real implementation.
vi.mock('./auth.js', async () => {
    const actual = await vi.importActual('./auth.js');
    return {
        ...actual,
        requestXiaoyuzhouJson: mockRequestJson,
        loadXiaoyuzhouCredentials: mockLoadCredentials,
    };
});

// Load the command module once the mock is declared; the lookup in beforeAll
// relies on this import having registered the command.
await import('./podcast-episodes.js');

// Command under test, resolved once in beforeAll.
let cmd;

beforeAll(() => {
    cmd = getRegistry().get('xiaoyuzhou/podcast-episodes');
    // Fail the whole file early if the command is missing from the registry.
    expect(cmd?.func).toBeTypeOf('function');
});
|
|
26
|
+
|
|
27
|
+
describe('xiaoyuzhou podcast-episodes', () => {
    // A fresh object per call keeps mocked input and expected values from sharing identity.
    const fakeCredentials = () => ({ access_token: 'access', refresh_token: 'refresh' });
    const podcastId = 'podcast-1';

    beforeEach(() => {
        mockRequestJson.mockReset();
        mockLoadCredentials.mockReset();
        mockLoadCredentials.mockReturnValue(fakeCredentials());
    });

    it('calls the fixed episode list endpoint with desc ordering', async () => {
        const apiEpisode = {
            eid: 'ep-1',
            title: 'Episode 1',
            duration: 3661,
            playCount: 42,
            pubDate: '2026-04-20T10:00:00.000Z',
        };
        mockRequestJson.mockResolvedValue({ data: [apiEpisode] });

        const result = await cmd.func(null, { id: podcastId, limit: 3 });

        const expectedRequest = {
            method: 'POST',
            body: { pid: 'podcast-1', order: 'desc', limit: 3 },
            credentials: fakeCredentials(),
        };
        expect(mockRequestJson).toHaveBeenCalledWith('/v1/episode/list', expectedRequest);

        // The raw API fields are reshaped: duration formatted, playCount -> plays, pubDate -> date.
        const expectedRow = {
            eid: 'ep-1',
            title: 'Episode 1',
            duration: '61:01',
            plays: 42,
            date: '2026-04-20',
        };
        expect(result).toEqual([expectedRow]);
    });

    it('rejects non-positive limits before hitting the API', async () => {
        const pending = cmd.func(null, { id: podcastId, limit: 0 });

        await expect(pending).rejects.toMatchObject({
            code: 'INVALID_ARGUMENT',
            message: 'limit must be a positive integer',
        });
        expect(mockRequestJson).not.toHaveBeenCalled();
    });
});
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `browser analyze <url>` — turn site-recon guesswork into deterministic CLI output.
|
|
3
|
+
*
|
|
4
|
+
* When an agent starts a new adapter, the first question is "which pattern am
|
|
5
|
+
* I looking at?" (A/B/C/D/E from site-recon docs) and "will Node-side fetch
|
|
6
|
+
* work, or will anti-bot middleware block me?". Today the agent has to open
|
|
7
|
+
* the page, poke `network`, try cURL, fail, guess again. This module condenses
|
|
8
|
+
* that into one call that returns a classification + evidence.
|
|
9
|
+
*
|
|
10
|
+
* Kept pure (no page imports) so the bulk is unit-testable; the CLI wrapper
|
|
11
|
+
* drives a real page, feeds the resulting signals here, and prints the verdict.
|
|
12
|
+
*/
|
|
13
|
+
import type { CliCommand } from '../registry.js';
|
|
14
|
+
/**
 * Signals collected from a loaded page. This is the input shared by
 * `detectAntiBot`, `classifyPattern` and `analyzeSite`.
 */
export interface PageSignals {
    /** URL we navigated to (may redirect; both fields help agents notice that). */
    requestedUrl: string;
    finalUrl: string;
    /** document.cookie split into names; value not needed for detection. */
    cookieNames: string[];
    /**
     * Response bodies captured during the navigation + first few seconds.
     * We only need enough body text to spot WAF markers; the CLI truncates
     * per-entry before feeding us.
     */
    networkEntries: Array<{
        url: string;
        status: number;
        contentType: string;
        /** First N chars of body; null when not available. */
        bodyPreview: string | null;
    }>;
    /**
     * Which globals the page exposes on `window`. We don't care about the values,
     * just presence — distinguishes Pattern B (SSR state) from Pattern A.
     */
    initialState: {
        __INITIAL_STATE__: boolean;
        __NUXT__: boolean;
        __NEXT_DATA__: boolean;
        __APOLLO_STATE__: boolean;
    };
    /** Document title — only for the human-debug `summary` field. */
    title: string;
}
|
|
45
|
+
/** Vendor identifiers that an `AntiBotVerdict` can carry in its `vendor` field. */
export type AntiBotVendor = 'aliyun_waf' | 'cloudflare' | 'akamai' | 'geetest' | 'unknown';
|
|
46
|
+
/** Result returned by `detectAntiBot`. */
export interface AntiBotVerdict {
    detected: boolean;
    /** NOTE(review): presumably `null` when nothing was detected; confirm against the implementation. */
    vendor: AntiBotVendor | null;
    evidence: string[];
    /** One-line imperative instruction for the agent. */
    implication: string;
}
|
|
53
|
+
/**
 * Produce an anti-bot verdict from the collected page signals.
 *
 * @param signals Signals gathered from the page.
 * @returns The anti-bot verdict for those signals.
 */
export declare function detectAntiBot(signals: PageSignals): AntiBotVerdict;
|
|
54
|
+
/** Site pattern label (A–E as named in the site-recon docs), or `'unknown'`. */
export type Pattern = 'A' | 'B' | 'C' | 'D' | 'E' | 'unknown';
|
|
55
|
+
/** Result returned by `classifyPattern`. */
export interface PatternVerdict {
    pattern: Pattern;
    reason: string;
    /** How many JSON XHR/fetch responses we saw during navigation. */
    json_responses: number;
    /** Count of non-2xx API responses — hint for token-gated (Pattern D). */
    auth_failures: number;
}
|
|
63
|
+
/**
 * Apply the decision tree from `site-recon.md` mechanically.
 *
 * B beats A when initial-state globals are present: even if the page fetches
 * more data via XHR afterwards, the SSR payload is the highest-leverage source.
 * D (token-gated) dominates when we see 401/403 on what looks like API
 * endpoints — without that, an authenticated route looks identical to A.
 *
 * @param signals Signals gathered from the page.
 * @returns The chosen pattern together with the reason and response counts.
 */
export declare function classifyPattern(signals: PageSignals): PatternVerdict;
|
|
72
|
+
/** An existing adapter matched by `findNearestAdapter`. */
export interface NearestAdapter {
    site: string;
    example_commands: string[];
    reason: string;
}
|
|
77
|
+
/**
 * Find existing adapters that target the same site.
 *
 * Keep the hostname match simple — agents extend naming conventions
 * differently per site, so we match on the registered `domain` field and fall
 * back to site-name containment. Returning `null` is fine; agents can always
 * read site-memory docs.
 *
 * @param finalUrl URL whose site should be matched against registered adapters.
 * @param registry Registered commands to search.
 * @returns The matching adapter, or `null` when none matches.
 */
export declare function findNearestAdapter(finalUrl: string, registry: Map<string, CliCommand>): NearestAdapter | null;
|
|
86
|
+
/** Report returned by `analyzeSite`, combining the individual verdicts. */
export interface AnalyzeReport {
    requested_url: string;
    final_url: string;
    title: string;
    pattern: PatternVerdict;
    anti_bot: AntiBotVerdict;
    initial_state: PageSignals['initialState'];
    nearest_adapter: NearestAdapter | null;
    /** A single imperative sentence that agents act on directly. */
    recommended_next_step: string;
}
|
|
96
|
+
/**
 * Synthesize the verdict from collected signals + registry.
 *
 * The `recommended_next_step` is deliberately a single imperative
 * sentence — agents act on it directly instead of re-deriving advice from
 * the structured fields.
 *
 * @param signals Signals gathered from the page.
 * @param registry Registered commands, used for the nearest-adapter lookup.
 * @returns The full analysis report.
 */
export declare function analyzeSite(signals: PageSignals, registry: Map<string, CliCommand>): AnalyzeReport;
|