@jackwener/opencli 1.7.18 → 1.7.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -8
- package/README.zh-CN.md +7 -8
- package/cli-manifest.json +305 -9
- package/clis/ctrip/ctrip.test.js +486 -1
- package/clis/ctrip/flight.js +136 -0
- package/clis/ctrip/hotel-search.js +132 -0
- package/clis/ctrip/utils.js +298 -0
- package/clis/google/search.js +16 -6
- package/clis/google-scholar/search.js +20 -5
- package/clis/google-scholar/search.test.js +35 -2
- package/clis/reddit/home.js +117 -0
- package/clis/reddit/home.test.js +127 -0
- package/clis/reddit/read.js +400 -54
- package/clis/reddit/read.test.js +315 -12
- package/clis/reddit/subreddit-info.js +117 -0
- package/clis/reddit/subreddit-info.test.js +163 -0
- package/clis/reddit/whoami.js +84 -0
- package/clis/reddit/whoami.test.js +105 -0
- package/clis/rednote/search.js +6 -2
- package/clis/twitter/bookmark-folder.js +3 -1
- package/clis/twitter/bookmarks.js +3 -1
- package/clis/twitter/followers.js +20 -5
- package/clis/twitter/followers.test.js +44 -0
- package/clis/twitter/following.js +36 -20
- package/clis/twitter/following.test.js +60 -8
- package/clis/twitter/likes.js +28 -13
- package/clis/twitter/likes.test.js +111 -1
- package/clis/twitter/list-add.js +128 -204
- package/clis/twitter/list-add.test.js +97 -1
- package/clis/twitter/list-tweets.js +13 -4
- package/clis/twitter/list-tweets.test.js +48 -0
- package/clis/twitter/lists.js +5 -2
- package/clis/twitter/post.js +23 -4
- package/clis/twitter/post.test.js +30 -0
- package/clis/twitter/profile.js +16 -8
- package/clis/twitter/profile.test.js +39 -0
- package/clis/twitter/reply.js +133 -10
- package/clis/twitter/reply.test.js +55 -0
- package/clis/twitter/search.js +188 -170
- package/clis/twitter/search.test.js +96 -258
- package/clis/twitter/shared.js +167 -16
- package/clis/twitter/shared.test.js +102 -1
- package/clis/twitter/timeline.js +3 -1
- package/clis/twitter/tweets.js +147 -51
- package/clis/twitter/tweets.test.js +238 -1
- package/clis/xiaohongshu/comments.js +23 -2
- package/clis/xiaohongshu/comments.test.js +63 -1
- package/clis/xiaohongshu/search.js +168 -13
- package/clis/xiaohongshu/search.test.js +82 -8
- package/clis/xueqiu/earnings-date.js +2 -2
- package/clis/xueqiu/kline.js +2 -2
- package/clis/xueqiu/utils.js +19 -0
- package/clis/xueqiu/utils.test.js +26 -0
- package/clis/zhihu/answer-detail.js +233 -0
- package/clis/zhihu/answer-detail.test.js +330 -0
- package/clis/zhihu/question.js +44 -10
- package/clis/zhihu/question.test.js +78 -1
- package/clis/zhihu/recommend.js +103 -0
- package/clis/zhihu/recommend.test.js +143 -0
- package/dist/src/browser/base-page.d.ts +3 -2
- package/dist/src/browser/base-page.test.js +2 -2
- package/dist/src/browser/cdp.js +3 -3
- package/dist/src/browser/page.d.ts +3 -2
- package/dist/src/browser/page.js +4 -4
- package/dist/src/browser/page.test.js +31 -0
- package/dist/src/browser/utils.d.ts +10 -0
- package/dist/src/browser/utils.js +37 -0
- package/dist/src/browser/utils.test.d.ts +1 -0
- package/dist/src/browser/utils.test.js +29 -0
- package/dist/src/cli-argv-preprocess.d.ts +37 -0
- package/dist/src/cli-argv-preprocess.js +131 -0
- package/dist/src/cli-argv-preprocess.test.d.ts +1 -0
- package/dist/src/cli-argv-preprocess.test.js +130 -0
- package/dist/src/cli.js +123 -86
- package/dist/src/cli.test.js +33 -28
- package/dist/src/commands/daemon.js +6 -7
- package/dist/src/doctor.js +15 -16
- package/dist/src/download/progress.js +15 -11
- package/dist/src/download/progress.test.d.ts +1 -0
- package/dist/src/download/progress.test.js +25 -0
- package/dist/src/execution.js +1 -3
- package/dist/src/execution.test.js +4 -16
- package/dist/src/help.d.ts +11 -0
- package/dist/src/help.js +46 -5
- package/dist/src/logger.js +8 -9
- package/dist/src/main.js +16 -0
- package/dist/src/output.js +4 -5
- package/dist/src/runtime-detect.d.ts +1 -1
- package/dist/src/runtime-detect.js +1 -1
- package/dist/src/runtime-detect.test.js +3 -2
- package/dist/src/tui.d.ts +0 -1
- package/dist/src/tui.js +9 -22
- package/dist/src/types.d.ts +3 -1
- package/dist/src/update-check.js +4 -5
- package/package.json +5 -4
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import { describe, expect, it, vi } from 'vitest';
|
|
2
2
|
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
3
|
import { JSDOM } from 'jsdom';
|
|
4
|
-
import { __test__, noteIdToDate } from './search.js';
|
|
4
|
+
import { __test__, buildScrollUntilJs, noteIdToDate } from './search.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Test helper: replaces `getBoundingClientRect` on `el` with a stub that
 * always reports a 100x100 box.
 * NOTE(review): presumably needed so size-based visibility checks in the code
 * under test treat the element as rendered — confirm against search.js.
 *
 * @param {object} el - Element (or any object) to patch in place.
 */
function markVisible(el) {
    Object.assign(el, {
        getBoundingClientRect: () => ({ width: 100, height: 100 }),
    });
}
|
|
5
9
|
function createPageMock(evaluateResults) {
|
|
6
10
|
const evaluate = vi.fn();
|
|
7
11
|
for (const result of evaluateResults) {
|
|
@@ -31,6 +35,16 @@ function createPageMock(evaluateResults) {
|
|
|
31
35
|
};
|
|
32
36
|
}
|
|
33
37
|
describe('xiaohongshu search', () => {
|
|
38
|
+
it('rejects invalid limit before browser navigation', async () => {
|
|
39
|
+
const cmd = getRegistry().get('xiaohongshu/search');
|
|
40
|
+
const page = createPageMock([]);
|
|
41
|
+
|
|
42
|
+
await expect(cmd.func(page, { query: '特斯拉', limit: 0 })).rejects.toMatchObject({
|
|
43
|
+
code: 'ARGUMENT',
|
|
44
|
+
message: expect.stringContaining('--limit'),
|
|
45
|
+
});
|
|
46
|
+
expect(page.goto).not.toHaveBeenCalled();
|
|
47
|
+
});
|
|
34
48
|
it('throws a clear error when the search page is blocked by a login wall', async () => {
|
|
35
49
|
const cmd = getRegistry().get('xiaohongshu/search');
|
|
36
50
|
expect(cmd?.func).toBeTypeOf('function');
|
|
@@ -39,7 +53,8 @@ describe('xiaohongshu search', () => {
|
|
|
39
53
|
'login_wall',
|
|
40
54
|
]);
|
|
41
55
|
await expect(cmd.func(page, { query: '特斯拉', limit: 5 })).rejects.toThrow('Xiaohongshu search results are blocked behind a login wall');
|
|
42
|
-
//
|
|
56
|
+
// No scroll-until / autoScroll call when a login wall is detected early.
|
|
57
|
+
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
43
58
|
expect(page.autoScroll).not.toHaveBeenCalled();
|
|
44
59
|
});
|
|
45
60
|
it('returns ranked results with search_result url and author_url preserved', async () => {
|
|
@@ -50,7 +65,7 @@ describe('xiaohongshu search', () => {
|
|
|
50
65
|
const page = createPageMock([
|
|
51
66
|
// First evaluate: MutationObserver wait (content appeared)
|
|
52
67
|
'content',
|
|
53
|
-
// Second evaluate:
|
|
68
|
+
// Second evaluate: initial DOM extraction (already enough results)
|
|
54
69
|
[
|
|
55
70
|
{
|
|
56
71
|
title: '某鱼买FSD被坑了4万',
|
|
@@ -82,7 +97,7 @@ describe('xiaohongshu search', () => {
|
|
|
82
97
|
const page = createPageMock([
|
|
83
98
|
// First evaluate: MutationObserver wait (content appeared)
|
|
84
99
|
'content',
|
|
85
|
-
// Second evaluate:
|
|
100
|
+
// Second evaluate: initial DOM extraction (already enough valid rows)
|
|
86
101
|
[
|
|
87
102
|
{
|
|
88
103
|
title: 'Result A',
|
|
@@ -118,15 +133,36 @@ describe('xiaohongshu search', () => {
|
|
|
118
133
|
const page = createPageMock([
|
|
119
134
|
// First evaluate: MutationObserver wait (content appeared)
|
|
120
135
|
'content',
|
|
121
|
-
// Second evaluate: extraction (
|
|
136
|
+
// Second evaluate: initial extraction (no rows rendered)
|
|
122
137
|
[],
|
|
123
138
|
]);
|
|
124
139
|
const result = (await cmd.func(page, { query: '测试等待', limit: 5 }));
|
|
125
140
|
expect(result).toHaveLength(0);
|
|
126
141
|
// Only one navigation, no retry
|
|
127
142
|
expect(page.goto).toHaveBeenCalledTimes(1);
|
|
128
|
-
//
|
|
129
|
-
expect(page.evaluate).toHaveBeenCalledTimes(
|
|
143
|
+
// Four evaluate calls: wait, initial extraction, scroll-until, post-scroll extraction.
|
|
144
|
+
expect(page.evaluate).toHaveBeenCalledTimes(4);
|
|
145
|
+
});
|
|
146
|
+
it('scrolls only when the initial extraction has fewer rows than requested', async () => {
|
|
147
|
+
const cmd = getRegistry().get('xiaohongshu/search');
|
|
148
|
+
expect(cmd?.func).toBeTypeOf('function');
|
|
149
|
+
const page = createPageMock([
|
|
150
|
+
'content',
|
|
151
|
+
[
|
|
152
|
+
{ title: 'Result A', author: 'UserA', likes: '10', url: 'https://www.xiaohongshu.com/search_result/aaa', author_url: '' },
|
|
153
|
+
],
|
|
154
|
+
3,
|
|
155
|
+
[
|
|
156
|
+
{ title: 'Result A', author: 'UserA', likes: '10', url: 'https://www.xiaohongshu.com/search_result/aaa', author_url: '' },
|
|
157
|
+
{ title: 'Result B', author: 'UserB', likes: '5', url: 'https://www.xiaohongshu.com/search_result/bbb', author_url: '' },
|
|
158
|
+
],
|
|
159
|
+
]);
|
|
160
|
+
|
|
161
|
+
const result = (await cmd.func(page, { query: '测试等待', limit: 2 }));
|
|
162
|
+
|
|
163
|
+
expect(result).toHaveLength(2);
|
|
164
|
+
expect(result.map((item) => item.title)).toEqual(['Result A', 'Result B']);
|
|
165
|
+
expect(page.evaluate).toHaveBeenCalledTimes(4);
|
|
130
166
|
});
|
|
131
167
|
it('separates fallback author text from appended relative date', async () => {
|
|
132
168
|
const cmd = getRegistry().get('xiaohongshu/search');
|
|
@@ -141,9 +177,10 @@ describe('xiaohongshu search', () => {
|
|
|
141
177
|
<span class="count">8</span>
|
|
142
178
|
</section>
|
|
143
179
|
`, { url: 'https://www.xiaohongshu.com/search_result?keyword=test' });
|
|
180
|
+
markVisible(dom.window.document.querySelector('section.note-item'));
|
|
144
181
|
const page = createPageMock([]);
|
|
145
182
|
page.evaluate.mockImplementationOnce(async () => 'content');
|
|
146
|
-
page.evaluate.mockImplementationOnce(async (script) => Function('document', `return (${script})`)(dom.window.document));
|
|
183
|
+
page.evaluate.mockImplementationOnce(async (script) => Function('document', 'getComputedStyle', `return (${script})`)(dom.window.document, dom.window.getComputedStyle.bind(dom.window)));
|
|
147
184
|
|
|
148
185
|
const result = await cmd.func(page, { query: '测试', limit: 1 });
|
|
149
186
|
|
|
@@ -155,6 +192,43 @@ describe('xiaohongshu search', () => {
|
|
|
155
192
|
});
|
|
156
193
|
});
|
|
157
194
|
});
|
|
195
|
+
// Tests for the generated scroll-until script: what gets inlined into the
// source text, how it counts rows inside a JSDOM document, and argument
// validation.
describe('buildScrollUntilJs', () => {
    it('inlines the target count and default maxScrolls into the generated IIFE', () => {
        const script = buildScrollUntilJs(40);
        const requiredFragments = [
            // Target count must drive the early-exit check (#1471: --limit > 13 was capped).
            'countItems() >= 40',
            // Default safety cap of 15 to bound runtime on infinite-scroll pages.
            'i < 15',
            // Plateau detection so the loop exits early when XHS stops lazy-loading
            // instead of spinning all 15 iterations against an exhausted feed.
            'plateauRounds',
            // Related-search rows must not count toward the target.
            "classList.contains('query-note-item')",
        ];
        for (const fragment of requiredFragments) {
            expect(script).toContain(fragment);
        }
    });
    it('respects a custom maxScrolls override', () => {
        const script = buildScrollUntilJs(100, 5);
        for (const fragment of ['countItems() >= 100', 'i < 5']) {
            expect(script).toContain(fragment);
        }
    });
    it('counts only visible real note rows', async () => {
        const dom = new JSDOM(`
            <section class="note-item" id="visible"></section>
            <section class="note-item query-note-item" id="query"></section>
            <section class="note-item" id="hidden" style="display:none"></section>
        `, { url: 'https://www.xiaohongshu.com/search_result?keyword=test' });
        const win = dom.window;
        const doc = win.document;
        // Every row gets a non-zero box, so only the class and display rules
        // can exclude the "query" and "hidden" rows.
        for (const selector of ['#visible', '#query', '#hidden']) {
            markVisible(doc.querySelector(selector));
        }

        // The generated script is source text; evaluate it with the JSDOM
        // globals passed in as parameters instead of relying on real browser globals.
        const runScript = Function('document', 'window', 'MutationObserver', 'getComputedStyle', `return (${buildScrollUntilJs(1)})`);
        const result = await runScript(doc, win, win.MutationObserver, win.getComputedStyle.bind(win));

        expect(result).toBe(1);
    });
    it('rejects unsafe helper arguments instead of interpolating them into code', () => {
        const zeroTarget = () => buildScrollUntilJs(0);
        const zeroMaxScrolls = () => buildScrollUntilJs(10, 0);
        expect(zeroTarget).toThrow(/targetCount/);
        expect(zeroMaxScrolls).toThrow(/maxScrolls/);
    });
});
|
|
158
232
|
describe('stripXhsAuthorDateSuffix', () => {
|
|
159
233
|
it('only strips trailing date suffixes and preserves date-like author text', () => {
|
|
160
234
|
expect(__test__.stripXhsAuthorDateSuffix('作者名 3天前')).toBe('作者名');
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { cli } from '@jackwener/opencli/registry';
|
|
2
2
|
import { EmptyResultError } from '@jackwener/opencli/errors';
|
|
3
|
-
import { fetchXueqiuJson } from './utils.js';
|
|
3
|
+
import { fetchXueqiuJson, formatChinaDate } from './utils.js';
|
|
4
4
|
cli({
|
|
5
5
|
site: 'xueqiu',
|
|
6
6
|
name: 'earnings-date',
|
|
@@ -32,7 +32,7 @@ cli({
|
|
|
32
32
|
.filter((item) => item.subtype === 2)
|
|
33
33
|
.map((item) => {
|
|
34
34
|
const ts = item.timestamp;
|
|
35
|
-
const dateStr = ts ?
|
|
35
|
+
const dateStr = ts ? formatChinaDate(ts) : null;
|
|
36
36
|
const isFuture = ts && ts > now;
|
|
37
37
|
return { date: dateStr, report: item.message, status: isFuture ? '⏳ 未发布' : '✅ 已发布', _ts: ts, _future: isFuture };
|
|
38
38
|
});
|
package/clis/xueqiu/kline.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { cli } from '@jackwener/opencli/registry';
|
|
2
2
|
import { EmptyResultError } from '@jackwener/opencli/errors';
|
|
3
|
-
import { fetchXueqiuJson } from './utils.js';
|
|
3
|
+
import { fetchXueqiuJson, formatChinaDate } from './utils.js';
|
|
4
4
|
cli({
|
|
5
5
|
site: 'xueqiu',
|
|
6
6
|
name: 'kline',
|
|
@@ -31,7 +31,7 @@ cli({
|
|
|
31
31
|
const colIdx = {};
|
|
32
32
|
columns.forEach((name, i) => { colIdx[name] = i; });
|
|
33
33
|
return d.data.item.map(row => ({
|
|
34
|
-
date: colIdx.timestamp != null ?
|
|
34
|
+
date: colIdx.timestamp != null ? formatChinaDate(row[colIdx.timestamp]) : null,
|
|
35
35
|
open: row[colIdx.open] ?? null,
|
|
36
36
|
high: row[colIdx.high] ?? null,
|
|
37
37
|
low: row[colIdx.low] ?? null,
|
package/clis/xueqiu/utils.js
CHANGED
|
@@ -1,4 +1,23 @@
|
|
|
1
1
|
import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
|
|
2
|
+
|
|
3
|
+
// Shared formatter pinned to the Asia/Shanghai time zone. It is built once at
// module load and reused for every call.
const CHINA_DATE_FORMATTER = new Intl.DateTimeFormat('en-US', {
    timeZone: 'Asia/Shanghai',
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
});

/**
 * Format a Unix ms timestamp as the matching `YYYY-MM-DD` in Asia/Shanghai
 * (xueqiu's canonical user timezone for all markets).
 *
 * @param {number|Date|null|undefined} ts - Value accepted by `new Date(ts)`.
 * @returns {string|null} The calendar date, or `null` when `ts` is nullish or
 *   does not produce a valid Date.
 */
export function formatChinaDate(ts) {
    if (ts == null) return null;
    const date = new Date(ts);
    // `formatToParts` throws a RangeError on an Invalid Date. Return the same
    // "no date" value used for nullish input so one bad row cannot crash the caller.
    if (Number.isNaN(date.getTime())) return null;
    // Assemble from named parts rather than the formatted string, so the
    // output does not depend on the locale's separator or field order.
    const parts = Object.fromEntries(
        CHINA_DATE_FORMATTER.formatToParts(date)
            .filter((part) => part.type !== 'literal')
            .map((part) => [part.type, part.value]),
    );
    return `${parts.year}-${parts.month}-${parts.day}`;
}
|
|
20
|
+
|
|
2
21
|
/**
|
|
3
22
|
* Fetch a xueqiu JSON API from inside the browser context (credentials included).
|
|
4
23
|
* Page must already be navigated to xueqiu.com before calling this function.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { formatChinaDate } from './utils.js';
|
|
3
|
+
|
|
4
|
+
// Pins formatChinaDate to Asia/Shanghai calendar dates: day-boundary
// behaviour at 16:00 UTC, the output shape, and nullish handling.
describe('formatChinaDate', () => {
    it('returns the Asia/Shanghai date for a UTC ms at China midnight', () => {
        const chinaMidnight = Date.UTC(2026, 4, 7, 16, 0, 0);
        expect(formatChinaDate(chinaMidnight)).toBe('2026-05-08');
    });
    it('returns the same China date for a moment late in the day', () => {
        const lateInDay = Date.UTC(2026, 4, 8, 14, 0, 0);
        expect(formatChinaDate(lateInDay)).toBe('2026-05-08');
    });
    it('formats representative A-share and US-market bars on xueqiu Beijing dates', () => {
        const firstBar = Date.UTC(2026, 4, 7, 16, 0, 0);
        const secondBar = Date.UTC(2026, 4, 10, 16, 0, 0);
        expect(formatChinaDate(firstBar)).toBe('2026-05-08');
        expect(formatChinaDate(secondBar)).toBe('2026-05-11');
    });
    it('crosses the China day boundary at 16:00 UTC', () => {
        const lastSecondBefore = Date.UTC(2026, 0, 1, 15, 59, 59);
        const firstSecondAfter = Date.UTC(2026, 0, 1, 16, 0, 0);
        expect(formatChinaDate(lastSecondBefore)).toBe('2026-01-01');
        expect(formatChinaDate(firstSecondAfter)).toBe('2026-01-02');
    });
    it('always returns an ISO calendar date string, not a locale-shaped slash date', () => {
        const formatted = formatChinaDate(Date.UTC(2026, 0, 1, 16, 0, 0));
        expect(formatted).toMatch(/^\d{4}-\d{2}-\d{2}$/);
    });
    it('returns null for nullish input', () => {
        for (const nullish of [null, undefined]) {
            expect(formatChinaDate(nullish)).toBeNull();
        }
    });
});
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
3
|
+
|
|
4
|
+
// Character references decoded by stripHtml, keyed by the text between the
// ampersand and the semicolon. The names are kept apart from their delimiters
// so this table survives tooling that HTML-unescapes source text.
const HTML_ENTITY_TEXT = {
    nbsp: ' ',
    lt: '<',
    gt: '>',
    amp: '&',
    quot: '"',
    apos: "'",
    '#39': "'",
    '#x27': "'",
};
const HTML_ENTITY_RE = /&(nbsp|lt|gt|amp|quot|apos|#39|#x27);/g;

/**
 * Light-weight HTML → text conversion that preserves paragraph / heading /
 * list-item line breaks. Zhihu answer `content` is HTML, so block-level
 * closing tags and `<br>` are mapped to newlines before the remaining tags
 * are stripped.
 *
 * @param {string|null|undefined} html - Raw HTML fragment.
 * @returns {string} Plain text; `''` for falsy input.
 */
function stripHtml(html) {
    if (!html) return '';
    return html
        .replace(/<br\s*\/?\s*>/gi, '\n')
        // Block-level closing tags become paragraph breaks (double newline)
        // so the stripped text stays readable. The `\n{3,}` pass below
        // collapses accidental triples.
        .replace(/<\/(?:p|div|h[1-6]|li|blockquote)>/gi, '\n\n')
        // Tags are removed before entities are decoded, so escaped angle
        // brackets in the text are never mistaken for markup.
        .replace(/<[^>]+>/g, '')
        // Single pass: each reference is decoded exactly once, so an escaped
        // ampersand followed by an entity name is not decoded a second time.
        .replace(HTML_ENTITY_RE, (_match, name) => HTML_ENTITY_TEXT[name])
        .replace(/\n{3,}/g, '\n\n')
        .trim();
}
|
|
25
|
+
|
|
26
|
+
// Exact-shape matchers for the accepted id and path forms. All are anchored
// and non-global, so they carry no `lastIndex` state between calls.
const ANSWER_ID_RE = /^\d+$/;
const ANSWER_TYPED_RE = /^answer:(\d+):(\d+)$/;
const ANSWER_PATH_RE = /^\/question\/(\d+)\/answer\/(\d+)\/?$/;
const BARE_ANSWER_PATH_RE = /^\/answer\/(\d+)\/?$/;
const QUESTION_PATH_RE = /^\/question\/(\d+)\/?$/;
const QUESTION_API_PATH_RE = /^\/api\/v4\/questions\/(\d+)\/?$/;

/**
 * Resolve user input to an answer id and, when present, its question id.
 *
 * Accepted shapes:
 *   - a bare numeric id (`1937205528846655537`);
 *   - the typed target `answer:<qid>:<aid>`;
 *   - an https URL on `www.zhihu.com` / `zhihu.com` whose path is
 *     `/question/<qid>/answer/<aid>` or `/answer/<aid>`.
 *
 * Ids are returned as the matched digit strings, never converted to numbers.
 *
 * @param {unknown} input - Raw argument; coerced with `String()` and trimmed.
 * @returns {{answerId: string, questionId: string}|null} `null` when the input
 *   matches none of the shapes above; `questionId` is `''` when unknown.
 */
function parseAnswerTarget(input) {
    const raw = String(input ?? '').trim();
    if (raw === '') return null;

    if (ANSWER_ID_RE.test(raw)) {
        return { answerId: raw, questionId: '' };
    }

    const typedMatch = ANSWER_TYPED_RE.exec(raw);
    if (typedMatch) {
        return { questionId: typedMatch[1], answerId: typedMatch[2] };
    }

    let parsed;
    try {
        parsed = new URL(raw);
    } catch {
        // Not an absolute URL, and none of the non-URL shapes matched.
        return null;
    }

    // Accept only plain https Zhihu URLs: no credentials, no explicit port.
    const onZhihuHost = parsed.hostname === 'www.zhihu.com' || parsed.hostname === 'zhihu.com';
    const carriesExtras = Boolean(parsed.username || parsed.password || parsed.port);
    if (parsed.protocol !== 'https:' || carriesExtras || !onZhihuHost) {
        return null;
    }

    const canonical = ANSWER_PATH_RE.exec(parsed.pathname);
    if (canonical) {
        return { questionId: canonical[1], answerId: canonical[2] };
    }
    const bare = BARE_ANSWER_PATH_RE.exec(parsed.pathname);
    return bare ? { answerId: bare[1], questionId: '' } : null;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Convenience wrapper around `parseAnswerTarget` that yields only the answer id.
 *
 * @param {unknown} input - Same inputs `parseAnswerTarget` accepts.
 * @returns {string|null} The answer id string, or `null` when parsing fails.
 */
function extractAnswerId(input) {
    const target = parseAnswerTarget(input);
    return target?.answerId ?? null;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Pull the question id out of an https Zhihu URL.
 *
 * Path shapes are tried in this order: `/question/<qid>/answer/<aid>`,
 * `/question/<qid>`, `/api/v4/questions/<qid>`.
 *
 * @param {unknown} input - Candidate URL; coerced with `String()` and trimmed.
 * @returns {string} The question id digits, or `''` when the input is empty,
 *   is not a parseable URL, is not https on a Zhihu host, or has no matching path.
 */
function extractQuestionIdFromAnswerUrl(input) {
    const raw = String(input ?? '').trim();
    if (raw === '') return '';

    let parsed;
    try {
        parsed = new URL(raw);
    } catch {
        return '';
    }

    const onZhihuHost = parsed.hostname === 'www.zhihu.com' || parsed.hostname === 'zhihu.com';
    if (parsed.protocol !== 'https:' || !onZhihuHost) {
        return '';
    }

    // The first pattern that matches wins; capture group 1 is always the question id.
    for (const pattern of [ANSWER_PATH_RE, QUESTION_PATH_RE, QUESTION_API_PATH_RE]) {
        const questionId = pattern.exec(parsed.pathname)?.[1];
        if (questionId) return questionId;
    }
    return '';
}
|
|
86
|
+
|
|
87
|
+
/**
 * Coerce a counter field to a non-negative integer.
 *
 * @param {unknown} value - Raw value from the payload.
 * @returns {number} `value` when it is an integer >= 0, otherwise `0`.
 */
function normalizeCount(value) {
    if (!Number.isInteger(value)) return 0;
    return value < 0 ? 0 : value;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Convert a Unix timestamp in seconds to an ISO-8601 string.
 *
 * @param {unknown} value - Raw timestamp from the payload.
 * @returns {string} `toISOString()` of `value * 1000` when `value` is a
 *   finite positive number, otherwise `''`.
 */
function normalizeUnixSeconds(value) {
    const isUsable = typeof value === 'number' && Number.isFinite(value) && value > 0;
    if (!isUsable) return '';
    return new Date(value * 1000).toISOString();
}
|
|
96
|
+
|
|
97
|
+
// Registers `zhihu answer-detail`: fetches one answer through the Zhihu v4
// API from inside the page context and returns a single normalized row.
cli({
    site: 'zhihu',
    name: 'answer-detail',
    access: 'read',
    description: '知乎单个回答完整内容(按 answer ID 获取)',
    domain: 'www.zhihu.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'id', required: true, positional: true, help: 'Answer ID, full Zhihu answer URL, or typed target (answer:<qid>:<aid>)' },
        { name: 'max-content', type: 'int', default: 0, help: 'Optional cap on stripped content length in characters (0 = no truncation, return the full answer)' },
    ],
    columns: ['id', 'author', 'votes', 'comments', 'question_id', 'question_title', 'url', 'created_at', 'updated_at', 'content'],
    /**
     * @param page - Browser page handle; this function uses `goto`,
     *   `evaluate`, and `getCurrentUrl` when the page provides it.
     * @param kwargs - Parsed CLI arguments (`id`, `max-content`).
     * @returns A one-element array holding the normalized answer row.
     * @throws {ArgumentError} On an unparseable `id` or invalid `--max-content`.
     * @throws {AuthRequiredError} On HTTP 401/403 from the API.
     * @throws {EmptyResultError} On HTTP 404 from the API.
     * @throws {CommandExecutionError} On navigation failure, other HTTP
     *   errors, malformed JSON/payload, or an error payload.
     */
    func: async (page, kwargs) => {
        const target = parseAnswerTarget(kwargs.id);
        if (!target) {
            throw new ArgumentError(
                'Answer ID must be a numeric id, a Zhihu answer URL, or answer:<qid>:<aid>',
                'Example: opencli zhihu answer-detail 1937205528846655537',
            );
        }
        const { answerId } = target;
        // `--max-content 0` (the default) means "no cap, return the
        // full stripped answer". Any positive value is an opt-in user
        // cap, mirroring the wikipedia `page` pattern — we never
        // silently truncate behind the user's back.
        const rawMaxContent = kwargs['max-content'];
        const maxContent = rawMaxContent == null ? 0 : Number(rawMaxContent);
        if (!Number.isInteger(maxContent) || maxContent < 0) {
            throw new ArgumentError(
                '--max-content must be a non-negative integer (0 = no cap, full content)',
                'Example: --max-content 2000',
            );
        }
        // Navigate to the answer page itself: this both seeds the
        // cookie/anti-bot context and works even when the caller did
        // not supply the parent question id (Zhihu redirects from
        // `/answer/<aid>` to the canonical `/question/<qid>/answer/<aid>`).
        try {
            await page.goto(`https://www.zhihu.com/answer/${answerId}`);
        } catch (err) {
            throw new CommandExecutionError(
                `Failed to open Zhihu answer ${answerId}: ${err instanceof Error ? err.message : String(err)}`,
                'Open the answer URL in Chrome and retry after the page is reachable.',
            );
        }
        // Best-effort: a failure to read the current URL only loses one
        // source for the question id, so it is deliberately downgraded to ''.
        const currentQuestionId = page.getCurrentUrl
            ? extractQuestionIdFromAnswerUrl(await page.getCurrentUrl().catch(() => ''))
            : '';
        const apiUrl = `https://www.zhihu.com/api/v4/answers/${answerId}?include=content,voteup_count,comment_count,author,created_time,updated_time,question`;
        // The fetch runs in the page so the browser's cookies are sent.
        // HTTP and JSON failures come back as sentinel objects
        // (`__httpError`, `__malformedJson`) rather than thrown errors.
        const data = await page.evaluate(`
            (async () => {
                const r = await fetch(${JSON.stringify(apiUrl)}, { credentials: 'include' });
                if (!r.ok) return { __httpError: r.status };
                try {
                    return await r.json();
                } catch (error) {
                    return { __malformedJson: error instanceof Error ? error.message : String(error) };
                }
            })()
        `).catch((err) => {
            throw new CommandExecutionError(
                `Zhihu answer detail request failed: ${err instanceof Error ? err.message : String(err)}`,
                'Try again later or rerun with -v for more detail.',
            );
        });
        if (!data || data.__httpError) {
            const status = data?.__httpError;
            if (status === 401 || status === 403) {
                throw new AuthRequiredError('www.zhihu.com', 'Failed to fetch Zhihu answer detail');
            }
            if (status === 404) {
                throw new EmptyResultError('zhihu answer-detail', `No Zhihu answer was found for ${answerId}.`);
            }
            throw new CommandExecutionError(
                status
                    ? `Zhihu answer detail request failed (HTTP ${status})`
                    : 'Zhihu answer detail request failed',
                'Try again later or rerun with -v for more detail',
            );
        }
        if (data.__malformedJson) {
            throw new CommandExecutionError(
                `Zhihu answer detail returned malformed JSON: ${data.__malformedJson}`,
                'Try again later or rerun with -v for more detail',
            );
        }
        if (typeof data !== 'object' || Array.isArray(data)) {
            throw new CommandExecutionError(
                'Zhihu answer detail returned a malformed payload',
                'Try again later or rerun with -v for more detail',
            );
        }
        if (data.error || data.error_msg || data.message) {
            // `error` may be a plain string or an object. Only the object
            // form has `.message`, so fall back through the other fields and
            // finally the serialized `error` — the message never ends in
            // "undefined".
            const errorDetail = (typeof data.error === 'string' ? data.error : data.error?.message)
                || data.error_msg
                || data.message
                || JSON.stringify(data.error);
            throw new CommandExecutionError(
                `Zhihu answer detail returned an error payload: ${errorDetail}`,
                'Try again later or rerun with -v for more detail',
            );
        }
        if (!Object.prototype.hasOwnProperty.call(data, 'content')) {
            throw new CommandExecutionError(
                'Zhihu answer detail payload did not include answer content',
                'Try again later or rerun with -v for more detail',
            );
        }
        const question = data.question || {};
        // Answer ids and newer question ids can exceed
        // Number.MAX_SAFE_INTEGER. Prefer ids parsed from user input or
        // the canonical redirected URL; only fall back to API numeric ids
        // when no string-safe source is available.
        const questionId = target.questionId
            || currentQuestionId
            || extractQuestionIdFromAnswerUrl(question.url)
            || (question.id == null ? '' : String(question.id));
        const stripped = stripHtml(data.content || '');
        // Truncation is opt-in only; default `maxContent === 0` short-
        // circuits the conditional so the full stripped body is returned.
        const content = maxContent > 0 && stripped.length > maxContent
            ? stripped.substring(0, maxContent)
            : stripped;
        return [{
            id: answerId,
            author: data.author?.name || 'anonymous',
            votes: normalizeCount(data.voteup_count),
            comments: normalizeCount(data.comment_count),
            question_id: questionId,
            question_title: question.title || '',
            url: questionId
                ? `https://www.zhihu.com/question/${questionId}/answer/${answerId}`
                : `https://www.zhihu.com/answer/${answerId}`,
            created_at: normalizeUnixSeconds(data.created_time),
            updated_at: normalizeUnixSeconds(data.updated_time),
            content,
        }];
    },
});
|
|
232
|
+
|
|
233
|
+
export const __test__ = { stripHtml, extractAnswerId, parseAnswerTarget, extractQuestionIdFromAnswerUrl };
|