@jackwener/opencli 1.7.6 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/README.md +17 -8
  2. package/README.zh-CN.md +14 -8
  3. package/cli-manifest.json +469 -11
  4. package/clis/51job/company.js +125 -0
  5. package/clis/51job/detail.js +108 -0
  6. package/clis/51job/hot.js +55 -0
  7. package/clis/51job/search.js +79 -0
  8. package/clis/51job/utils.js +302 -0
  9. package/clis/51job/utils.test.js +69 -0
  10. package/clis/amazon/discussion.js +37 -6
  11. package/clis/amazon/discussion.test.js +147 -32
  12. package/clis/bilibili/video.js +11 -4
  13. package/clis/bilibili/video.test.js +51 -0
  14. package/clis/chatgpt/image.js +1 -1
  15. package/clis/chatgpt-app/ask.js +3 -19
  16. package/clis/chatgpt-app/ax.js +132 -1
  17. package/clis/chatgpt-app/ax.test.js +23 -0
  18. package/clis/chatgpt-app/send.js +2 -21
  19. package/clis/deepseek/ask.js +50 -18
  20. package/clis/deepseek/ask.test.js +195 -2
  21. package/clis/deepseek/utils.js +113 -29
  22. package/clis/deepseek/utils.test.js +109 -1
  23. package/clis/gemini/image.js +1 -1
  24. package/clis/instagram/download.js +1 -1
  25. package/clis/powerchina/search.js +250 -0
  26. package/clis/powerchina/search.test.js +67 -0
  27. package/clis/sinafinance/stock.js +5 -2
  28. package/clis/sinafinance/stock.test.js +59 -0
  29. package/clis/toutiao/articles.js +81 -0
  30. package/clis/toutiao/articles.test.js +23 -0
  31. package/clis/twitter/likes.js +3 -2
  32. package/clis/twitter/search.js +4 -2
  33. package/clis/twitter/search.test.js +4 -0
  34. package/clis/twitter/shared.js +28 -0
  35. package/clis/twitter/shared.test.js +96 -0
  36. package/clis/twitter/thread.js +3 -1
  37. package/clis/twitter/timeline.js +3 -2
  38. package/clis/twitter/tweets.js +3 -2
  39. package/clis/twitter/tweets.test.js +1 -1
  40. package/clis/web/read.js +25 -5
  41. package/clis/web/read.test.js +76 -0
  42. package/clis/weixin/create-draft.js +225 -0
  43. package/clis/weixin/drafts.js +65 -0
  44. package/clis/weixin/drafts.test.js +65 -0
  45. package/clis/weread/ai-outline.js +170 -0
  46. package/clis/weread/ai-outline.test.js +83 -0
  47. package/clis/weread/book.js +57 -44
  48. package/clis/weread/commands.test.js +24 -0
  49. package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
  50. package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
  51. package/dist/src/browser/analyze.d.ts +103 -0
  52. package/dist/src/browser/analyze.js +230 -0
  53. package/dist/src/browser/analyze.test.d.ts +1 -0
  54. package/dist/src/browser/analyze.test.js +164 -0
  55. package/dist/src/browser/article-extract.d.ts +57 -0
  56. package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
  57. package/dist/src/browser/article-extract.e2e.test.js +105 -0
  58. package/dist/src/browser/article-extract.js +169 -0
  59. package/dist/src/browser/article-extract.test.d.ts +1 -0
  60. package/dist/src/browser/article-extract.test.js +94 -0
  61. package/dist/src/browser/cdp.js +11 -2
  62. package/dist/src/browser/verify-fixture.d.ts +59 -0
  63. package/dist/src/browser/verify-fixture.js +213 -0
  64. package/dist/src/browser/verify-fixture.test.d.ts +1 -0
  65. package/dist/src/browser/verify-fixture.test.js +161 -0
  66. package/dist/src/cli.d.ts +32 -0
  67. package/dist/src/cli.js +333 -43
  68. package/dist/src/cli.test.js +257 -1
  69. package/dist/src/commanderAdapter.js +12 -0
  70. package/dist/src/commanderAdapter.test.js +11 -0
  71. package/dist/src/daemon.d.ts +3 -2
  72. package/dist/src/daemon.js +16 -4
  73. package/dist/src/daemon.test.d.ts +1 -0
  74. package/dist/src/daemon.test.js +19 -0
  75. package/dist/src/download/article-download.d.ts +12 -0
  76. package/dist/src/download/article-download.js +141 -17
  77. package/dist/src/download/article-download.test.js +196 -0
  78. package/dist/src/download/index.js +73 -86
  79. package/dist/src/errors.js +4 -2
  80. package/dist/src/errors.test.js +13 -0
  81. package/dist/src/launcher.d.ts +1 -1
  82. package/dist/src/launcher.js +3 -3
  83. package/dist/src/output.js +1 -1
  84. package/dist/src/output.test.js +6 -0
  85. package/package.json +5 -1
@@ -0,0 +1,250 @@
1
+ /**
2
+ * PowerChina search — browser DOM extraction with multi-entry URL probing.
3
+ */
4
+ import { cli, Strategy } from '@jackwener/opencli/registry';
5
+ import { AuthRequiredError } from '@jackwener/opencli/errors';
6
+ import {
7
+ cleanText,
8
+ normalizeDate,
9
+ toProcurementSearchRecords,
10
+ } from '../jianyu/shared/procurement-contract.js';
11
+ import { searchRowsFromEntries } from '../jianyu/shared/china-bid-search.js';
12
+
13
+ const SEARCH_ENTRIES = [
14
+ 'https://bid.powerchina.cn/search',
15
+ 'https://bid.powerchina.cn/',
16
+ ];
17
+ const API_LIST_ENDPOINT = 'https://bid.powerchina.cn/newcbs/recpro-newmember/BidAnnouncementSummary/list';
18
+ const API_DETAIL_ENDPOINT = 'https://bid.powerchina.cn/newcbs/recpro-newmember/BidAnnouncementSummary/getInfo';
19
+ const API_DEFAULT_ANNOUNCEMENT_TYPE = '招采公告';
20
+
21
+ const PROCUREMENT_TITLE_HINT = /(公告|招标|采购|中标|成交|项目|notice|tender|bidding)/i;
22
+ const NAVIGATION_TITLE_HINT = /^(english|中文|chinese|language|home|首页|搜索|search)$/i;
23
+ const RETRYABLE_SEARCH_ERROR_HINT = /(detached while handling command|execution context was destroyed|target closed|cannot find context with specified id)/i;
24
+
25
/**
 * Build the ordered list of PowerChina URLs to probe for a search keyword.
 *
 * A blank keyword yields a copy of the generic entry pages only. Otherwise the
 * three query-string spellings (`keyword`, `keywords`, `q`) come first,
 * followed by the generic entry pages.
 *
 * @param {string} query - Raw search keyword.
 * @returns {string[]} Candidate URLs, in probing order.
 */
export function buildSearchCandidates(query) {
    const term = query.trim();
    if (term.length === 0) {
        return Array.from(SEARCH_ENTRIES);
    }
    const encodedTerm = encodeURIComponent(term);
    const keywordUrls = ['keyword', 'keywords', 'q'].map(
        (param) => `https://bid.powerchina.cn/search?${param}=${encodedTerm}`,
    );
    return keywordUrls.concat(SEARCH_ENTRIES);
}
36
+
37
/**
 * Remove repeated candidates, keeping the first occurrence of every
 * title/url pair and preserving the input order.
 *
 * @param {Iterable<{title: string, url: string}>} items - Candidate rows.
 * @returns {Array} New array without duplicate title/url pairs.
 */
function dedupeCandidates(items) {
    const seenKeys = new Set();
    return [...items].filter((candidate) => {
        const fingerprint = `${candidate.title}\t${candidate.url}`;
        if (seenKeys.has(fingerprint)) {
            return false;
        }
        seenKeys.add(fingerprint);
        return true;
    });
}
48
+
49
/**
 * Decide whether a URL points at a portal/navigation page rather than at an
 * individual announcement.
 *
 * Empty or unparseable URLs count as navigation. So do the site root,
 * `/index`, `/search`, anything under `/old`, `/en` or `/zh`, and hash routes
 * for the index or search views.
 *
 * @param {string} rawUrl - URL text as extracted from the page.
 * @returns {boolean} True when the URL should be discarded as navigation.
 */
function isLikelyNavigationUrl(rawUrl) {
    const candidate = cleanText(rawUrl);
    if (!candidate) {
        return true;
    }
    try {
        const location = new URL(candidate);
        // Trailing slashes are ignored; an empty path collapses to "/".
        const path = location.pathname.toLowerCase().replace(/\/+$/, '') || '/';
        const fragment = cleanText(location.hash).toLowerCase();

        const isPortalPage = ['/', '/index', '/search'].includes(path);
        const isPortalSection = ['/old', '/en', '/zh'].some(
            (root) => path === root || path.startsWith(`${root}/`),
        );
        const isPortalHash = fragment === '#/' || fragment === '#/index' || fragment.startsWith('#/search');
        return isPortalPage || isPortalSection || isPortalHash;
    } catch {
        return true;
    }
}
67
+
68
/**
 * Decide whether a link label looks like site chrome (home, search, language
 * switch) rather than an announcement title.
 *
 * @param {string} rawTitle - Link text as extracted from the page.
 * @returns {boolean} True for blank labels, labels matching the navigation
 *   hint, and short (<= 10 chars) language-toggle labels.
 */
function isLikelyNavigationTitle(rawTitle) {
    const label = cleanText(rawTitle);
    if (!label) {
        return true;
    }
    const lowered = label.toLowerCase();
    if (NAVIGATION_TITLE_HINT.test(lowered)) {
        return true;
    }
    const looksLikeLanguageToggle = lowered === 'en' || lowered === 'zh' || lowered.includes('english');
    return lowered.length <= 10 && looksLikeLanguageToggle;
}
78
+
79
/**
 * Keep only rows that look like real announcements.
 *
 * A row is dropped when its title or URL is blank, when its URL is a
 * navigation URL, or when its title is a navigation label that carries no
 * procurement keyword.
 *
 * @param {Array<{title: string, url: string}>} items - Extracted rows.
 * @returns {Array} The rows that survived the filter (same objects).
 */
function filterNavigationRows(items) {
    const isContentRow = (item) => {
        const title = cleanText(item.title);
        const url = cleanText(item.url);
        if (!(url && title)) {
            return false;
        }
        if (isLikelyNavigationUrl(url)) {
            return false;
        }
        // A navigation-looking title is still kept when it mentions procurement.
        const isPureNavigationTitle = isLikelyNavigationTitle(title) && !PROCUREMENT_TITLE_HINT.test(title);
        return !isPureNavigationTitle;
    };
    return items.filter(isContentRow);
}
89
+
90
/**
 * Build the announcement detail URL for an API row id.
 *
 * @param {string} id - Announcement id as returned by the list API.
 * @returns {string} Detail endpoint URL with the id URL-encoded, or an empty
 *   string when the id is blank.
 */
export function buildApiDetailUrl(id) {
    const cleanId = cleanText(id);
    return cleanId ? `${API_DETAIL_ENDPOINT}/${encodeURIComponent(cleanId)}` : '';
}
95
+
96
/**
 * Convert one row of the list API response into a search candidate.
 *
 * @param {object} row - Raw API row.
 * @returns {{title: string, url: string, date: string, contextText: string} | null}
 *   The candidate, or null when the row has no usable id, title or detail URL.
 */
function toApiCandidate(row) {
    const id = cleanText(row.id);
    const title = cleanText(row.title);
    if (!(id && title)) {
        return null;
    }

    const url = buildApiDetailUrl(id);
    if (!url) {
        return null;
    }

    // Free-text context: every populated descriptive field, pipe-separated.
    const contextFields = [
        row.announcementType,
        row.titleTypeName,
        row.source,
        row.publishTime,
        row.registrationDeadline,
        row.submissionDeadline,
        row.bidOpenTime,
    ];
    const contextText = cleanText(contextFields.filter((field) => Boolean(field)).join(' | '));

    // Prefer the publish time; fall back to the bid-open time, then the deadline.
    const rawDate = row.publishTime || row.bidOpenTime || row.submissionDeadline || '';
    const date = normalizeDate(cleanText(rawDate));

    return { title, url, date, contextText };
}
122
+
123
/**
 * Query the announcement list API and map its rows to search candidates.
 *
 * @param {string} query - Search keyword; omitted from the payload when blank.
 * @param {number} limit - Maximum number of candidates to return.
 * @returns {Promise<Array>} Deduplicated candidates, at most `limit` of them.
 * @throws {Error} When the HTTP status is not OK or the response `code` is
 *   present and not 200.
 */
async function searchRowsFromApi(query, limit) {
    const keyword = cleanText(query);

    // Over-fetch (3x the limit), bounded to 20..100 rows per page.
    const wanted = Math.max(limit * 3, limit);
    const pageSize = Math.max(20, Math.min(100, wanted));

    const payload = {
        pageNum: 1,
        pageSize,
        announcementType: API_DEFAULT_ANNOUNCEMENT_TYPE,
        companyType: '3',
        time: Date.now(),
        ...(keyword ? { keyWords: keyword } : {}),
    };

    const response = await fetch(API_LIST_ENDPOINT, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json;charset=utf-8' },
        body: JSON.stringify(payload),
    });
    if (!response.ok) {
        throw new Error(`[taxonomy=relay_unavailable] site=powerchina command=search api HTTP ${response.status}`);
    }

    const data = await response.json();
    // A missing code is treated as success.
    const apiCode = data.code ?? 200;
    if (apiCode !== 200) {
        throw new Error(`[taxonomy=relay_unavailable] site=powerchina command=search api code=${data.code ?? 'unknown'} msg=${cleanText(data.msg)}`);
    }

    const candidates = [];
    for (const row of Array.isArray(data.rows) ? data.rows : []) {
        const candidate = toApiCandidate(row);
        if (candidate) {
            candidates.push(candidate);
        }
    }
    return dedupeCandidates(candidates).slice(0, limit);
}
158
+
159
/**
 * `powerchina search` command.
 *
 * The list API is tried first. If it succeeds with no rows the command returns
 * an empty list; if it fails, the browser page is used to probe the search
 * entry URLs instead. Extracted rows are deduplicated, normalized, stripped of
 * navigation links and converted into procurement search records.
 */
cli({
    site: 'powerchina',
    name: 'search',
    description: '搜索中国电建阳光采购公告',
    domain: 'bid.powerchina.cn',
    strategy: Strategy.COOKIE,
    browser: true,
    args: [
        { name: 'query', required: true, positional: true, help: 'Search keyword, e.g. "procurement"' },
        { name: 'limit', type: 'int', default: 20, help: 'Number of results (max 50)' },
    ],
    columns: ['rank', 'content_type', 'title', 'publish_time', 'project_code', 'budget_or_limit', 'url'],
    func: async (page, kwargs) => {
        const describeError = (error) => cleanText(error instanceof Error ? error.message : String(error || ''));

        const query = cleanText(kwargs.query);
        const requestedLimit = Number(kwargs.limit) || 20;
        const limit = Math.max(1, Math.min(requestedLimit, 50));

        let extractedRows = [];
        let apiFailure = null;
        let apiSucceeded = true;
        try {
            extractedRows = await searchRowsFromApi(query, limit);
        } catch (error) {
            apiSucceeded = false;
            apiFailure = describeError(error);
        }

        if (apiSucceeded) {
            // The API answered: an empty answer is a genuine "no results".
            if (extractedRows.length === 0) {
                return [];
            }
        } else {
            // API unavailable: fall back to DOM extraction over the entry URLs.
            try {
                extractedRows = await searchRowsFromEntries(page, {
                    query,
                    candidateUrls: buildSearchCandidates(query),
                    allowedHostFragments: ['bid.powerchina.cn', 'powerchina.cn'],
                    limit,
                });
            } catch (error) {
                const message = describeError(error);
                if (RETRYABLE_SEARCH_ERROR_HINT.test(message)) {
                    throw new Error(`[taxonomy=relay_unavailable] site=powerchina command=search detached browser context: ${message}`);
                }
                throw error;
            }
        }

        const normalizedRows = dedupeCandidates(extractedRows).map((item) => ({
            title: cleanText(item.title),
            url: cleanText(item.url),
            date: normalizeDate(cleanText(item.date)),
            contextText: cleanText(item.contextText),
        }));
        const rows = filterNavigationRows(normalizedRows);

        if (rows.length === 0) {
            if (extractedRows.length > 0) {
                throw new Error('[taxonomy=empty_result] site=powerchina command=search extracted only navigation/portal rows');
            }
            // Nothing extracted at all: check whether the page is asking for login.
            const pageText = cleanText(await page.evaluate('document.body ? document.body.innerText : ""'));
            if (/(请先登录|未登录|登录后|验证码|人机验证)/.test(pageText)) {
                throw new AuthRequiredError(
                    'bid.powerchina.cn',
                    '[taxonomy=selector_drift] site=powerchina command=search login required or human verification',
                );
            }
            if (apiFailure) {
                throw new Error(`[taxonomy=empty_result] site=powerchina command=search api/dom yielded no result: ${apiFailure}`);
            }
        }

        return toProcurementSearchRecords(rows, {
            site: 'powerchina',
            query,
            limit,
        });
    },
});

/** Internal helpers exposed for the unit tests only. */
export const __test__ = {
    buildSearchCandidates,
    normalizeDate,
    dedupeCandidates,
    filterNavigationRows,
    isLikelyNavigationUrl,
    isLikelyNavigationTitle,
    buildApiDetailUrl,
    toApiCandidate,
};
@@ -0,0 +1,67 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { __test__ } from './search.js';
3
+
4
// Unit tests for the pure helpers exported through `__test__`.
describe('powerchina search helpers', () => {
    const {
        buildSearchCandidates,
        normalizeDate,
        dedupeCandidates,
        filterNavigationRows,
        isLikelyNavigationUrl,
        isLikelyNavigationTitle,
        buildApiDetailUrl,
        toApiCandidate,
    } = __test__;

    const DETAIL_ID = '2409419657';
    const DETAIL_URL = 'https://bid.powerchina.cn/newcbs/recpro-newmember/BidAnnouncementSummary/getInfo/2409419657';

    it('builds candidate URLs with keyword variants', () => {
        const candidates = buildSearchCandidates('procurement');
        expect(candidates[0]).toContain('keyword=procurement');
        expect(candidates.some((item) => item.includes('/search?keywords='))).toBe(true);
        expect(candidates.some((item) => item === 'https://bid.powerchina.cn/search')).toBe(true);
    });

    it('normalizes date text', () => {
        expect(normalizeDate('2026-4-7')).toBe('2026-04-07');
        expect(normalizeDate('公告时间:2026年04月07日')).toBe('2026-04-07');
    });

    it('deduplicates title/url pairs', () => {
        const first = { title: 'A', url: 'https://a.com/1', date: '2026-04-07' };
        const repeat = { title: 'A', url: 'https://a.com/1', date: '2026-04-07' };
        const otherTitle = { title: 'B', url: 'https://a.com/1', date: '2026-04-07' };
        expect(dedupeCandidates([first, repeat, otherTitle])).toHaveLength(2);
    });

    it('filters obvious navigation rows before quality gate', () => {
        const navigationRows = [
            { title: '搜索', url: 'https://bid.powerchina.cn/search', date: '2026-04-07' },
            { title: '首页', url: 'https://bid.powerchina.cn/', date: '2026-04-07' },
            { title: 'English', url: 'https://bid.powerchina.cn/old/en', date: '' },
        ];
        const announcement = {
            title: '某项目电梯采购公告',
            url: 'https://bid.powerchina.cn/notice/detail?id=123',
            date: '2026-04-07',
        };
        const filtered = filterNavigationRows([...navigationRows, announcement]);
        expect(filtered).toHaveLength(1);
        expect(filtered[0].title).toContain('电梯采购公告');
    });

    it('treats old/en language switch urls as navigation', () => {
        expect(isLikelyNavigationUrl('https://bid.powerchina.cn/old/en')).toBe(true);
    });

    it('treats language-toggle labels as navigation titles', () => {
        for (const label of ['English', 'EN']) {
            expect(isLikelyNavigationTitle(label)).toBe(true);
        }
    });

    it('builds api detail urls with stable id', () => {
        expect(buildApiDetailUrl(DETAIL_ID)).toBe(DETAIL_URL);
    });

    it('maps api rows into normalized search candidates', () => {
        const apiRow = {
            id: DETAIL_ID,
            title: '某项目电梯采购公告',
            announcementType: '招采公告',
            companyType: '3',
            titleTypeName: '货物类',
            source: '设备物资集中采购电子平台',
            publishTime: '2026-04-07 17:05:02',
            submissionDeadline: '2026-04-14',
        };
        const mapped = toApiCandidate(apiRow);
        expect(mapped).not.toBeNull();
        expect(mapped?.title).toContain('电梯采购公告');
        expect(mapped?.date).toBe('2026-04-07');
        expect(mapped?.url).toBe(DETAIL_URL);
    });
});
@@ -79,12 +79,15 @@ cli({
79
79
  if (!entries.length) {
80
80
  throw new CliError('NOT_FOUND', `No stock found for "${key}"`, 'Try a different name, code, or --market');
81
81
  }
82
- // Pick best match: score by name similarity, tiebreak by market priority
82
+ // Pick best match: score by name/symbol similarity, tiebreak by market priority
83
83
  const needle = key.toLowerCase();
84
84
  const score = (e) => {
85
85
  const n = e.name.toLowerCase();
86
- if (n === needle)
86
+ const s = e.symbol.toLowerCase();
87
+ if (s === needle || n === needle)
87
88
  return 1;
89
+ if (s.includes(needle))
90
+ return needle.length / s.length;
88
91
  if (n.includes(needle))
89
92
  return needle.length / n.length;
90
93
  return 0;
@@ -0,0 +1,59 @@
1
+ import { beforeEach, describe, expect, it, vi } from 'vitest';
2
+ import { getRegistry } from '@jackwener/opencli/registry';
3
+ import './stock.js';
4
+
5
/**
 * Build a minimal successful fetch-response stand-in whose `arrayBuffer()`
 * resolves to the UTF-8 bytes of `body`.
 */
function textResponse(body) {
    const arrayBuffer = async () => Buffer.from(body, 'utf8');
    return { ok: true, arrayBuffer };
}
11
+
12
// Exercises the registered `sinafinance/stock` command against stubbed fetch responses.
describe('sinafinance stock command', () => {
    const SUGGEST_BODY = 'var suggestvalue="x,41,,AAPL,苹果;x,41,,AAPLU,Apple Units";';
    const QUOTE_URL = 'https://hq.sinajs.cn/list=gb_AAPL';

    /** Look up the command and stub `fetch` with the suggest reply followed by `quoteBody`. */
    function prepare(quoteBody) {
        const cmd = getRegistry().get('sinafinance/stock');
        expect(cmd?.func).toBeTypeOf('function');

        const fetchMock = vi.fn()
            .mockResolvedValueOnce(textResponse(SUGGEST_BODY))
            .mockResolvedValueOnce(textResponse(quoteBody));
        vi.stubGlobal('fetch', fetchMock);
        return { cmd, fetchMock };
    }

    beforeEach(() => {
        vi.restoreAllMocks();
        // The fixtures are UTF-8, so decode them as UTF-8 whatever encoding is requested.
        vi.stubGlobal('TextDecoder', class {
            decode(buf) {
                return Buffer.from(buf).toString('utf8');
            }
        });
    });

    it('prefers exact symbol match over partial symbol and name misses', async () => {
        const { cmd, fetchMock } = prepare(
            'var hq_str_gb_AAPL="Apple Inc,189.98,1.23,0,1.56,0,188.50,180.00,195.00,175.00,1200000,0,3000000000000";',
        );

        const result = await cmd.func(null, { key: 'AAPL', market: 'auto' });

        expect(fetchMock).toHaveBeenNthCalledWith(1, 'https://suggest3.sinajs.cn/suggest/type=11,31,41&key=AAPL', expect.any(Object));
        expect(fetchMock).toHaveBeenNthCalledWith(2, QUOTE_URL, expect.any(Object));
        expect(result[0]).toMatchObject({
            Symbol: 'AAPL',
            Name: 'Apple Inc',
            Price: '189.98',
        });
    });

    it('still matches by display name when the query targets the company name', async () => {
        const { cmd, fetchMock } = prepare(
            'var hq_str_gb_AAPL="苹果公司,189.98,1.23,0,1.56,0,188.50,180.00,195.00,175.00,1200000,0,3000000000000";',
        );

        const result = await cmd.func(null, { key: '苹果', market: 'auto' });

        expect(fetchMock).toHaveBeenNthCalledWith(2, QUOTE_URL, expect.any(Object));
        expect(result[0]).toMatchObject({
            Symbol: 'AAPL',
            Name: '苹果公司',
        });
    });
});
@@ -0,0 +1,81 @@
1
+ import { cli } from '@jackwener/opencli/registry';
2
+
3
/**
 * Parse the visible text of the Toutiao creator content-management page into
 * article rows.
 *
 * Every line of the form `MM-DD HH:MM` anchors one article. The title is
 * searched in the three lines before the anchor (farthest first); the status
 * and the statistics line are searched in the seven lines after it. A row is
 * emitted only when both a title and a statistics line were found.
 *
 * IMPORTANT: this function is serialized with `.toString()` and evaluated
 * inside the browser page by the `articles` pipeline, so it must be fully
 * self-contained. It may not reference module-level bindings, which is why
 * the set of non-title labels lives inside the function body.
 *
 * @param {string} text - Page text (typically `document.body.innerText`).
 * @returns {Array<{title: string, date: string, status: string, '展现': string, '阅读': string, '点赞': string, '评论': string}>}
 *   One record per recognized article, in page order.
 */
export function parseToutiaoArticlesText(text) {
    // UI labels that can sit next to a date line but are never an article title.
    const nonTitleLines = new Set([
        '展现', '阅读', '点赞', '评论',
        '查看数据', '查看评论', '修改', '更多', '首发',
        '已发布', '定时发布', '定时发布中', '由文章生成', '审核中',
    ]);

    const lines = String(text || '').split('\n').map((line) => line.trim()).filter(Boolean);
    const results = [];

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (!/^\d{2}-\d{2}\s+\d{2}:\d{2}$/.test(line)) continue;

        const date = line;
        let title = '';
        let status = '';
        let stats = null;

        // Title: first plausible line among the three preceding ones, farthest first.
        for (let back = 3; back >= 1; back--) {
            const prev = lines[i - back] || '';
            if (!prev || prev.length >= 100 || /^\d+$/.test(prev) || nonTitleLines.has(prev)) continue;
            title = prev;
            break;
        }

        // Status and statistics: scan the following seven lines; the last match wins.
        for (let fwd = 1; fwd < 8; fwd++) {
            const fwdLine = lines[i + fwd] || '';
            if (fwdLine === '已发布' || fwdLine === '定时发布中' || fwdLine === '审核中' || fwdLine === '由文章生成') {
                status = fwdLine;
            }
            if (fwdLine.includes('展现') && fwdLine.includes('阅读')) {
                const match = fwdLine.match(/展现\s*([\d,]+)\s*阅读\s*([\d,]+)\s*点赞\s*([\d,]+)\s*评论\s*([\d,]*)/);
                if (match) {
                    stats = {
                        '展现': match[1],
                        '阅读': match[2],
                        '点赞': match[3],
                        // The comment count may be blank on the page.
                        '评论': match[4] || '0',
                    };
                }
            }
        }

        if (title && stats) results.push({ title, date, status, ...stats });
    }

    return results;
}
52
+
53
// Creator back-office article list; `${{ args.page }}` is filled in by the pipeline runner.
const ARTICLES_PAGE_URL = 'https://mp.toutiao.com/profile_v4/manage/content/all?page=${{ args.page }}';

// Page-side script: waits two more seconds, then runs the inlined source of
// `parseToutiaoArticlesText` over the page's visible text.
const EXTRACT_ARTICLES_SCRIPT = `
(async () => {
// Wait for content to load
await new Promise(r => setTimeout(r, 2000));
const parse = ${parseToutiaoArticlesText.toString()};
return parse(document.body.innerText || '');
})()
`;

/**
 * `toutiao articles` command: open the creator content-management page,
 * wait for it to settle, and parse the article list out of the page text.
 */
cli({
    site: 'toutiao',
    name: 'articles',
    description: '获取头条号创作者后台文章列表及数据',
    domain: 'mp.toutiao.com',
    args: [
        { name: 'page', type: 'int', default: 1, help: '页码 (1-4)' },
    ],
    columns: ['title', 'date', 'status', '展现', '阅读', '点赞', '评论'],
    pipeline: [
        { navigate: ARTICLES_PAGE_URL },
        { wait: 'networkidle' },
        { wait: 3000 },
        { evaluate: EXTRACT_ARTICLES_SCRIPT },
    ],
});

/** Internal helpers exposed for the unit tests only. */
export const __test__ = {
    parseToutiaoArticlesText,
};
@@ -0,0 +1,23 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { __test__ } from './articles.js';
3
+
4
// Regression test for the page-text parser behind `toutiao articles`.
describe('toutiao articles parser', () => {
    it('keeps short chinese titles instead of silently dropping the row', () => {
        const pageLines = [
            '短标题',
            '04-20 20:30',
            '已发布',
            '展现 8 阅读 0 点赞 0 评论 0',
        ];
        const expectedRow = {
            title: '短标题',
            date: '04-20 20:30',
            status: '已发布',
            '展现': '8',
            '阅读': '0',
            '点赞': '0',
            '评论': '0',
        };

        const parsed = __test__.parseToutiaoArticlesText(pageLines.join('\n'));

        expect(parsed).toEqual([expectedRow]);
    });
});
@@ -1,6 +1,6 @@
1
1
  import { cli, Strategy } from '@jackwener/opencli/registry';
2
2
  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
3
- import { resolveTwitterQueryId, sanitizeQueryId } from './shared.js';
3
+ import { resolveTwitterQueryId, sanitizeQueryId, extractMedia } from './shared.js';
4
4
  const BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA';
5
5
  const LIKES_QUERY_ID = 'RozQdCp4CilQzrcuU0NY5w';
6
6
  const USER_BY_SCREEN_NAME_QUERY_ID = 'qRednkZG-rn1P6b48NINmQ';
@@ -99,6 +99,7 @@ function extractLikedTweet(result, seen) {
99
99
  retweets: legacy.retweet_count || 0,
100
100
  created_at: legacy.created_at || '',
101
101
  url: `https://x.com/${screenName}/status/${tw.rest_id}`,
102
+ ...extractMedia(legacy),
102
103
  };
103
104
  }
104
105
  function parseLikes(data, seen) {
@@ -144,7 +145,7 @@ cli({
144
145
  { name: 'username', type: 'string', positional: true, help: 'Twitter screen name (without @). Defaults to logged-in user.' },
145
146
  { name: 'limit', type: 'int', default: 20 },
146
147
  ],
147
- columns: ['author', 'name', 'text', 'likes', 'url'],
148
+ columns: ['author', 'name', 'text', 'likes', 'url', 'has_media', 'media_urls'],
148
149
  func: async (page, kwargs) => {
149
150
  const limit = kwargs.limit || 20;
150
151
  let username = (kwargs.username || '').replace(/^@/, '');
@@ -1,5 +1,6 @@
1
1
  import { CommandExecutionError } from '@jackwener/opencli/errors';
2
2
  import { cli, Strategy } from '@jackwener/opencli/registry';
3
+ import { extractMedia } from './shared.js';
3
4
  /**
4
5
  * Trigger Twitter search SPA navigation with fallback strategies.
5
6
  *
@@ -102,7 +103,7 @@ cli({
102
103
  { name: 'filter', type: 'string', default: 'top', choices: ['top', 'live'] },
103
104
  { name: 'limit', type: 'int', default: 15 },
104
105
  ],
105
- columns: ['id', 'author', 'text', 'created_at', 'likes', 'views', 'url'],
106
+ columns: ['id', 'author', 'text', 'created_at', 'likes', 'views', 'url', 'has_media', 'media_urls'],
106
107
  func: async (page, kwargs) => {
107
108
  const query = kwargs.query;
108
109
  const filter = kwargs.filter === 'live' ? 'live' : 'top';
@@ -156,7 +157,8 @@ cli({
156
157
  created_at: tweet.legacy?.created_at || '',
157
158
  likes: tweet.legacy?.favorite_count || 0,
158
159
  views: tweet.views?.count || '0',
159
- url: `https://x.com/i/status/${tweet.rest_id}`
160
+ url: `https://x.com/i/status/${tweet.rest_id}`,
161
+ ...extractMedia(tweet.legacy),
160
162
  });
161
163
  }
162
164
  }
@@ -75,6 +75,8 @@ describe('twitter search command', () => {
75
75
  likes: 7,
76
76
  views: '12',
77
77
  url: 'https://x.com/i/status/1',
78
+ has_media: false,
79
+ media_urls: [],
78
80
  },
79
81
  ]);
80
82
  expect(page.installInterceptor).toHaveBeenCalledWith('SearchTimeline');
@@ -203,6 +205,8 @@ describe('twitter search command', () => {
203
205
  likes: 3,
204
206
  views: '5',
205
207
  url: 'https://x.com/i/status/99',
208
+ has_media: false,
209
+ media_urls: [],
206
210
  },
207
211
  ]);
208
212
  // 6 evaluate calls: 2x pushState + 2x pathname check + 1x fallback + 1x pathname check
@@ -35,6 +35,34 @@ export async function resolveTwitterQueryId(page, operationName, fallbackId) {
35
35
  }`);
36
36
  return sanitizeQueryId(resolved, fallbackId);
37
37
  }
38
/**
 * Summarize the media attached to a tweet, given its `legacy` object.
 *
 * The media list is read from `extended_entities.media`, falling back to
 * `entities.media`. Videos and animated GIFs contribute the URL of their
 * first `video/mp4` variant (or `media_url_https` when there is none); every
 * other media type contributes `media_url_https`. Entries without a usable
 * URL are skipped.
 *
 * @param {object} [legacy] - The tweet's `legacy` payload; may be nullish.
 * @returns {{has_media: boolean, media_urls: string[]}} `has_media` is true
 *   only when at least one URL was collected.
 */
export function extractMedia(legacy) {
    const items = legacy?.extended_entities?.media || legacy?.entities?.media;
    if (!Array.isArray(items) || items.length === 0) {
        return { has_media: false, media_urls: [] };
    }

    const media_urls = items.flatMap((item) => {
        if (!item) {
            return [];
        }
        const isPlayable = item.type === 'video' || item.type === 'animated_gif';
        const mp4 = isPlayable
            ? (item.video_info?.variants || []).find((variant) => variant?.content_type === 'video/mp4')
            : undefined;
        const url = mp4?.url || item.media_url_https;
        return url ? [url] : [];
    });

    return { has_media: media_urls.length > 0, media_urls };
}
38
65
  export const __test__ = {
39
66
  sanitizeQueryId,
67
+ extractMedia,
40
68
  };