@xbrowser/twitter 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts ADDED
@@ -0,0 +1,417 @@
1
+ import { z } from 'zod';
2
+ import type { XCLIAPI } from '@dyyz1993/xcli-core';
3
+ import { ok, fail } from '@dyyz1993/xcli-core';
4
+
5
+ export default function (xcli: XCLIAPI): void {
6
+ const site = xcli.createSite({
7
+ name: 'twitter',
8
+ url: 'https://x.com',
9
+ description: 'X (Twitter) - 社交媒体内容采集(XHR 拦截模式,数据更丰富)',
10
+ requiresLogin: true,
11
+ });
12
+
13
+ const BASE = 'https://x.com';
14
+
15
+ // ─── 工具 ──────────────────────────────────────
16
+
17
/**
 * Returns the browser page stored on the command context.
 *
 * @param ctx - Command context; its `page` property is read and treated as a Playwright page.
 * @returns The value of `ctx.page`.
 * @throws Error when `ctx.page` is missing or falsy.
 */
function getPage(ctx: Record<string, unknown>) {
  const candidate = ctx.page as import('playwright').Page;
  if (candidate) {
    return candidate;
  }
  throw new Error('需要浏览器页面,请使用 --cdp 参数连接');
}
22
+
23
/**
 * Builds the hint lines attached to a command result.
 *
 * @param ctx - Command context; `cdpEndpoint` and `sessionId` are read.
 * @returns A session line, preceded by a "use --cdp" hint when `ctx.cdpEndpoint` is falsy.
 */
function buildTips(ctx: Record<string, unknown>): string[] {
  const sessionLine = `Session: ${ctx.sessionId || 'default'}`;
  return ctx.cdpEndpoint
    ? [sessionLine]
    : ['建议使用 --cdp 9221 连接到已登录的浏览器', sessionLine];
}
29
+
30
+ // ─── 通用:拦截 GraphQL API ────────────────────
31
+
32
/** Options accepted by `captureApiResponse`. */
interface CaptureOptions {
  /** Page whose `response` events are observed. */
  page: import('playwright').Page;
  /** Substring a response URL must contain for the response to be inspected. */
  urlPattern: string;
  /** Maps the parsed JSON body to the payload of interest; return `null` to keep waiting. */
  dataExtractor: (json: Record<string, unknown>) => Record<string, unknown> | null;
  /** Maximum time to wait, in milliseconds. Defaults to 15000. */
  timeout?: number;
}

/**
 * Waits for the first response whose URL contains `urlPattern` and whose JSON
 * body yields a truthy value from `dataExtractor`.
 *
 * The `response` listener is detached and the timeout timer is cleared as soon
 * as the promise settles, whether by a match or by the timeout, so repeated
 * calls on a long-lived page do not accumulate listeners or pending timers.
 *
 * @param opts - See `CaptureOptions`.
 * @returns The extracted payload, or `null` if nothing matched within `timeout`.
 */
async function captureApiResponse<T>(opts: CaptureOptions): Promise<T | null> {
  const { page, urlPattern, dataExtractor, timeout = 15000 } = opts;

  return new Promise<T | null>((resolve) => {
    let settled = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Single exit path: whichever of "match" or "timeout" happens first wins,
    // and both the listener and the timer are released exactly once.
    const settle = (value: T | null): void => {
      if (settled) return;
      settled = true;
      if (timer !== undefined) clearTimeout(timer);
      page.off('response', onResponse);
      resolve(value);
    };

    const onResponse = async (resp: import('playwright').Response): Promise<void> => {
      if (settled || !resp.url().includes(urlPattern)) return;
      try {
        const json = (await resp.json()) as Record<string, unknown>;
        const data = dataExtractor(json);
        if (data) settle(data as T);
      } catch {
        // Best-effort: bodies that are not JSON (or extractor failures) are skipped.
      }
    };

    page.on('response', onResponse);
    timer = setTimeout(() => settle(null), timeout);
  });
}
61
+
62
+ // ─── 1. search ─────────────────────────────────
63
+
64
// Registers the `search` command: opens the X search page for a keyword and
// scrapes the rendered tweet cards from the DOM.
site.command('search', {
  description: '搜索 X/Twitter 推文(API 拦截模式)',
  scope: 'browser',
  parameters: z.object({
    query: z.string().describe('搜索关键词'),
    limit: z.number().optional().default(10),
  }),
  examples: [
    { cmd: 'xbrowser twitter search --query "OpenAI"', description: '搜索 OpenAI 相关推文' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    const context = ctx as Record<string, unknown>;
    const page = getPage(context);
    const tips = buildTips(context);

    // The search GraphQL endpoint (SearchTimeline) has a complicated name, so
    // this command reads the rendered DOM instead of intercepting the API.
    const searchUrl = `${BASE}/search?q=${encodeURIComponent(params.query)}&src=typed_query&f=top`;
    await page.goto(searchUrl, { waitUntil: 'domcontentloaded' });
    // Fixed delay to let the client-side app render results.
    await page.waitForTimeout(5000);

    const results = await page.evaluate((limit) => {
      // Runs inside the browser page: everything used here must be defined here.
      const textOf = (root: Element, selector: string, fallback: string): string =>
        root.querySelector(selector)?.textContent?.trim() || fallback;

      const collected: Array<Record<string, unknown>> = [];
      const cards = Array.from(document.querySelectorAll('article[data-testid="tweet"]'));
      for (const [index, card] of cards.entries()) {
        if (index >= limit) break;
        const anchor = card.querySelector('a[href*="/status/"]');
        collected.push({
          author: textOf(card, '[data-testid="User-Name"]', ''),
          text: textOf(card, '[data-testid="tweetText"]', ''),
          time: card.querySelector('time')?.getAttribute('datetime') || '',
          likes: textOf(card, '[data-testid="like"]', '0'),
          retweets: textOf(card, '[data-testid="retweet"]', '0'),
          replies: textOf(card, '[data-testid="reply"]', '0'),
          link: anchor instanceof HTMLAnchorElement ? anchor.href : '',
        });
      }
      return collected;
    }, params.limit);

    return ok({ query: params.query, count: results.length, tweets: results }, tips);
  },
});
112
+
113
+ // ─── 2. profile ────────────────────────────────
114
+
115
// Registers the `profile` command: loads a user's profile page and captures
// the profile payload from intercepted API responses, falling back to the DOM.
site.command('profile', {
  description: '获取 X/Twitter 用户资料(API 拦截模式,含丰富指标)',
  scope: 'browser',
  parameters: z.object({
    username: z.string().describe('用户名(不含 @)'),
  }),
  examples: [
    { cmd: 'xbrowser twitter profile --username "elonmusk"', description: '获取 Elon Musk 资料' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    const page = getPage(ctx as Record<string, unknown>);
    const tips = buildTips(ctx as Record<string, unknown>);

    // Filled in by the response listener when a matching payload is seen.
    let userData: Record<string, unknown> | null = null;

    // Inspects responses whose URL mentions UserByScreenName or ProfileSpotlights.
    const onResponse = async (resp: import('playwright').Response): Promise<void> => {
      const url = resp.url();
      if (!url.includes('UserByScreenName') && !url.includes('ProfileSpotlights')) return;
      try {
        const json = JSON.parse(await resp.text());
        const result = (json?.data?.user?.result || json?.data?.user_result?.result || {}) as Record<string, unknown>;
        if (!result.legacy) return;
        const legacy = result.legacy as Record<string, unknown>;
        userData = {
          id: result.rest_id as string,
          name: legacy.name as string,
          screenName: legacy.screen_name as string,
          description: legacy.description as string,
          location: legacy.location as string,
          url: legacy.url as string,
          followersCount: legacy.followers_count,
          followingCount: legacy.friends_count,
          tweetCount: legacy.statuses_count,
          listedCount: legacy.listed_count,
          mediaCount: legacy.media_count,
          createdAt: legacy.created_at as string,
          // Dropping the `_normal` suffix from the avatar URL.
          avatar: (legacy.profile_image_url_https as string)?.replace('_normal', ''),
          banner: legacy.profile_banner_url as string,
          verified: !!legacy.verified,
          hasCustomTimeline: result.has_custom_timelines,
          professional: !!result.is_blue_verified,
          // Extra metrics
          fastFollowersCount: legacy.fast_followers_count,
          normalFollowersCount: legacy.normal_followers_count,
          favouritesCount: legacy.favourites_count,
          wantsToBeNotified: legacy.wants_to_be_notified,
        };
      } catch {
        // Best-effort: unreadable or non-JSON bodies are skipped; the DOM fallback covers a miss.
      }
    };

    page.on('response', onResponse);
    try {
      await page.goto(`${BASE}/${params.username}`, { waitUntil: 'domcontentloaded' });
      await page.waitForTimeout(5000);
    } finally {
      // Always detach: the page may be reused by later commands, and a
      // lingering listener would keep parsing every future response.
      page.off('response', onResponse);
    }

    if (!userData) {
      // DOM fallback when no API payload was captured.
      userData = await page.evaluate(() => {
        const name = document.querySelector('[data-testid="UserName"]')?.textContent?.trim() || '';
        const bio = document.querySelector('[data-testid="UserDescription"]')?.textContent?.trim() || '';
        return { name, bio, source: 'dom' };
      }) as Record<string, unknown>;
    }

    return ok(userData, [...tips, `用户: ${userData?.name || params.username}`]);
  },
});
186
+
187
+ // ─── 3. timeline ───────────────────────────────
188
+
189
// Registers the `timeline` command: loads a user's profile, waits for a
// UserTweets API response and extracts tweets from it, falling back to the DOM.
site.command('timeline', {
  description: '获取 X/Twitter 用户最新推文(API 拦截模式,含 views/bookmarks 等)',
  scope: 'browser',
  parameters: z.object({
    username: z.string().describe('用户名(不含 @)'),
    limit: z.number().optional().default(5),
  }),
  examples: [
    { cmd: 'xbrowser twitter timeline --username "elonmusk"', description: '获取 Elon Musk 最新推文' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    const page = getPage(ctx as Record<string, unknown>);
    const tips = buildTips(ctx as Record<string, unknown>);
    const capturedTweets: Array<Record<string, unknown>> = [];

    // Start waiting before navigating; the request may only fire after
    // scrolling. A timeout resolves to null instead of rejecting.
    const responsePromise = page.waitForResponse(
      (resp) => resp.url().includes('UserTweets') && resp.status() === 200,
      { timeout: 25000 },
    ).catch(() => null);

    await page.goto(`${BASE}/${params.username}`, { waitUntil: 'domcontentloaded' });
    await page.waitForTimeout(3000);
    await page.evaluate(() => window.scrollBy(0, 600));
    await page.waitForTimeout(1000);
    await page.evaluate(() => window.scrollBy(0, 600));

    const apiResp = await responsePromise;
    if (apiResp) {
      try {
        // Reading the body is inside the try so that a failed read falls
        // through to the DOM fallback instead of aborting the command.
        const json = JSON.parse(await apiResp.text());
        const instructions = json?.data?.user?.result?.timeline_v2?.timeline?.instructions || [];
        scan: for (const inst of instructions) {
          const entries = inst?.entries || [];
          for (const entry of entries) {
            // Stop scanning entirely once enough tweets were collected.
            if (capturedTweets.length >= params.limit) break scan;
            const result = entry?.content?.itemContent?.tweet_results?.result as Record<string, unknown> | undefined;
            if (!result?.legacy) continue;
            const legacy = result.legacy as Record<string, unknown>;
            const extended = legacy.extended_entities as { media?: Array<Record<string, unknown>> } | undefined;
            const extMedia = extended?.media || [];
            capturedTweets.push({
              id: result.rest_id,
              text: (legacy.full_text as string) || '',
              likes: Number(legacy.favorite_count) || 0,
              retweets: Number(legacy.retweet_count) || 0,
              mediaUrls: extMedia.map((m) => (m.media_url_https as string) || ''),
            });
          }
        }
      } catch {
        // Best-effort: an unreadable or unexpected payload leaves capturedTweets
        // empty, which triggers the DOM fallback below.
      }
    }

    const tweets = capturedTweets.slice(0, params.limit);

    // DOM fallback when nothing was captured from the API.
    if (tweets.length === 0) {
      // Extra settle time and one more scroll so tweet cards get rendered.
      await page.waitForTimeout(2000);
      await page.evaluate(() => window.scrollBy(0, 400));
      await page.waitForTimeout(2000);
      const domTweets = await page.evaluate((limit) => {
        const items: Array<Record<string, string>> = [];
        document.querySelectorAll('article[data-testid="tweet"]').forEach((a, i) => {
          if (i >= limit) return;
          items.push({
            text: (a.querySelector('[data-testid="tweetText"]')?.textContent || '').trim(),
            time: (a.querySelector('time')?.getAttribute('datetime') || ''),
            likes: (a.querySelector('[data-testid="like"]')?.textContent || '').trim(),
          });
        });
        return items;
      }, params.limit);
      return ok({ username: params.username, count: domTweets.length, tweets: domTweets, source: 'dom(api fallback)' }, tips);
    }

    return ok({ username: params.username, count: tweets.length, tweets, source: 'api' }, [...tips, `${params.username} 最近 ${tweets.length} 条推文(API 模式)`]);
  },
});
267
+
268
+ // ─── 4. replies ────────────────────────────────
269
+
270
// Registers the `replies` command: opens a tweet's detail page and collects
// reply tweets from intercepted TweetDetail API responses.
site.command('replies', {
  description: '获取推文的回复(API 拦截模式)',
  scope: 'browser',
  parameters: z.object({
    id: z.string().describe('推文 ID'),
    limit: z.number().optional().default(5),
  }),
  examples: [
    { cmd: 'xbrowser twitter replies --id "123456789"', description: '获取推文回复' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    const page = getPage(ctx as Record<string, unknown>);
    const tips = buildTips(ctx as Record<string, unknown>);
    const captured: Array<Record<string, unknown>> = [];
    // Tweet ids already collected, so repeated TweetDetail responses do not duplicate rows.
    const seenIds = new Set<string>();

    type Json = Record<string, unknown>;
    const asRecord = (value: unknown): Json | undefined =>
      value && typeof value === 'object' ? (value as Json) : undefined;
    // Reads `itemContent.tweet_results.result` without assuming any level exists.
    const tweetResultOf = (itemContent: unknown): Json | undefined =>
      asRecord(asRecord(asRecord(itemContent)?.tweet_results)?.result);

    // Appends one tweet result unless the limit is reached, it is the tweet
    // being inspected (not a reply to itself), or it was already collected.
    const collect = (result: Json | undefined): void => {
      const legacy = asRecord(result?.legacy);
      if (!result || !legacy || captured.length >= params.limit) return;
      const id = result.rest_id;
      if (typeof id === 'string') {
        if (id === params.id || seenIds.has(id)) return;
        seenIds.add(id);
      }
      captured.push({
        id: id as string,
        text: legacy.full_text as string,
        user: legacy.user_id_str as string,
        createdAt: legacy.created_at as string,
        likes: legacy.favorite_count,
        retweets: legacy.retweet_count,
        replies: legacy.reply_count,
        lang: legacy.lang,
      });
    };

    const onResponse = async (resp: import('playwright').Response): Promise<void> => {
      if (!resp.url().includes('TweetDetail')) return;
      try {
        const json = JSON.parse(await resp.text());
        const instructions = (json?.data?.threaded_conversation_with_injections_v2?.instructions || []) as unknown[];
        // Scan every instruction, not only the first one.
        for (const instruction of instructions) {
          const entries = asRecord(instruction)?.entries;
          for (const entry of (Array.isArray(entries) ? entries : []) as unknown[]) {
            const content = asRecord(asRecord(entry)?.content);
            if (!content) continue;
            collect(tweetResultOf(content.itemContent));
            // NOTE(review): assumes reply threads arrive as module entries shaped
            // `content.items[].item.itemContent` — the endpoint is undocumented; confirm
            // against a live TweetDetail payload.
            const moduleItems = Array.isArray(content.items) ? (content.items as unknown[]) : [];
            for (const moduleItem of moduleItems) {
              collect(tweetResultOf(asRecord(asRecord(moduleItem)?.item)?.itemContent));
            }
          }
        }
      } catch {
        // Best-effort: unreadable or unexpected payloads are skipped.
      }
    };

    page.on('response', onResponse);
    try {
      await page.goto(`${BASE}/i/status/${params.id}`, { waitUntil: 'domcontentloaded' });
      await page.waitForTimeout(6000);
    } finally {
      // Always detach so a reused page does not keep feeding this closure.
      page.off('response', onResponse);
    }

    const replies = captured.slice(0, params.limit);
    tips.push(`找到 ${replies.length} 条回复`);

    return ok({ tweetId: params.id, count: replies.length, replies }, tips);
  },
});
319
+
320
+ // ─── 5. liked ──────────────────────────────────
321
+
322
// Registers the `liked` command: opens a user's likes page and scrapes the
// rendered tweet cards from the DOM.
site.command('liked', {
  description: '获取用户点赞的推文',
  scope: 'browser',
  parameters: z.object({
    username: z.string().describe('用户名(不含 @)'),
    limit: z.number().optional().default(5),
  }),
  examples: [
    { cmd: 'xbrowser twitter liked --username "elonmusk"', description: '获取 Elon Musk 点赞' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    const context = ctx as Record<string, unknown>;
    const page = getPage(context);
    const tips = buildTips(context);

    const likesUrl = `${BASE}/${params.username}/likes`;
    await page.goto(likesUrl, { waitUntil: 'domcontentloaded' });
    // Fixed delays around one scroll step to let tweet cards render.
    await page.waitForTimeout(5000);
    await page.evaluate(() => window.scrollBy(0, 400));
    await page.waitForTimeout(2000);

    const tweets = await page.evaluate((limit) => {
      // Runs inside the browser page: everything used here must be defined here.
      const textOf = (root: Element, selector: string, fallback: string): string =>
        root.querySelector(selector)?.textContent?.trim() || fallback;

      const collected: Array<Record<string, string>> = [];
      const cards = Array.from(document.querySelectorAll('article[data-testid="tweet"]'));
      for (const [index, card] of cards.entries()) {
        if (index >= limit) break;
        collected.push({
          author: textOf(card, '[data-testid="User-Name"]', ''),
          text: textOf(card, '[data-testid="tweetText"]', ''),
          time: card.querySelector('time')?.getAttribute('datetime') || '',
          likes: textOf(card, '[data-testid="like"]', '0'),
        });
      }
      return collected;
    }, params.limit);

    return ok({ username: params.username, count: tweets.length, tweets }, tips);
  },
});
363
+
364
+ // ─── 6. search-image ───────────────────────────
365
+
366
// Registers the `search-image` command: runs an image-filtered search and
// collects `pbs.twimg` images found in the rendered page.
site.command('search-image', {
  description: 'Twitter/X 图片搜索',
  scope: 'browser',
  parameters: z.object({
    query: z.string().describe('搜索关键词'),
    limit: z.number().optional().default(10),
    page: z.any().optional(),
    timeout: z.number().optional().default(20000),
  }),
  result: z.any(),
  handler: async (params, ctx) => {
    // A page passed as a parameter takes precedence over the context page.
    const page = (params.page as import('playwright').Page) || (ctx as Record<string, unknown>).page as import('playwright').Page;
    if (!page) {
      throw new Error('需要浏览器页面');
    }

    try {
      const searchUrl = `https://x.com/search?q=${encodeURIComponent(params.query)}%20filter%3Aimages&f=live`;
      await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: params.timeout });
      await page.waitForTimeout(5000);

      // Three scroll steps, pausing after each one.
      let remainingScrolls = 3;
      while (remainingScrolls > 0) {
        await page.evaluate(() => window.scrollBy(0, 800));
        await page.waitForTimeout(600);
        remainingScrolls -= 1;
      }

      const results = await page.evaluate((limit) => {
        const found: Array<Record<string, unknown>> = [];
        for (const node of Array.from(document.querySelectorAll('img[src*="pbs.twimg"]'))) {
          if (found.length >= limit) break;
          const image = node as HTMLImageElement;
          // Images narrower than 80px are skipped.
          if (image.width < 80) continue;
          const src = image.src || '';
          found.push({
            title: image.alt || '',
            thumbnailUrl: src,
            sourceUrl: image.closest('a')?.href || '',
            // Rewrites the `name=` size parameter of the URL to `orig`.
            originalUrl: src.replace(/name=\w+/, 'name=orig'),
            width: image.naturalWidth || 0,
            height: image.naturalHeight || 0,
            format: 'jpg',
            sourceSite: 'twitter',
          });
        }
        return found;
      }, params.limit);

      const summary = `Twitter "${params.query}",共 ${results.length} 张`;
      return ok({ query: params.query, engine: 'twitter', results, total: results.length, timestamp: Date.now() }, [summary]);
    } catch (error) {
      const message = error instanceof Error ? error.message : '未知错误';
      return fail(message);
    }
  },
});
402
+
403
+ // ─── login/logout ──────────────────────────────
404
+
405
// Login hook: prints instructions for attaching to an already logged-in
// browser and, when a page is available, opens the X home page.
site.login(async (ctx) => {
  const page = (ctx as Record<string, unknown>).page as import('playwright').Page | undefined;
  const instructions = [
    '⚠️ 请使用 --cdp 参数连接到已登录 X.com 的浏览器',
    ' xbrowser twitter timeline --username elonmusk --cdp http://localhost:9221',
  ];
  for (const line of instructions) {
    console.log(line);
  }
  if (!page) return;
  // Navigation failures are deliberately ignored.
  await page.goto('https://x.com/', { waitUntil: 'domcontentloaded', timeout: 30000 }).catch(() => {});
});

// Logout hook: only prints a reminder to log out manually in the browser.
site.logout(async () => {
  const reminder = '⚠️ 请在浏览器中手动退出 X.com';
  console.log(reminder);
});
417
+ }
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "@xbrowser/twitter",
3
+ "version": "2.1.0",
4
+ "type": "module",
5
+ "description": "X (Twitter) 数据采集 - XHR 拦截模式,含 views/bookmarks/媒体等丰富数据",
6
+ "main": "index.ts",
7
+ "author": "XBrowser Team",
8
+ "license": "MIT",
9
+ "xbrowser": {
10
+ "name": "twitter",
11
+ "url": "https://x.com",
12
+ "description": "X (Twitter) 数据采集插件(XHR 拦截模式)",
13
+ "commands": [
14
+ "search",
15
+ "profile",
16
+ "timeline",
17
+ "replies",
18
+ "liked"
19
+ ],
20
+ "sites": [
21
+ "twitter"
22
+ ]
23
+ },
24
+ "dependencies": {
25
+ "zod": "^3.24.0"
26
+ },
27
+ "peerDependencies": {
28
+ "@dyyz1993/xcli-core": ">=1.0.0"
29
+ }
30
+ }