@jackwener/opencli 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CLI-CREATOR.md +151 -75
  2. package/README.md +11 -8
  3. package/README.zh-CN.md +11 -8
  4. package/SKILL.md +42 -15
  5. package/dist/browser.d.ts +11 -1
  6. package/dist/browser.js +95 -3
  7. package/dist/clis/bilibili/dynamic.d.ts +1 -0
  8. package/dist/clis/bilibili/dynamic.js +33 -0
  9. package/dist/clis/bilibili/ranking.d.ts +1 -0
  10. package/dist/clis/bilibili/ranking.js +24 -0
  11. package/dist/clis/bilibili/subtitle.d.ts +1 -0
  12. package/dist/clis/bilibili/subtitle.js +86 -0
  13. package/dist/clis/reddit/frontpage.yaml +30 -0
  14. package/dist/clis/reddit/hot.yaml +3 -2
  15. package/dist/clis/reddit/search.yaml +34 -0
  16. package/dist/clis/reddit/subreddit.yaml +39 -0
  17. package/dist/clis/twitter/bookmarks.yaml +85 -0
  18. package/dist/clis/twitter/profile.d.ts +1 -0
  19. package/dist/clis/twitter/profile.js +56 -0
  20. package/dist/clis/twitter/search.d.ts +1 -0
  21. package/dist/clis/twitter/search.js +60 -0
  22. package/dist/clis/twitter/timeline.d.ts +1 -0
  23. package/dist/clis/twitter/timeline.js +47 -0
  24. package/dist/clis/xiaohongshu/user.d.ts +1 -0
  25. package/dist/clis/xiaohongshu/user.js +40 -0
  26. package/dist/clis/xueqiu/feed.yaml +53 -0
  27. package/dist/clis/xueqiu/hot-stock.yaml +49 -0
  28. package/dist/clis/xueqiu/hot.yaml +46 -0
  29. package/dist/clis/xueqiu/search.yaml +53 -0
  30. package/dist/clis/xueqiu/stock.yaml +67 -0
  31. package/dist/clis/xueqiu/watchlist.yaml +46 -0
  32. package/dist/clis/zhihu/hot.yaml +6 -2
  33. package/dist/clis/zhihu/search.yaml +3 -1
  34. package/dist/engine.d.ts +1 -1
  35. package/dist/engine.js +9 -1
  36. package/dist/explore.js +50 -0
  37. package/dist/main.d.ts +1 -1
  38. package/dist/main.js +12 -5
  39. package/dist/pipeline/steps/browser.js +4 -8
  40. package/dist/pipeline/steps/fetch.js +19 -6
  41. package/dist/pipeline/steps/intercept.js +56 -29
  42. package/dist/pipeline/steps/tap.js +8 -6
  43. package/dist/pipeline/template.js +3 -1
  44. package/dist/pipeline/template.test.js +6 -0
  45. package/dist/types.d.ts +11 -1
  46. package/package.json +1 -1
  47. package/src/browser.ts +101 -6
  48. package/src/clis/bilibili/dynamic.ts +34 -0
  49. package/src/clis/bilibili/ranking.ts +25 -0
  50. package/src/clis/bilibili/subtitle.ts +100 -0
  51. package/src/clis/reddit/frontpage.yaml +30 -0
  52. package/src/clis/reddit/hot.yaml +3 -2
  53. package/src/clis/reddit/search.yaml +34 -0
  54. package/src/clis/reddit/subreddit.yaml +39 -0
  55. package/src/clis/twitter/bookmarks.yaml +85 -0
  56. package/src/clis/twitter/profile.ts +61 -0
  57. package/src/clis/twitter/search.ts +65 -0
  58. package/src/clis/twitter/timeline.ts +50 -0
  59. package/src/clis/xiaohongshu/user.ts +45 -0
  60. package/src/clis/xueqiu/feed.yaml +53 -0
  61. package/src/clis/xueqiu/hot-stock.yaml +49 -0
  62. package/src/clis/xueqiu/hot.yaml +46 -0
  63. package/src/clis/xueqiu/search.yaml +53 -0
  64. package/src/clis/xueqiu/stock.yaml +67 -0
  65. package/src/clis/xueqiu/watchlist.yaml +46 -0
  66. package/src/clis/zhihu/hot.yaml +6 -2
  67. package/src/clis/zhihu/search.yaml +3 -1
  68. package/src/engine.ts +10 -1
  69. package/src/explore.ts +51 -0
  70. package/src/main.ts +11 -5
  71. package/src/pipeline/steps/browser.ts +4 -7
  72. package/src/pipeline/steps/fetch.ts +22 -6
  73. package/src/pipeline/steps/intercept.ts +58 -28
  74. package/src/pipeline/steps/tap.ts +8 -6
  75. package/src/pipeline/template.test.ts +6 -0
  76. package/src/pipeline/template.ts +3 -1
  77. package/src/types.ts +4 -1
  78. package/dist/clis/index.d.ts +0 -22
  79. package/dist/clis/index.js +0 -34
  80. package/src/clis/index.ts +0 -46
@@ -0,0 +1,33 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
+ import { apiGet } from '../../bilibili.js';
3
/**
 * Registers the `bilibili dynamic` command.
 *
 * Requests `/x/polymer/web-dynamic/v1/feed/all` (unsigned, no query params)
 * through `apiGet` and converts the first `limit` feed items into rows with
 * the columns `id`, `author`, `text`, `likes` and `url`.
 */
cli({
    site: 'bilibili',
    name: 'dynamic',
    description: 'Get Bilibili user dynamic feed',
    domain: 'www.bilibili.com',
    strategy: Strategy.COOKIE,
    args: [{ name: 'limit', type: 'int', default: 15 }],
    columns: ['id', 'author', 'text', 'likes', 'url'],
    func: async (page, kwargs) => {
        const response = await apiGet(page, '/x/polymer/web-dynamic/v1/feed/all', { params: {}, signed: false });
        const feedItems = response?.data?.items ?? [];
        const maxRows = Number(kwargs.limit);

        // Builds one output row from one raw feed entry.
        const toRow = (entry) => {
            const dynamicPart = entry.modules?.module_dynamic;
            // Prefer the free-text description; fall back to the attached
            // video's title; otherwise leave the text empty.
            const text = dynamicPart?.desc?.text || dynamicPart?.major?.archive?.title || '';
            const url = entry.id_str ? `https://t.bilibili.com/${entry.id_str}` : '';
            return {
                id: entry.id_str ?? '',
                author: entry.modules?.module_author?.name ?? '',
                text,
                likes: entry.modules?.module_stat?.like?.count ?? 0,
                url,
            };
        };

        return feedItems.slice(0, maxRows).map(toRow);
    },
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,24 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
+ import { apiGet } from '../../bilibili.js';
3
/**
 * Registers the `bilibili ranking` command.
 *
 * Requests `/x/web-interface/ranking/v2` (rid 0, type "all", unsigned)
 * through `apiGet` and returns the first `limit` entries as rows.
 * `rank` is the 1-based position in the returned list and `score` carries
 * the entry's `stat.view` value.
 */
cli({
    site: 'bilibili',
    name: 'ranking',
    description: 'Get Bilibili video ranking board',
    domain: 'www.bilibili.com',
    strategy: Strategy.COOKIE,
    args: [{ name: 'limit', type: 'int', default: 20 }],
    columns: ['rank', 'title', 'author', 'score', 'url'],
    func: async (page, kwargs) => {
        const query = { rid: 0, type: 'all' };
        const response = await apiGet(page, '/x/web-interface/ranking/v2', { params: query, signed: false });
        const board = response?.data?.list ?? [];
        const maxRows = Number(kwargs.limit);

        // Builds one output row; `position` is the zero-based list index.
        const toRow = (video, position) => {
            const url = video.bvid ? `https://www.bilibili.com/video/${video.bvid}` : '';
            return {
                rank: position + 1,
                title: video.title ?? '',
                author: video.owner?.name ?? '',
                score: video.stat?.view ?? 0,
                url,
            };
        };

        return board.slice(0, maxRows).map(toRow);
    },
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,86 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
+ import { apiGet } from '../../bilibili.js';
3
/**
 * Registers the `bilibili subtitle` command.
 *
 * Flow: open the video page, read the video's CID from
 * `window.__INITIAL_STATE__`, ask the signed `/x/player/wbi/v2` endpoint for
 * the subtitle list, pick a track (the one matching `lang`, else the first),
 * download the track's JSON inside the page, and map every cue to a row.
 *
 * Args:
 *   bvid (required) - video id used in the page URL and the API call.
 *   lang (optional) - subtitle language code; when absent or not found the
 *                     first available track is used.
 *
 * Throws an Error when no page is supplied, the CID cannot be read, the API
 * reports a non-zero code (or returns nothing), no subtitle track exists,
 * the track URL is empty, or the subtitle file cannot be fetched/parsed.
 */
cli({
    site: 'bilibili',
    name: 'subtitle',
    description: '获取 Bilibili 视频的字幕',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'bvid', required: true },
        { name: 'lang', required: false, help: '字幕语言代码 (如 zh-CN, en-US, ai-zh),默认取第一个' },
    ],
    columns: ['index', 'from', 'to', 'content'],
    func: async (page, kwargs) => {
        if (!page)
            throw new Error('Requires browser');
        // 1. Open the video page first: this establishes an authenticated
        //    session, and the whole video does not have to finish loading.
        //    The id is percent-encoded so it cannot alter the URL structure.
        await page.goto(`https://www.bilibili.com/video/${encodeURIComponent(kwargs.bvid)}/`);
        // 2. Read the CID from the page's __INITIAL_STATE__.
        const cid = await page.evaluate(`(async () => {
            const state = window.__INITIAL_STATE__ || {};
            return state?.videoData?.cid;
        })()`);
        if (!cid) {
            throw new Error('无法在页面中提取到当前视频的 CID,请检查页面是否正常加载。');
        }
        // 3. Fetch the subtitle list on the Node side via apiGet with Wbi
        //    signing. A plain in-page fetch used to fail here: the /wbi/
        //    endpoints validate w_rid and answer unsigned calls with a 403
        //    HTML page.
        const payload = await apiGet(page, '/x/player/wbi/v2', {
            params: { bvid: kwargs.bvid, cid },
            signed: true, // enable automatic wbi signing
        });
        // `payload?.` keeps a missing response on the same error path instead
        // of surfacing as an unrelated TypeError.
        if (payload?.code !== 0) {
            throw new Error(`获取视频播放信息失败: ${payload?.message} (${payload?.code})`);
        }
        const subtitles = payload.data?.subtitle?.subtitles || [];
        if (subtitles.length === 0) {
            throw new Error('此视频没有发现外挂或智能字幕。');
        }
        // 4. Choose the subtitle track for the requested language.
        const target = kwargs.lang
            ? subtitles.find((s) => s.lan === kwargs.lang) || subtitles[0]
            : subtitles[0];
        const targetSubUrl = target.subtitle_url;
        if (!targetSubUrl) {
            throw new Error('[风控拦截/未登录] 获取到的 subtitle_url 为空!请确保 CLI 已成功登录且风控未封锁此账号。');
        }
        // Protocol-relative URLs ("//host/...") need an explicit scheme.
        const finalUrl = targetSubUrl.startsWith('//') ? 'https:' + targetSubUrl : targetSubUrl;
        // 5. Download and parse the subtitle JSON from the CDN inside the page.
        const fetchJs = `
      (async () => {
        const url = ${JSON.stringify(finalUrl)};
        const res = await fetch(url);
        const text = await res.text();

        if (text.startsWith('<!DOCTYPE') || text.startsWith('<html')) {
          return { error: 'HTML', text: text.substring(0, 100), url };
        }

        try {
          const subJson = JSON.parse(text);
          // B站真实返回格式是 { font_size: 0.4, font_color: "#FFFFFF", background_alpha: 0.5, background_color: "#9C27B0", Stroke: "none", type: "json" , body: [{from: 0, to: 0, content: ""}] }
          if (Array.isArray(subJson?.body)) return { success: true, data: subJson.body };
          if (Array.isArray(subJson)) return { success: true, data: subJson };
          return { error: 'UNKNOWN_JSON', data: subJson };
        } catch (e) {
          return { error: 'PARSE_FAILED', text: text.substring(0, 100) };
        }
      })()
    `;
        const items = await page.evaluate(fetchJs);
        if (items?.error) {
            throw new Error(`字幕获取失败: ${items.error}${items.text ? ' — ' + items.text : ''}`);
        }
        const finalItems = items?.data || [];
        if (!Array.isArray(finalItems)) {
            throw new Error('解析到的字幕列表对象不符合数组格式');
        }
        // 6. Map every cue to an output row (times formatted as "<seconds>s").
        return finalItems.map((item, idx) => ({
            index: idx + 1,
            from: Number(item.from || 0).toFixed(2) + 's',
            to: Number(item.to || 0).toFixed(2) + 's',
            content: item.content
        }));
    },
});
@@ -0,0 +1,30 @@
1
# reddit frontpage: lists posts from the r/all JSON listing.
# The evaluate step runs inside the browser page after navigating to
# reddit.com, so the request carries the page's cookies.
site: reddit
name: frontpage
description: Reddit Frontpage / r/all
domain: reddit.com
strategy: cookie
browser: true

args:
  limit:
    type: int
    default: 15

columns: [title, subreddit, author, upvotes, comments, url]

pipeline:
  - navigate: https://www.reddit.com
  - evaluate: |
      (async () => {
        // Emit the limit as a JS value rather than splicing it into a
        // quoted string (same pattern as hot.yaml).
        const limit = ${{ args.limit }};
        // raw_json=1 makes Reddit return unescaped text, so titles do not
        // contain "&amp;", "&lt;" or "&gt;".
        const res = await fetch('/r/all.json?limit=' + limit + '&raw_json=1', { credentials: 'include' });
        const j = await res.json();
        return j?.data?.children || [];
      })()
  - map:
      title: ${{ item.data.title }}
      subreddit: ${{ item.data.subreddit_name_prefixed }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
@@ -18,9 +18,10 @@ pipeline:
18
18
 
19
19
  - evaluate: |
20
20
  (async () => {
21
- const sub = '${{ args.subreddit }}';
21
+ const sub = ${{ args.subreddit | json }};
22
22
  const path = sub ? '/r/' + sub + '/hot.json' : '/hot.json';
23
- const res = await fetch(path + '?limit=${{ args.limit }}&raw_json=1', {
23
+ const limit = ${{ args.limit }};
24
+ const res = await fetch(path + '?limit=' + limit + '&raw_json=1', {
24
25
  credentials: 'include'
25
26
  });
26
27
  const d = await res.json();
@@ -0,0 +1,34 @@
1
# reddit search: searches Reddit posts through the /search.json endpoint.
# The evaluate step runs inside the browser page after navigating to
# reddit.com, so the request carries the page's cookies.
site: reddit
name: search
description: Search Reddit Posts
domain: reddit.com
strategy: cookie
browser: true

args:
  query:
    type: string
    required: true
  limit:
    type: int
    default: 15

columns: [title, subreddit, author, upvotes, comments, url]

pipeline:
  - navigate: https://www.reddit.com
  - evaluate: |
      (async () => {
        // The query is emitted with the json filter (as hot.yaml does for
        // its subreddit argument) so quotes or backslashes in the user's
        // text cannot break out of the string literal.
        const q = encodeURIComponent(String(${{ args.query | json }}));
        const limit = ${{ args.limit }};
        // raw_json=1 makes Reddit return unescaped text, so titles do not
        // contain "&amp;", "&lt;" or "&gt;".
        const res = await fetch('/search.json?q=' + q + '&limit=' + limit + '&raw_json=1', { credentials: 'include' });
        const j = await res.json();
        return j?.data?.children || [];
      })()
  - map:
      title: ${{ item.data.title }}
      subreddit: ${{ item.data.subreddit_name_prefixed }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
@@ -0,0 +1,39 @@
1
# reddit subreddit: lists posts of one subreddit under a chosen sort order.
# The evaluate step runs inside the browser page after navigating to
# reddit.com, so the request carries the page's cookies.
site: reddit
name: subreddit
description: Get posts from a specific Subreddit
domain: reddit.com
strategy: cookie
browser: true

args:
  name:
    type: string
    required: true
  sort:
    type: string
    default: hot
    description: "Sorting method: hot, new, top, rising"
  limit:
    type: int
    default: 15

columns: [title, author, upvotes, comments, url]

pipeline:
  - navigate: https://www.reddit.com
  - evaluate: |
      (async () => {
        // Arguments are emitted with the json filter (as hot.yaml does) so
        // quotes or backslashes cannot break out of a string literal.
        let sub = String(${{ args.name | json }});
        // Accept both "name" and "r/name".
        if (sub.startsWith('r/')) sub = sub.slice(2);
        const sort = String(${{ args.sort | json }});
        const limit = ${{ args.limit }};
        // Path segments are percent-encoded so they cannot change the URL
        // structure. raw_json=1 makes Reddit return unescaped text.
        const path = '/r/' + encodeURIComponent(sub) + '/' + encodeURIComponent(sort) + '.json';
        const res = await fetch(path + '?limit=' + limit + '&raw_json=1', { credentials: 'include' });
        const j = await res.json();
        return j?.data?.children || [];
      })()
  - map:
      title: ${{ item.data.title }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
@@ -0,0 +1,85 @@
1
# twitter bookmarks: lists bookmarked tweets via the web client's GraphQL
# "Bookmarks" operation. The evaluate step runs inside the x.com page, reads
# the ct0 cookie for the CSRF header, and flattens the timeline instructions
# into one object per tweet (deduplicated by rest_id).
site: twitter
name: bookmarks
description: 获取 Twitter 书签列表
domain: x.com
browser: true

args:
  limit:
    type: int
    default: 20
    description: Number of bookmarks to return (default 20)

pipeline:
  - navigate: https://x.com/i/bookmarks
  - wait: 2
  - evaluate: |
      (async () => {
        // The ct0 cookie value is sent back as the X-Csrf-Token header.
        const csrfToken = document.cookie.split(';').map(c=>c.trim()).find(c=>c.startsWith('ct0='))?.split('=')[1];
        if (!csrfToken) throw new Error('No ct0 cookie. Hint: Not logged into x.com.');
        const bearer = decodeURIComponent('AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA');
        const headers = {
          'Authorization': 'Bearer ' + bearer,
          'X-Csrf-Token': csrfToken,
          'X-Twitter-Auth-Type': 'OAuth2Session',
          'X-Twitter-Active-User': 'yes'
        };

        // A single request asks for at most 100 entries.
        const count = Math.min(${{ args.limit }}, 100);
        const variables = JSON.stringify({count, includePromotedContent: false});
        const features = JSON.stringify({
          rweb_video_screen_enabled: false, profile_label_improvements_pcf_label_in_post_enabled: true,
          responsive_web_profile_redirect_enabled: false, rweb_tipjar_consumption_enabled: false,
          verified_phone_label_enabled: false, creator_subscriptions_tweet_preview_api_enabled: true,
          responsive_web_graphql_timeline_navigation_enabled: true,
          responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
          premium_content_api_read_enabled: false, communities_web_enable_tweet_community_results_fetch: true,
          c9s_tweet_anatomy_moderator_badge_enabled: true,
          articles_preview_enabled: true, responsive_web_edit_tweet_api_enabled: true,
          graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
          view_counts_everywhere_api_enabled: true, longform_notetweets_consumption_enabled: true,
          responsive_web_twitter_article_tweet_consumption_enabled: true,
          tweet_awards_web_tipping_enabled: false,
          content_disclosure_indicator_enabled: true, content_disclosure_ai_generated_indicator_enabled: true,
          freedom_of_speech_not_reach_fetch_enabled: true, standardized_nudges_misinfo: true,
          tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
          longform_notetweets_rich_text_read_enabled: true, longform_notetweets_inline_media_enabled: false,
          responsive_web_enhance_cards_enabled: false
        });
        const endpoint = '/i/api/graphql/Fy0QMy4q_aZCpkO0PnyLYw/Bookmarks?variables=' + encodeURIComponent(variables) + '&features=' + encodeURIComponent(features);
        const response = await fetch(endpoint, {headers: headers, credentials: 'include'});
        if (!response.ok) throw new Error('HTTP ' + response.status + '. Hint: queryId may have changed.');
        const payload = await response.json();

        // Two response layouts are accepted for the instruction list.
        const instructions = payload.data?.bookmark_timeline_v2?.timeline?.instructions || payload.data?.bookmark_timeline?.timeline?.instructions || [];
        const tweets = [];
        const seenIds = new Set();
        for (const instruction of instructions) {
          for (const entry of (instruction.entries || [])) {
            const result = entry.content?.itemContent?.tweet_results?.result;
            if (!result) continue;
            // Some results nest the tweet one level down under `tweet`.
            const tweet = result.tweet || result;
            const legacy = tweet.legacy || {};
            if (!tweet.rest_id || seenIds.has(tweet.rest_id)) continue;
            seenIds.add(tweet.rest_id);
            const user = tweet.core?.user_results?.result;
            const noteText = tweet.note_tweet?.note_tweet_results?.result?.text;
            const screenName = user?.legacy?.screen_name || user?.core?.screen_name;
            tweets.push({
              id: tweet.rest_id,
              author: screenName,
              name: user?.legacy?.name || user?.core?.name,
              url: 'https://x.com/' + (screenName || '_') + '/status/' + tweet.rest_id,
              text: noteText || legacy.full_text || '',
              likes: legacy.favorite_count,
              retweets: legacy.retweet_count,
              created_at: legacy.created_at
            });
          }
        }
        return tweets;
      })()

  - map:
      author: ${{ item.author }}
      text: ${{ item.text }}
      likes: ${{ item.likes }}
      url: ${{ item.url }}

  - limit: ${{ args.limit }}

columns: [author, text, likes, url]
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,56 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
/**
 * Registers the `twitter profile` command.
 *
 * Rather than opening the profile page, it opens the live search
 * `from:<username>`, installs the page interceptor for 'SearchTimeline',
 * scrolls to trigger further API calls, and extracts tweets from every
 * intercepted payload.
 *
 * Args:
 *   username (required) - account whose tweets are listed.
 *   limit (default 15)  - maximum number of rows returned.
 *
 * Returns rows with `id`, `text`, `likes`, `views`, `url`; an empty array
 * when nothing was intercepted. Payloads or entries that do not have the
 * expected shape are skipped.
 */
cli({
    site: 'twitter',
    name: 'profile',
    description: 'Fetch tweets from a user profile',
    domain: 'x.com',
    strategy: Strategy.INTERCEPT,
    browser: true,
    args: [
        { name: 'username', type: 'string', required: true },
        { name: 'limit', type: 'int', default: 15 },
    ],
    columns: ['id', 'text', 'likes', 'views', 'url'],
    func: async (page, kwargs) => {
        // Percent-encode the whole query so characters such as '&' or '#'
        // in the argument cannot alter the URL (the search command encodes
        // its query the same way).
        const query = encodeURIComponent(`from:${kwargs.username}`);
        await page.goto(`https://x.com/search?q=${query}&f=live`);
        await page.wait(5);
        // Inject the interceptor (presumably matched against request URLs
        // containing 'SearchTimeline' — confirm against the page API).
        await page.installInterceptor('SearchTimeline');
        // Scrolling makes the page request more timeline data.
        await page.autoScroll({ times: 3, delayMs: 2000 });
        const requests = await page.getInterceptedRequests();
        if (!requests || requests.length === 0)
            return [];
        const results = [];
        for (const req of requests) {
            // Explicit shape checks replace a blanket try/catch: unexpected
            // payloads are skipped, real programming errors still surface.
            const insts = req?.data?.data?.search_by_raw_query?.search_timeline?.timeline?.instructions;
            if (!Array.isArray(insts))
                continue;
            const addEntries = insts.find((i) => i?.type === 'TimelineAddEntries');
            if (!addEntries || !Array.isArray(addEntries.entries))
                continue;
            for (const entry of addEntries.entries) {
                // Only "tweet-*" entries carry tweets.
                if (typeof entry?.entryId !== 'string' || !entry.entryId.startsWith('tweet-'))
                    continue;
                let tweet = entry.content?.itemContent?.tweet_results?.result;
                if (!tweet)
                    continue;
                // Unwrap the visibility wrapper to reach the tweet itself.
                if (tweet.__typename === 'TweetWithVisibilityResults' && tweet.tweet) {
                    tweet = tweet.tweet;
                }
                results.push({
                    id: tweet.rest_id,
                    text: tweet.legacy?.full_text || '',
                    likes: tweet.legacy?.favorite_count || 0,
                    views: tweet.views?.count || '0',
                    url: `https://x.com/i/status/${tweet.rest_id}`
                });
            }
        }
        return results.slice(0, kwargs.limit);
    }
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,60 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
/**
 * Registers the `twitter search` command.
 *
 * Opens the "top" search results for the query, installs the page
 * interceptor for 'SearchTimeline', scrolls to trigger further API calls,
 * and extracts tweets from every intercepted payload.
 *
 * Args:
 *   query (required)   - search text; percent-encoded into the URL.
 *   limit (default 15) - maximum number of rows returned.
 *
 * Returns rows with `id`, `author`, `text`, `likes`, `views`, `url`; an
 * empty array when nothing was intercepted. Payloads or entries that do not
 * have the expected shape are skipped.
 */
cli({
    site: 'twitter',
    name: 'search',
    description: 'Search Twitter/X for tweets',
    domain: 'x.com',
    strategy: Strategy.INTERCEPT, // Use intercept strategy
    browser: true,
    args: [
        { name: 'query', type: 'string', required: true },
        { name: 'limit', type: 'int', default: 15 },
    ],
    columns: ['id', 'author', 'text', 'likes', 'views', 'url'],
    func: async (page, kwargs) => {
        // 1. Navigate to the search page
        const q = encodeURIComponent(kwargs.query);
        await page.goto(`https://x.com/search?q=${q}&f=top`);
        await page.wait(5);
        // 2. Inject the interceptor (presumably matched against request
        //    URLs containing 'SearchTimeline' — confirm against the page API)
        await page.installInterceptor('SearchTimeline');
        // 3. Trigger API calls by scrolling
        await page.autoScroll({ times: 3, delayMs: 2000 });
        // 4. Retrieve data
        const requests = await page.getInterceptedRequests();
        if (!requests || requests.length === 0)
            return [];
        const results = [];
        for (const req of requests) {
            // Explicit shape checks replace a blanket try/catch: unexpected
            // payloads are skipped, real programming errors still surface.
            const insts = req?.data?.data?.search_by_raw_query?.search_timeline?.timeline?.instructions;
            if (!Array.isArray(insts))
                continue;
            const addEntries = insts.find((i) => i?.type === 'TimelineAddEntries');
            if (!addEntries || !Array.isArray(addEntries.entries))
                continue;
            for (const entry of addEntries.entries) {
                // Only "tweet-*" entries carry tweets.
                if (typeof entry?.entryId !== 'string' || !entry.entryId.startsWith('tweet-'))
                    continue;
                let tweet = entry.content?.itemContent?.tweet_results?.result;
                if (!tweet)
                    continue;
                // Unwrap the visibility wrapper to reach the tweet itself.
                if (tweet.__typename === 'TweetWithVisibilityResults' && tweet.tweet) {
                    tweet = tweet.tweet;
                }
                // The screen name may sit under `legacy` or under `core`
                // (the bookmarks pipeline reads both locations as well).
                const user = tweet.core?.user_results?.result;
                results.push({
                    id: tweet.rest_id,
                    author: user?.legacy?.screen_name || user?.core?.screen_name || 'unknown',
                    text: tweet.legacy?.full_text || '',
                    likes: tweet.legacy?.favorite_count || 0,
                    views: tweet.views?.count || '0',
                    url: `https://x.com/i/status/${tweet.rest_id}`
                });
            }
        }
        return results.slice(0, kwargs.limit);
    }
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,47 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
/**
 * Registers the `twitter timeline` command.
 *
 * Opens x.com/home, wraps `window.fetch` so that JSON bodies of requests
 * whose URL contains 'HomeTimeline' are collected in
 * `window.__intercept_data`, scrolls three times, then reports how many
 * responses were captured together with the first 300 characters of the
 * first one. The `limit` argument is declared but not read by this function.
 */
cli({
    site: 'twitter',
    name: 'timeline',
    description: 'Twitter Home Timeline',
    domain: 'x.com',
    strategy: Strategy.COOKIE,
    args: [{ name: 'limit', type: 'int', default: 20 }],
    columns: ['responseType', 'first'],
    func: async (page, kwargs) => {
        // Script that installs the fetch wrapper inside the page.
        const installHook = `
    () => {
      window.__intercept_data = [];
      const origFetch = window.fetch;
      window.fetch = async function(...args) {
        let u = typeof args[0] === 'string' ? args[0] : (args[0] && args[0].url) || '';
        const res = await origFetch.apply(this, args);
        setTimeout(async () => {
          try {
            if (u.includes('HomeTimeline')) {
              const clone = res.clone();
              const j = await clone.json();
              window.__intercept_data.push(j);
            }
          } catch(e) {}
        }, 0);
        return res;
      };
    }
  `;
        const scrollToBottom = '() => window.scrollTo(0, document.body.scrollHeight)';
        const readCaptured = '() => window.__intercept_data';

        await page.goto('https://x.com/home');
        await page.wait(5);
        // Hook fetch manually so the captured traffic can be inspected.
        await page.evaluate(installHook);

        // Scroll three times, pausing after each, to make the page load more.
        let passesLeft = 3;
        while (passesLeft > 0) {
            await page.evaluate(scrollToBottom);
            await page.wait(2);
            passesLeft -= 1;
        }

        const captured = await page.evaluate(readCaptured);
        const hasResponses = Boolean(captured) && captured.length !== 0;
        if (hasResponses) {
            const preview = JSON.stringify(captured[0]).substring(0, 300);
            return [{ responseType: `captured ${captured.length} responses`, first: preview }];
        }
        return [{ responseType: 'no data captured' }];
    }
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,40 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
/**
 * Registers the `xiaohongshu user` command.
 *
 * Opens the user's profile page, installs the page interceptor for
 * 'v1/user/posted', scrolls to trigger API calls, and collects the notes
 * found in every intercepted payload.
 *
 * Args:
 *   id (required)      - user id placed in the profile URL.
 *   limit (default 15) - maximum number of rows returned.
 *
 * Returns rows with `id`, `title`, `type`, `likes`, `url`; an empty array
 * when nothing was intercepted. Payloads without a `notes` array are skipped.
 */
cli({
    site: 'xiaohongshu',
    name: 'user',
    description: 'Get user notes from Xiaohongshu',
    domain: 'xiaohongshu.com',
    strategy: Strategy.INTERCEPT,
    browser: true,
    args: [
        { name: 'id', type: 'string', required: true },
        { name: 'limit', type: 'int', default: 15 },
    ],
    columns: ['id', 'title', 'type', 'likes', 'url'],
    func: async (page, kwargs) => {
        // Percent-encode the id so characters such as '/' or '?' cannot
        // change which page is opened.
        await page.goto(`https://www.xiaohongshu.com/user/profile/${encodeURIComponent(kwargs.id)}`);
        await page.wait(5);
        await page.installInterceptor('v1/user/posted');
        // Trigger API by scrolling
        await page.autoScroll({ times: 2, delayMs: 2000 });
        // Retrieve data
        const requests = await page.getInterceptedRequests();
        if (!requests || requests.length === 0)
            return [];
        const results = [];
        for (const req of requests) {
            const notes = req?.data?.data?.notes;
            // Iterate real arrays only; any other shape would otherwise
            // produce garbage rows or throw.
            if (!Array.isArray(notes))
                continue;
            for (const note of notes) {
                // The id appears under `note_id` or `id`.
                const noteId = note.note_id || note.id;
                results.push({
                    id: noteId,
                    title: note.display_title || '',
                    type: note.type || '',
                    likes: note.interact_info?.liked_count || '0',
                    url: `https://www.xiaohongshu.com/explore/${noteId}`
                });
            }
        }
        return results.slice(0, kwargs.limit);
    }
});
@@ -0,0 +1,53 @@
1
# xueqiu feed: reads the home timeline endpoint from inside the xueqiu.com
# page (so the request carries the page's cookies) and flattens every status
# into a plain row.
site: xueqiu
name: feed
description: 获取雪球首页时间线(关注用户的动态)
domain: xueqiu.com
browser: true

args:
  page:
    type: int
    default: 1
    description: 页码,默认 1
  limit:
    type: int
    default: 20
    description: 每页数量,默认 20

pipeline:
  - navigate: https://xueqiu.com
  - evaluate: |
      (async () => {
        const page = ${{ args.page }};
        const count = ${{ args.limit }};
        const resp = await fetch(`https://xueqiu.com/v4/statuses/home_timeline.json?page=${page}&count=${count}`, {credentials: 'include'});
        if (!resp.ok) throw new Error('HTTP ' + resp.status + ' Hint: Not logged in?');
        const d = await resp.json();

        // Remove tags, then decode the common entities. "&amp;" is decoded
        // last: decoding it first would turn "&amp;lt;" into "&lt;" and then
        // wrongly into "<" (double decoding).
        const strip = (html) => (html || '')
          .replace(/<[^>]+>/g, '')
          .replace(/&nbsp;/g, ' ')
          .replace(/&lt;/g, '<')
          .replace(/&gt;/g, '>')
          .replace(/&amp;/g, '&')
          .trim();
        // The list is read from `home_timeline`, falling back to `list`.
        const list = d.home_timeline || d.list || [];
        return list.map(item => {
          const user = item.user || {};
          // Build the link only when the author id is known, instead of
          // emitting a URL that contains the text "undefined".
          const hasAuthorId = user.id !== undefined && user.id !== null;
          return {
            id: item.id,
            text: strip(item.description).substring(0, 200),
            url: hasAuthorId ? 'https://xueqiu.com/' + user.id + '/' + item.id : '',
            author: user.screen_name,
            likes: item.fav_count,
            retweets: item.retweet_count,
            replies: item.reply_count,
            created_at: item.created_at ? new Date(item.created_at).toISOString() : null
          };
        });
      })()

  - map:
      author: ${{ item.author }}
      text: ${{ item.text }}
      likes: ${{ item.likes }}
      replies: ${{ item.replies }}
      url: ${{ item.url }}

  - limit: ${{ args.limit }}

columns: [author, text, likes, replies, url]