@jackwener/opencli 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLI-CREATOR.md CHANGED
@@ -57,8 +57,9 @@ opencli bilibili hot -v # 查看已有命令的 pipeline 每步数据流
57
57
 
58
58
  1. **后缀爆破法 (`.json`)**: 像 Reddit 这样复杂的网站,只要在其 URL 后加上 `.json`(例如 `/r/all.json`),就能在带 Cookie 的情况下直接利用 `fetch` 拿到极其干净的 REST 数据(Tier 2 Cookie 策略极速秒杀)。另外如功能完备的**雪球 (xueqiu)** 也可以走这种纯 API 的方式极简获取,成为你构建简单 YAML 的黄金标杆。
59
59
  2. **全局状态查找法 (`__INITIAL_STATE__`)**: 许多服务端渲染 (SSR) 的网站(如小红书、Bilibili)会将首页或详情页的完整数据挂载到全局 window 对象上。与其去拦截网络请求,不如直接 `page.evaluate('() => window.__INITIAL_STATE__')` 获取整个数据树。
60
- 3. **框架探测与 Store Action 截断**: 如果站点使用 Vue + Pinia,可以使用 `tap` 步骤调用 action,让前端框架代替你完成复杂的鉴权签名封装。
61
- 4. **底层 XHR/Fetch 拦截**: 最后手段,当上述都不行时,使用 TypeScript 适配器进行无侵入式的请求抓取。
60
+ 3. **主动交互触发法 (Active Interaction)**: 很多深层 API(如视频字幕、评论下的回复)是懒加载的。在静态抓包找不到数据时,尝试在 `evaluate` 步骤或手动打断点时,主动去**点击(Click)页面上的对应按钮**(如"CC"、"展开全部"),从而诱发隐藏的 Network Fetch。
61
+ 4. **框架探测与 Store Action 截断**: 如果站点使用 Vue + Pinia,可以使用 `tap` 步骤调用 action,让前端框架代替你完成复杂的鉴权签名封装。
62
+ 5. **底层 XHR/Fetch 拦截**: 最后手段,当上述都不行时,使用 TypeScript 适配器进行无侵入式的请求抓取。
62
63
 
63
64
  ### 1d. 框架检测
64
65
 
@@ -411,6 +412,35 @@ cli({
411
412
 
412
413
  > **拦截核心思路**:不自己构造签名,而是利用 `installInterceptor` 劫持网站自己的 `XMLHttpRequest` 和 `fetch`,让网站发请求,我们直接在底层取出解析好的 `response.json()`。
413
414
 
415
+ #### 进阶场景 1: 级联请求 (Cascading Requests) 与鉴权绕过
416
+
417
+ 部分数据需要通过一连串相互依赖的请求才能拿到(例如 B 站获取视频字幕:先用 `bvid` 获取 `cid`,再通过 `cid` 请求需要 Wbi 签名的接口,拿到字幕列表的拉取地址,最后 fetch 真实的 CDN 资源)。在此类场景中,你必须在一个 `evaluate` 块内部或者在 TypeScript Node 端编排整个请求链条:
418
+
419
+ ```typescript
420
+ // 真实场景:B站获取视频字幕的级联获取思路
421
+ const subtitleUrls = await page.evaluate(async (bvid) => {
422
+ // Step 1: 拿 CID (通常可以通过页面全局状态极速提取)
423
+ const cid = window.__INITIAL_STATE__?.videoData?.cid;
424
+
425
+ // Step 2: 依据 BVID 和 CID 拿字幕配置 (可能需要携带 W_RID 签名或依赖浏览器当前登录状态 Cookie)
426
+ const res = await fetch(`/x/player/wbi/v2?bvid=${bvid}&cid=${cid}`, { credentials: 'include' });
427
+ const data = await res.json();
428
+
429
+ // Step 3: 风控拦截/未登录降级空值检测 (Anti-Bot Empty Value Detection) ⚠️ 极其重要
430
+ // 很多大厂 API 即使签名失败或缺少有效的登录 Cookie,依然会返回 HTTP 200,但把关键 URL 设为 ""
431
+ const firstSubUrl = data.data?.subtitle?.subtitles?.[0]?.subtitle_url;
432
+ if (!firstSubUrl) {
433
+ throw new Error('被风控降级或需登录:拿不到真实的 subtitle_url,请检查 Cookie 状态 (Tier 2/3)');
434
+ }
435
+
436
+ return firstSubUrl;
437
+ }, kwargs.bvid);
438
+
439
+ // Step 4: 拉取最终的 CDN 静态文件 (无鉴权)
440
+ const finalRes = await fetch(subtitleUrls.startsWith('//') ? 'https:' + subtitleUrls : subtitleUrls);
441
+ const subtitles = await finalRes.json();
442
+ ```
443
+
414
444
  ---
415
445
 
416
446
  ## Step 4: 测试
@@ -539,6 +569,70 @@ git push
539
569
 
540
570
  ---
541
571
 
572
+ ## 进阶模式: 级联请求 (Cascading Requests)
573
+
574
+ 当目标数据需要多步 API 链式获取时(如 `BVID → CID → 字幕列表 → 字幕内容`),必须使用 **TS 适配器**。YAML 无法处理这种多步逻辑。
575
+
576
+ ### 模板代码
577
+
578
+ ```typescript
579
+ import { cli, Strategy } from '../../registry.js';
580
+ import type { IPage } from '../../types.js';
581
+ import { apiGet } from '../../bilibili.js'; // 复用平台 SDK
582
+
583
+ cli({
584
+ site: 'bilibili',
585
+ name: 'subtitle',
586
+ strategy: Strategy.COOKIE,
587
+ args: [{ name: 'bvid', required: true }],
588
+ columns: ['index', 'from', 'to', 'content'],
589
+ func: async (page: IPage | null, kwargs: any) => {
590
+ if (!page) throw new Error('Requires browser');
591
+
592
+ // Step 1: 建立 Session
593
+ await page.goto(`https://www.bilibili.com/video/${kwargs.bvid}/`);
594
+
595
+ // Step 2: 从页面提取中间 ID (__INITIAL_STATE__)
596
+ const cid = await page.evaluate(`(async () => {
597
+ return window.__INITIAL_STATE__?.videoData?.cid;
598
+ })()`);
599
+ if (!cid) throw new Error('无法提取 CID');
600
+
601
+ // Step 3: 用中间 ID 调用下一级 API (自动 Wbi 签名)
602
+ const payload = await apiGet(page, '/x/player/wbi/v2', {
603
+ params: { bvid: kwargs.bvid, cid },
604
+ signed: true, // ← 自动生成 w_rid
605
+ });
606
+
607
+ // Step 4: 检测风控降级 (空值断言)
608
+ const subtitles = payload.data?.subtitle?.subtitles || [];
609
+ const url = subtitles[0]?.subtitle_url;
610
+ if (!url) throw new Error('subtitle_url 为空,疑似风控降级');
611
+
612
+ // Step 5: 拉取最终数据 (CDN JSON)
613
+ const items = await page.evaluate(`(async () => {
614
+ const res = await fetch(${JSON.stringify('https:' + url)});
615
+ const json = await res.json();
616
+ return { data: json.body || json };
617
+ })()`);
618
+
619
+ return items.data.map((item, idx) => ({ ... }));
620
+ },
621
+ });
622
+ ```
623
+
624
+ ### 关键要点
625
+
626
+ | 步骤 | 注意事项 |
627
+ |------|----------|
628
+ | 提取中间 ID | 优先从 `__INITIAL_STATE__` 拿,避免额外 API 调用 |
629
+ | Wbi 签名 | B 站 `/wbi/` 接口**强制校验** `w_rid`,纯 `fetch` 会被 403 |
630
+ | 空值断言 | 即使 HTTP 200,核心字段可能为空串(风控降级) |
631
+ | CDN URL | 常以 `//` 开头,记得补 `https:` |
632
+ | `JSON.stringify` | 拼接 URL 到 evaluate 时必须用它转义,避免注入 |
633
+
634
+ ---
635
+
542
636
  ## 常见陷阱
543
637
 
544
638
  | 陷阱 | 表现 | 解决方案 |
@@ -553,6 +647,8 @@ git push
553
647
  | TS evaluate 格式 | `() => {}` 报 `result is not a function` | TS 中 `page.evaluate()` 必须用 IIFE:`(async () => { ... })()` |
554
648
  | 页面异步加载 | evaluate 拿到空数据(store state 还没更新) | 在 evaluate 内用 polling 等待数据出现,或增加 `wait` 时间 |
555
649
  | YAML 内嵌大段 JS | 调试困难,字符串转义问题 | 超过 10 行 JS 的命令改用 TS adapter |
650
+ | **被风控拦截(伪 200)** | 获取到的 JSON 里核心数据是 `""`(空串) | 极易被误判为成功。必须添加空值断言;一旦核心数据为空,立刻升级鉴权 Tier 并重新配置 Cookie |
651
+ | **找不到 API** | `explore` 打分给出的候选端点都拿不到深层数据 | 点击页面按钮诱发懒加载请求,再结合 `getInterceptedRequests` 获取 |
556
652
 
557
653
  ---
558
654
 
@@ -565,9 +661,10 @@ git push
565
661
  opencli generate https://www.example.com --goal "hot"
566
662
 
567
663
  # 或分步执行:
568
- opencli explore https://www.example.com --site mysite # 发现 API
569
- opencli synthesize mysite # 生成候选 YAML
570
- opencli verify mysite/hot --smoke # 冒烟测试
664
+ opencli explore https://www.example.com --site mysite # 发现 API
665
+ opencli explore https://www.example.com --auto --click "字幕,CC" # 模拟点击触发懒加载 API
666
+ opencli synthesize mysite # 生成候选 YAML
667
+ opencli verify mysite/hot --smoke # 冒烟测试
571
668
  ```
572
669
 
573
670
  生成的候选 YAML 保存在 `.opencli/explore/mysite/candidates/`,可直接复制到 `src/clis/mysite/` 并微调。
package/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
 
8
8
  [![npm](https://img.shields.io/npm/v/@jackwener/opencli)](https://www.npmjs.com/package/@jackwener/opencli)
9
9
 
10
- A CLI tool that turns **any website** into a command-line interface. **35+ commands** across **17 sites** — bilibili, zhihu, xiaohongshu, twitter, reddit, xueqiu, github, v2ex, hackernews, bbc, weibo, boss, yahoo-finance, reuters, smzdm, ctrip, youtube — powered by browser session reuse and AI-native discovery.
10
+ A CLI tool that turns **any website** into a command-line interface. **46 commands** across **17 sites** — bilibili, zhihu, xiaohongshu, twitter, reddit, xueqiu, github, v2ex, hackernews, bbc, weibo, boss, yahoo-finance, reuters, smzdm, ctrip, youtube — powered by browser session reuse and AI-native discovery.
11
11
 
12
12
  ## ✨ Highlights
13
13
 
@@ -82,12 +82,12 @@ Public API commands (`hackernews`, `github search`, `v2ex`) need no browser at a
82
82
 
83
83
  | Site | Commands | Mode |
84
84
  |------|----------|------|
85
- | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` | 🔐 Browser |
85
+ | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` `subtitle` `dynamic` `ranking` | 🔐 Browser |
86
86
  | **zhihu** | `hot` `search` `question` | 🔐 Browser |
87
- | **xiaohongshu** | `search` `notifications` `feed` | 🔐 Browser |
87
+ | **xiaohongshu** | `search` `notifications` `feed` `me` `user` | 🔐 Browser |
88
88
  | **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` | 🔐 Browser |
89
- | **twitter** | `trending` `bookmarks` | 🔐 Browser |
90
- | **reddit** | `hot` | 🔐 Browser |
89
+ | **twitter** | `trending` `bookmarks` `profile` `search` `timeline` | 🔐 Browser |
90
+ | **reddit** | `hot` `frontpage` `search` `subreddit` | 🔐 Browser |
91
91
  | **weibo** | `hot` | 🔐 Browser |
92
92
  | **boss** | `search` | 🔐 Browser |
93
93
  | **youtube** | `search` | 🔐 Browser |
package/README.zh-CN.md CHANGED
@@ -11,7 +11,7 @@ OpenCLI 通过 Chrome 浏览器 + [Playwright MCP Bridge](https://github.com/nic
11
11
 
12
12
  ## ✨ 亮点
13
13
 
14
- - 🌐 **35+ 命令,17 个站点** — B站、知乎、小红书、Twitter、Reddit、雪球(xueqiu)、GitHub、V2EX、Hacker News、BBC、微博、BOSS直聘、Yahoo Finance、路透社、什么值得买、携程、YouTube
14
+ - 🌐 **46 个命令,17 个站点** — B站、知乎、小红书、Twitter、Reddit、雪球(xueqiu)、GitHub、V2EX、Hacker News、BBC、微博、BOSS直聘、Yahoo Finance、路透社、什么值得买、携程、YouTube
15
15
  - 🔐 **零风控** — 复用 Chrome 登录态,无需存储任何凭证
16
16
  - 🤖 **AI 原生** — `explore` 自动发现 API,`synthesize` 生成适配器,`cascade` 探测认证策略
17
17
  - 🚀 **动态加载引擎** — 只需将 `.ts` 或 `.yaml` 适配器放入 `clis/` 文件夹即可自动注册生效
@@ -83,12 +83,12 @@ npm install -g @jackwener/opencli@latest
83
83
 
84
84
  | 站点 | 命令 | 模式 |
85
85
  |------|------|------|
86
- | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` | 🔐 浏览器 |
86
+ | **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` `subtitle` `dynamic` `ranking` | 🔐 浏览器 |
87
87
  | **zhihu** | `hot` `search` `question` | 🔐 浏览器 |
88
- | **xiaohongshu** | `search` `notifications` `feed` | 🔐 浏览器 |
88
+ | **xiaohongshu** | `search` `notifications` `feed` `me` `user` | 🔐 浏览器 |
89
89
  | **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` | 🔐 浏览器 |
90
- | **twitter** | `trending` `bookmarks` | 🔐 浏览器 |
91
- | **reddit** | `hot` | 🔐 浏览器 |
90
+ | **twitter** | `trending` `bookmarks` `profile` `search` `timeline` | 🔐 浏览器 |
91
+ | **reddit** | `hot` `frontpage` `search` `subreddit` | 🔐 浏览器 |
92
92
  | **weibo** | `hot` | 🔐 浏览器 |
93
93
  | **boss** | `search` | 🔐 浏览器 |
94
94
  | **youtube** | `search` | 🔐 浏览器 |
package/SKILL.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: opencli
3
3
  description: "OpenCLI — Make any website your CLI. Zero risk, AI-powered, reuse Chrome login."
4
- version: 0.1.0
4
+ version: 0.4.0
5
5
  author: jackwener
6
6
  tags: [cli, browser, web, mcp, playwright, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, xueqiu, AI, agent]
7
7
  ---
@@ -49,6 +49,9 @@ opencli bilibili favorite # 我的收藏
49
49
  opencli bilibili history --limit 20 # 观看历史
50
50
  opencli bilibili feed --limit 10 # 动态时间线
51
51
  opencli bilibili user-videos --uid 12345 # 用户投稿
52
+ opencli bilibili subtitle --bvid BV1xxx # 获取视频字幕 (支持 --lang zh-CN)
53
+ opencli bilibili dynamic --limit 10 # 动态
54
+ opencli bilibili ranking --limit 10 # 排行榜
52
55
 
53
56
  # 知乎 (browser)
54
57
  opencli zhihu hot --limit 10 # 知乎热榜
@@ -59,6 +62,8 @@ opencli zhihu question --id 34816524 # 问题详情和回答
59
62
  opencli xiaohongshu search --keyword "美食" # 搜索笔记
60
63
  opencli xiaohongshu notifications # 通知(mentions/likes/connections)
61
64
  opencli xiaohongshu feed --limit 10 # 推荐 Feed
65
+ opencli xiaohongshu me # 我的信息
66
+ opencli xiaohongshu user --uid xxx # 用户主页
62
67
 
63
68
  # 雪球 Xueqiu (browser)
64
69
  opencli xueqiu hot-stock --limit 10 # 雪球热门股票榜
@@ -73,10 +78,16 @@ opencli github search --keyword "cli" # 搜索仓库
73
78
  # Twitter/X (browser)
74
79
  opencli twitter trending --limit 10 # 热门话题
75
80
  opencli twitter bookmarks --limit 20 # 获取收藏的书签推文
81
+ opencli twitter search --keyword "AI" # 搜索推文
82
+ opencli twitter profile --username elonmusk # 用户资料
83
+ opencli twitter timeline --limit 20 # 时间线
76
84
 
77
85
  # Reddit (browser)
78
86
  opencli reddit hot --limit 10 # 热门帖子
79
87
  opencli reddit hot --subreddit programming # 指定子版块
88
+ opencli reddit frontpage --limit 10 # 首页
89
+ opencli reddit search --keyword "AI" # 搜索
90
+ opencli reddit subreddit --name rust # 子版块浏览
80
91
 
81
92
  # V2EX (public)
82
93
  opencli v2ex hot --limit 10 # 热门话题
@@ -135,6 +146,9 @@ opencli generate <url> --goal "hot"
135
146
  # Strategy Cascade: auto-probe PUBLIC → COOKIE → HEADER
136
147
  opencli cascade <api-url>
137
148
 
149
+ # Explore with interactive fuzzing (click buttons to trigger lazy APIs)
150
+ opencli explore <url> --auto --click "字幕,CC,评论"
151
+
138
152
  # Verify: smoke-test a generated adapter
139
153
  opencli verify <site/name> --smoke
140
154
  ```
package/dist/browser.d.ts CHANGED
@@ -23,7 +23,11 @@ export declare class Page implements IPage {
23
23
  click(ref: string): Promise<void>;
24
24
  typeText(ref: string, text: string): Promise<void>;
25
25
  pressKey(key: string): Promise<void>;
26
- wait(seconds: number): Promise<void>;
26
+ wait(options: number | {
27
+ text?: string;
28
+ time?: number;
29
+ timeout?: number;
30
+ }): Promise<void>;
27
31
  tabs(): Promise<any>;
28
32
  closeTab(index?: number): Promise<void>;
29
33
  newTab(): Promise<void>;
package/dist/browser.js CHANGED
@@ -110,8 +110,14 @@ export class Page {
110
110
  async pressKey(key) {
111
111
  await this.call('tools/call', { name: 'browser_press_key', arguments: { key } });
112
112
  }
113
- async wait(seconds) {
114
- await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: seconds } });
113
+ async wait(options) {
114
+ if (typeof options === 'number') {
115
+ await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: options } });
116
+ }
117
+ else {
118
+ // Pass directly to native wait_for, which supports natively awaiting text strings without heavy DOM polling
119
+ await this.call('tools/call', { name: 'browser_wait_for', arguments: options });
120
+ }
115
121
  }
116
122
  async tabs() {
117
123
  return this.call('tools/call', { name: 'browser_tabs', arguments: { action: 'list' } });
@@ -137,10 +143,32 @@ export class Page {
137
143
  async autoScroll(options = {}) {
138
144
  const times = options.times ?? 3;
139
145
  const delayMs = options.delayMs ?? 2000;
140
- for (let i = 0; i < times; i++) {
141
- await this.evaluate('() => window.scrollTo(0, document.body.scrollHeight)');
142
- await this.wait(delayMs / 1000);
146
+ const js = `
147
+ async () => {
148
+ const maxTimes = ${times};
149
+ const maxWaitMs = ${delayMs};
150
+ for (let i = 0; i < maxTimes; i++) {
151
+ const lastHeight = document.body.scrollHeight;
152
+ window.scrollTo(0, lastHeight);
153
+ await new Promise(resolve => {
154
+ let timeoutId;
155
+ const observer = new MutationObserver(() => {
156
+ if (document.body.scrollHeight > lastHeight) {
157
+ clearTimeout(timeoutId);
158
+ observer.disconnect();
159
+ setTimeout(resolve, 100); // Small debounce for rendering
160
+ }
161
+ });
162
+ observer.observe(document.body, { childList: true, subtree: true });
163
+ timeoutId = setTimeout(() => {
164
+ observer.disconnect();
165
+ resolve(null);
166
+ }, maxWaitMs);
167
+ });
143
168
  }
169
+ }
170
+ `;
171
+ await this.evaluate(js);
144
172
  }
145
173
  async installInterceptor(pattern) {
146
174
  const js = `
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,86 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
+ import { apiGet } from '../../bilibili.js';
3
+ cli({
4
+ site: 'bilibili',
5
+ name: 'subtitle',
6
+ description: '获取 Bilibili 视频的字幕',
7
+ strategy: Strategy.COOKIE,
8
+ args: [
9
+ { name: 'bvid', required: true },
10
+ { name: 'lang', required: false, help: '字幕语言代码 (如 zh-CN, en-US, ai-zh),默认取第一个' },
11
+ ],
12
+ columns: ['index', 'from', 'to', 'content'],
13
+ func: async (page, kwargs) => {
14
+ if (!page)
15
+ throw new Error('Requires browser');
16
+ // 1. 先前往视频详情页 (建立有鉴权的 Session,且这里不需要加载完整个视频)
17
+ await page.goto(`https://www.bilibili.com/video/${kwargs.bvid}/`);
18
+ // 2. 利用 __INITIAL_STATE__ 获取基础信息,拿 CID
19
+ const cid = await page.evaluate(`(async () => {
20
+ const state = window.__INITIAL_STATE__ || {};
21
+ return state?.videoData?.cid;
22
+ })()`);
23
+ if (!cid) {
24
+ throw new Error('无法在页面中提取到当前视频的 CID,请检查页面是否正常加载。');
25
+ }
26
+ // 3. 在 Node 端使用 apiGet 获取带 Wbi 签名的字幕列表
27
+ // 之前纯靠 evaluate 里的 fetch 会失败,因为 B 站 /wbi/ 开头的接口强校验 w_rid,未签名直接被风控返回 403 HTML
28
+ const payload = await apiGet(page, '/x/player/wbi/v2', {
29
+ params: { bvid: kwargs.bvid, cid },
30
+ signed: true, // 开启 wbi_sign 自动签名
31
+ });
32
+ if (payload.code !== 0) {
33
+ throw new Error(`获取视频播放信息失败: ${payload.message} (${payload.code})`);
34
+ }
35
+ const subtitles = payload.data?.subtitle?.subtitles || [];
36
+ if (subtitles.length === 0) {
37
+ throw new Error('此视频没有发现外挂或智能字幕。');
38
+ }
39
+ // 4. 选择目标字幕语言
40
+ const target = kwargs.lang
41
+ ? subtitles.find((s) => s.lan === kwargs.lang) || subtitles[0]
42
+ : subtitles[0];
43
+ const targetSubUrl = target.subtitle_url;
44
+ if (!targetSubUrl || targetSubUrl === '') {
45
+ throw new Error('[风控拦截/未登录] 获取到的 subtitle_url 为空!请确保 CLI 已成功登录且风控未封锁此账号。');
46
+ }
47
+ const finalUrl = targetSubUrl.startsWith('//') ? 'https:' + targetSubUrl : targetSubUrl;
48
+ // 5. 解析并拉取 CDN 的 JSON 文件
49
+ const fetchJs = `
50
+ (async () => {
51
+ const url = ${JSON.stringify(finalUrl)};
52
+ const res = await fetch(url);
53
+ const text = await res.text();
54
+
55
+ if (text.startsWith('<!DOCTYPE') || text.startsWith('<html')) {
56
+ return { error: 'HTML', text: text.substring(0, 100), url };
57
+ }
58
+
59
+ try {
60
+ const subJson = JSON.parse(text);
61
+ // B站真实返回格式是 { font_size: 0.4, font_color: "#FFFFFF", background_alpha: 0.5, background_color: "#9C27B0", Stroke: "none", type: "json" , body: [{from: 0, to: 0, content: ""}] }
62
+ if (Array.isArray(subJson?.body)) return { success: true, data: subJson.body };
63
+ if (Array.isArray(subJson)) return { success: true, data: subJson };
64
+ return { error: 'UNKNOWN_JSON', data: subJson };
65
+ } catch (e) {
66
+ return { error: 'PARSE_FAILED', text: text.substring(0, 100) };
67
+ }
68
+ })()
69
+ `;
70
+ const items = await page.evaluate(fetchJs);
71
+ if (items?.error) {
72
+ throw new Error(`字幕获取失败: ${items.error}${items.text ? ' — ' + items.text : ''}`);
73
+ }
74
+ const finalItems = items?.data || [];
75
+ if (!Array.isArray(finalItems)) {
76
+ throw new Error('解析到的字幕列表对象不符合数组格式');
77
+ }
78
+ // 6. 数据映射
79
+ return finalItems.map((item, idx) => ({
80
+ index: idx + 1,
81
+ from: Number(item.from || 0).toFixed(2) + 's',
82
+ to: Number(item.to || 0).toFixed(2) + 's',
83
+ content: item.content
84
+ }));
85
+ },
86
+ });
package/dist/explore.js CHANGED
@@ -175,6 +175,9 @@ function scoreEndpoint(ep) {
175
175
  s += 2;
176
176
  if (ep.status === 200)
177
177
  s += 2;
178
+ // Anti-Bot Empty Value Detection: penalize JSON endpoints returning empty data
179
+ if (ep.responseAnalysis && ep.responseAnalysis.itemCount === 0 && ep.contentType.includes('json'))
180
+ s -= 3;
178
181
  return s;
179
182
  }
180
183
  function inferCapabilityName(url, goal) {
@@ -266,6 +269,28 @@ const STORE_DISCOVER_JS = `
266
269
  return stores;
267
270
  }
268
271
  `;
272
+ // ── Auto-Interaction (Fuzzing) ─────────────────────────────────────────────
273
+ const INTERACT_FUZZ_JS = `
274
+ async () => {
275
+ const sleep = ms => new Promise(r => setTimeout(r, ms));
276
+ const clickables = Array.from(document.querySelectorAll(
277
+ 'button, [role="button"], [role="tab"], .tab, .btn, a[href="javascript:void(0)"], a[href="#"]'
278
+ )).slice(0, 15); // limit to 15 to avoid endless loops
279
+
280
+ let clicked = 0;
281
+ for (const el of clickables) {
282
+ try {
283
+ const rect = el.getBoundingClientRect();
284
+ if (rect.width > 0 && rect.height > 0) {
285
+ el.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true, view: window }));
286
+ clicked++;
287
+ await sleep(300); // give it time to trigger network
288
+ }
289
+ } catch {}
290
+ }
291
+ return clicked;
292
+ }
293
+ `;
269
294
  // ── Main explore function ──────────────────────────────────────────────────
270
295
  export async function exploreUrl(url, opts) {
271
296
  const waitSeconds = opts.waitSeconds ?? 3.0;
@@ -283,6 +308,31 @@ export async function exploreUrl(url, opts) {
283
308
  catch { }
284
309
  await page.wait(1);
285
310
  }
311
+ // Step 2.5: Interactive Fuzzing (if requested)
312
+ if (opts.auto) {
313
+ try {
314
+ // First: targeted clicks by label (e.g. "字幕", "CC", "评论")
315
+ if (opts.clickLabels?.length) {
316
+ for (const label of opts.clickLabels) {
317
+ const safeLabel = label.replace(/'/g, "\\'");
318
+ await page.evaluate(`
319
+ (() => {
320
+ const el = [...document.querySelectorAll('button, [role="button"], [role="tab"], a, span')]
321
+ .find(e => e.textContent && e.textContent.trim().includes('${safeLabel}'));
322
+ if (el) el.click();
323
+ })()
324
+ `);
325
+ await page.wait(1);
326
+ }
327
+ }
328
+ // Then: blind fuzzing on generic interactive elements
329
+ const clicks = await page.evaluate(INTERACT_FUZZ_JS);
330
+ await page.wait(2); // wait for XHRs to settle
331
+ }
332
+ catch (e) {
333
+ // fuzzing is best-effort, don't fail the whole explore
334
+ }
335
+ }
286
336
  // Step 3: Read page metadata
287
337
  const metadata = await readPageMetadata(page);
288
338
  // Step 4: Capture network traffic
package/dist/main.js CHANGED
@@ -56,8 +56,8 @@ program.command('validate').description('Validate CLI definitions').argument('[t
56
56
  .action(async (target) => { const { validateClisWithTarget, renderValidationReport } = await import('./validate.js'); console.log(renderValidationReport(validateClisWithTarget([BUILTIN_CLIS, USER_CLIS], target))); });
57
57
  program.command('verify').description('Validate + smoke test').argument('[target]').option('--smoke', 'Run smoke tests', false)
58
58
  .action(async (target, opts) => { const { verifyClis, renderVerifyReport } = await import('./verify.js'); const r = await verifyClis({ builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, target, smoke: opts.smoke }); console.log(renderVerifyReport(r)); process.exitCode = r.ok ? 0 : 1; });
59
- program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
60
- .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
59
+ program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3').option('--auto', 'Enable interactive fuzzing (simulate clicks to trigger lazy APIs)').option('--click <labels>', 'Comma-separated labels to click before fuzzing (e.g. "字幕,CC,评论")')
60
+ .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); const clickLabels = opts.click ? opts.click.split(',').map((s) => s.trim()) : undefined; console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait), auto: opts.auto, clickLabels }))); });
61
61
  program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3')
62
62
  .action(async (target, opts) => { const { synthesizeFromExplore, renderSynthesizeSummary } = await import('./synthesize.js'); console.log(renderSynthesizeSummary(synthesizeFromExplore(target, { top: parseInt(opts.top) }))); });
63
63
  program.command('generate').description('One-shot: explore → synthesize → register').argument('<url>').option('--goal <text>').option('--site <name>')
@@ -27,14 +27,10 @@ export async function stepWait(page, params, data, args) {
27
27
  await page.wait(params);
28
28
  else if (typeof params === 'object' && params) {
29
29
  if ('text' in params) {
30
- const timeout = params.timeout ?? 10;
31
- const start = Date.now();
32
- while ((Date.now() - start) / 1000 < timeout) {
33
- const snap = await page.snapshot({ raw: true });
34
- if (typeof snap === 'string' && snap.includes(params.text))
35
- break;
36
- await page.wait(0.5);
37
- }
30
+ await page.wait({
31
+ text: String(render(params.text, { args, data })),
32
+ timeout: params.timeout
33
+ });
38
34
  }
39
35
  else if ('time' in params)
40
36
  await page.wait(Number(params.time));
@@ -2,6 +2,20 @@
2
2
  * Pipeline step: fetch — HTTP API requests.
3
3
  */
4
4
  import { render } from '../template.js';
5
+ /** Simple async concurrency limiter */
6
+ async function mapConcurrent(items, limit, fn) {
7
+ const results = new Array(items.length);
8
+ let index = 0;
9
+ async function worker() {
10
+ while (index < items.length) {
11
+ const i = index++;
12
+ results[i] = await fn(items[i], i);
13
+ }
14
+ }
15
+ const workers = Array.from({ length: Math.min(limit, items.length) }, () => worker());
16
+ await Promise.all(workers);
17
+ return results;
18
+ }
5
19
  /** Single URL fetch helper */
6
20
  async function fetchSingle(page, url, method, queryParams, headers, args, data) {
7
21
  const renderedParams = {};
@@ -38,12 +52,11 @@ export async function stepFetch(page, params, data, args) {
38
52
  const urlTemplate = String(urlOrObj);
39
53
  // Per-item fetch when data is array and URL references item
40
54
  if (Array.isArray(data) && urlTemplate.includes('item')) {
41
- const results = [];
42
- for (let i = 0; i < data.length; i++) {
43
- const itemUrl = String(render(urlTemplate, { args, data, item: data[i], index: i }));
44
- results.push(await fetchSingle(page, itemUrl, method, queryParams, headers, args, data));
45
- }
46
- return results;
55
+ const concurrency = typeof params?.concurrency === 'number' ? params.concurrency : 5;
56
+ return mapConcurrent(data, concurrency, async (item, index) => {
57
+ const itemUrl = String(render(urlTemplate, { args, data, item, index }));
58
+ return fetchSingle(page, itemUrl, method, queryParams, headers, args, data);
59
+ });
47
60
  }
48
61
  const url = render(urlOrObj, { args, data });
49
62
  return fetchSingle(page, String(url), method, queryParams, headers, args, data);
@@ -36,6 +36,8 @@ export async function stepTap(page, params, data, args) {
36
36
  async () => {
37
37
  // ── 1. Setup capture proxy (fetch + XHR dual interception) ──
38
38
  let captured = null;
39
+ let captureResolve;
40
+ const capturePromise = new Promise(r => { captureResolve = r; });
39
41
  const capturePattern = ${JSON.stringify(capturePattern)};
40
42
 
41
43
  // Intercept fetch API
@@ -46,7 +48,7 @@ export async function stepTap(page, params, data, args) {
46
48
  const url = typeof fetchArgs[0] === 'string' ? fetchArgs[0]
47
49
  : fetchArgs[0] instanceof Request ? fetchArgs[0].url : String(fetchArgs[0]);
48
50
  if (capturePattern && url.includes(capturePattern) && !captured) {
49
- try { captured = await resp.clone().json(); } catch {}
51
+ try { captured = await resp.clone().json(); captureResolve(); } catch {}
50
52
  }
51
53
  } catch {}
52
54
  return resp;
@@ -65,13 +67,13 @@ export async function stepTap(page, params, data, args) {
65
67
  const origHandler = xhr.onreadystatechange;
66
68
  xhr.onreadystatechange = function() {
67
69
  if (xhr.readyState === 4 && !captured) {
68
- try { captured = JSON.parse(xhr.responseText); } catch {}
70
+ try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {}
69
71
  }
70
72
  if (origHandler) origHandler.apply(this, arguments);
71
73
  };
72
74
  const origOnload = xhr.onload;
73
75
  xhr.onload = function() {
74
- if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} }
76
+ if (!captured) { try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {} }
75
77
  if (origOnload) origOnload.apply(this, arguments);
76
78
  };
77
79
  }
@@ -111,9 +113,9 @@ export async function stepTap(page, params, data, args) {
111
113
  await ${actionCall};
112
114
 
113
115
  // ── 4. Wait for network response ──
114
- const deadline = Date.now() + ${timeout} * 1000;
115
- while (!captured && Date.now() < deadline) {
116
- await new Promise(r => setTimeout(r, 200));
116
+ if (!captured) {
117
+ const timeoutPromise = new Promise(r => setTimeout(r, ${timeout} * 1000));
118
+ await Promise.race([capturePromise, timeoutPromise]);
117
119
  }
118
120
  } finally {
119
121
  // ── 5. Always restore originals ──
package/dist/types.d.ts CHANGED
@@ -16,7 +16,11 @@ export interface IPage {
16
16
  click(ref: string): Promise<void>;
17
17
  typeText(ref: string, text: string): Promise<void>;
18
18
  pressKey(key: string): Promise<void>;
19
- wait(seconds: number): Promise<void>;
19
+ wait(options: number | {
20
+ text?: string;
21
+ time?: number;
22
+ timeout?: number;
23
+ }): Promise<void>;
20
24
  tabs(): Promise<any>;
21
25
  closeTab(index?: number): Promise<void>;
22
26
  newTab(): Promise<void>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jackwener/opencli",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
package/src/browser.ts CHANGED
@@ -104,8 +104,13 @@ export class Page implements IPage {
104
104
  await this.call('tools/call', { name: 'browser_press_key', arguments: { key } });
105
105
  }
106
106
 
107
- async wait(seconds: number): Promise<void> {
108
- await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: seconds } });
107
+ async wait(options: number | { text?: string; time?: number; timeout?: number }): Promise<void> {
108
+ if (typeof options === 'number') {
109
+ await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: options } });
110
+ } else {
111
+ // Pass directly to native wait_for, which supports natively awaiting text strings without heavy DOM polling
112
+ await this.call('tools/call', { name: 'browser_wait_for', arguments: options });
113
+ }
109
114
  }
110
115
 
111
116
  async tabs(): Promise<any> {
@@ -139,10 +144,32 @@ export class Page implements IPage {
139
144
  async autoScroll(options: { times?: number; delayMs?: number } = {}): Promise<void> {
140
145
  const times = options.times ?? 3;
141
146
  const delayMs = options.delayMs ?? 2000;
142
- for (let i = 0; i < times; i++) {
143
- await this.evaluate('() => window.scrollTo(0, document.body.scrollHeight)');
144
- await this.wait(delayMs / 1000);
145
- }
147
+ const js = `
148
+ async () => {
149
+ const maxTimes = ${times};
150
+ const maxWaitMs = ${delayMs};
151
+ for (let i = 0; i < maxTimes; i++) {
152
+ const lastHeight = document.body.scrollHeight;
153
+ window.scrollTo(0, lastHeight);
154
+ await new Promise(resolve => {
155
+ let timeoutId;
156
+ const observer = new MutationObserver(() => {
157
+ if (document.body.scrollHeight > lastHeight) {
158
+ clearTimeout(timeoutId);
159
+ observer.disconnect();
160
+ setTimeout(resolve, 100); // Small debounce for rendering
161
+ }
162
+ });
163
+ observer.observe(document.body, { childList: true, subtree: true });
164
+ timeoutId = setTimeout(() => {
165
+ observer.disconnect();
166
+ resolve(null);
167
+ }, maxWaitMs);
168
+ });
169
+ }
170
+ }
171
+ `;
172
+ await this.evaluate(js);
146
173
  }
147
174
 
148
175
  async installInterceptor(pattern: string): Promise<void> {
@@ -0,0 +1,100 @@
1
+ import { cli, Strategy } from '../../registry.js';
2
+ import type { IPage } from '../../types.js';
3
+ import { apiGet } from '../../bilibili.js';
4
+
5
// `bilibili subtitle`: fetch one subtitle track of a Bilibili video and return its cues.
//
// Flow: open the video page -> read the CID from `window.__INITIAL_STATE__` -> request the
// signed player endpoint for the subtitle list -> pick a track -> fetch the track JSON from
// inside the page -> map each cue to { index, from, to, content }.
cli({
  site: 'bilibili',
  name: 'subtitle',
  description: '获取 Bilibili 视频的字幕',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'bvid', required: true },
    { name: 'lang', required: false, help: '字幕语言代码 (如 zh-CN, en-US, ai-zh),默认取第一个' },
  ],
  columns: ['index', 'from', 'to', 'content'],
  func: async (page: IPage | null, kwargs: any) => {
    if (!page) throw new Error('Requires browser');

    // 1. Open the video page first (establishes the authenticated session; the video
    //    itself does not need to finish loading).
    await page.goto(`https://www.bilibili.com/video/${kwargs.bvid}/`);

    // 2. Read the CID from the page's __INITIAL_STATE__.
    const cid = await page.evaluate(`(async () => {
      const state = window.__INITIAL_STATE__ || {};
      return state?.videoData?.cid;
    })()`);

    if (!cid) {
      throw new Error('无法在页面中提取到当前视频的 CID,请检查页面是否正常加载。');
    }

    // 3. Fetch the subtitle list on the Node side through apiGet with Wbi signing.
    //    Per the original author's note, a plain in-page fetch fails here: the /wbi/
    //    endpoints validate w_rid and answer unsigned requests with a 403 HTML page.
    const payload = await apiGet(page, '/x/player/wbi/v2', {
      params: { bvid: kwargs.bvid, cid },
      signed: true, // enable automatic wbi signing
    });

    if (payload.code !== 0) {
      throw new Error(`获取视频播放信息失败: ${payload.message} (${payload.code})`);
    }

    const subtitles = payload.data?.subtitle?.subtitles || [];
    if (subtitles.length === 0) {
      throw new Error('此视频没有发现外挂或智能字幕。');
    }

    // 4. Pick the requested language; fall back to the first track when `lang` is not
    //    given or no track matches it.
    const target = kwargs.lang
      ? subtitles.find((s: any) => s.lan === kwargs.lang) || subtitles[0]
      : subtitles[0];

    const targetSubUrl = target.subtitle_url;
    // Anything that is not a non-empty string is treated like a missing URL, so a
    // malformed payload reports the same actionable error instead of a TypeError below.
    if (typeof targetSubUrl !== 'string' || targetSubUrl === '') {
      throw new Error('[风控拦截/未登录] 获取到的 subtitle_url 为空!请确保 CLI 已成功登录且风控未封锁此账号。');
    }

    // The fetch below runs inside the HTTPS video page, where plain-HTTP requests are
    // blocked as mixed content. Upgrade both protocol-relative and http:// URLs.
    const finalUrl = targetSubUrl.startsWith('//')
      ? 'https:' + targetSubUrl
      : targetSubUrl.replace(/^http:\/\//i, 'https://');

    // 5. Fetch and parse the subtitle JSON from the CDN, inside the page.
    const fetchJs = `
      (async () => {
        const url = ${JSON.stringify(finalUrl)};
        const res = await fetch(url);
        const text = await res.text();

        if (text.startsWith('<!DOCTYPE') || text.startsWith('<html')) {
          return { error: 'HTML', text: text.substring(0, 100), url };
        }

        try {
          const subJson = JSON.parse(text);
          // Observed response shape: { font_size: 0.4, font_color: "#FFFFFF", background_alpha: 0.5, background_color: "#9C27B0", Stroke: "none", type: "json", body: [{from: 0, to: 0, content: ""}] }
          if (Array.isArray(subJson?.body)) return { success: true, data: subJson.body };
          if (Array.isArray(subJson)) return { success: true, data: subJson };
          return { error: 'UNKNOWN_JSON', data: subJson };
        } catch (e) {
          return { error: 'PARSE_FAILED', text: text.substring(0, 100) };
        }
      })()
    `;
    const items = await page.evaluate(fetchJs);

    if (items?.error) {
      throw new Error(`字幕获取失败: ${items.error}${items.text ? ' — ' + items.text : ''}`);
    }

    const finalItems = items?.data || [];
    if (!Array.isArray(finalItems)) {
      throw new Error('解析到的字幕列表对象不符合数组格式');
    }

    // 6. Map cues to output rows: 1-based index, timestamps with two decimals plus 's'.
    return finalItems.map((item: any, idx: number) => ({
      index: idx + 1,
      from: Number(item.from || 0).toFixed(2) + 's',
      to: Number(item.to || 0).toFixed(2) + 's',
      content: item.content,
    }));
  },
});
package/src/explore.ts CHANGED
@@ -184,6 +184,8 @@ function scoreEndpoint(ep: { contentType: string; responseAnalysis: any; pattern
184
184
  if (ep.hasPaginationParam) s += 2;
185
185
  if (ep.hasLimitParam) s += 2;
186
186
  if (ep.status === 200) s += 2;
187
+ // Anti-Bot Empty Value Detection: penalize JSON endpoints returning empty data
188
+ if (ep.responseAnalysis && ep.responseAnalysis.itemCount === 0 && ep.contentType.includes('json')) s -= 3;
187
189
  return s;
188
190
  }
189
191
 
@@ -277,6 +279,30 @@ export interface DiscoveredStore {
277
279
  stateKeys: string[];
278
280
  }
279
281
 
282
+ // ── Auto-Interaction (Fuzzing) ─────────────────────────────────────────────
283
+
284
/**
 * Page-side script (source text of an async arrow function) that dispatches synthetic
 * click events on generic interactive elements, to provoke lazily triggered network
 * requests. Resolves to the number of elements clicked.
 *
 * At most 15 elements are clicked. The cap is applied after the visibility filter, so
 * hidden elements that appear early in the DOM do not use up the budget.
 */
const INTERACT_FUZZ_JS = `
async () => {
  const sleep = ms => new Promise(r => setTimeout(r, ms));
  const isVisible = el => {
    try {
      const rect = el.getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    } catch {
      return false;
    }
  };
  const clickables = Array.from(document.querySelectorAll(
    'button, [role="button"], [role="tab"], .tab, .btn, a[href="javascript:void(0)"], a[href="#"]'
  )).filter(isVisible).slice(0, 15); // limit to 15 to avoid endless loops

  let clicked = 0;
  for (const el of clickables) {
    try {
      // Re-check: an earlier click may have hidden or removed this element.
      if (!isVisible(el)) continue;
      el.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true, view: window }));
      clicked++;
      await sleep(300); // give it time to trigger network
    } catch {}
  }
  return clicked;
}
`;
305
+
280
306
  // ── Main explore function ──────────────────────────────────────────────────
281
307
 
282
308
  export async function exploreUrl(
@@ -300,6 +326,31 @@ export async function exploreUrl(
300
326
  // Step 2: Auto-scroll to trigger lazy loading (use keyboard since page.scroll may not exist)
301
327
  for (let i = 0; i < 3; i++) { try { await page.pressKey('End'); } catch {} await page.wait(1); }
302
328
 
329
+ // Step 2.5: Interactive Fuzzing (if requested)
330
+ if (opts.auto) {
331
+ try {
332
+ // First: targeted clicks by label (e.g. "字幕", "CC", "评论")
333
+ if (opts.clickLabels?.length) {
334
+ for (const label of opts.clickLabels) {
335
+ const safeLabel = label.replace(/'/g, "\\'");
336
+ await page.evaluate(`
337
+ (() => {
338
+ const el = [...document.querySelectorAll('button, [role="button"], [role="tab"], a, span')]
339
+ .find(e => e.textContent && e.textContent.trim().includes('${safeLabel}'));
340
+ if (el) el.click();
341
+ })()
342
+ `);
343
+ await page.wait(1);
344
+ }
345
+ }
346
+ // Then: blind fuzzing on generic interactive elements
347
+ const clicks = await page.evaluate(INTERACT_FUZZ_JS);
348
+ await page.wait(2); // wait for XHRs to settle
349
+ } catch (e) {
350
+ // fuzzing is best-effort, don't fail the whole explore
351
+ }
352
+ }
353
+
303
354
  // Step 3: Read page metadata
304
355
  const metadata = await readPageMetadata(page);
305
356
 
package/src/main.ts CHANGED
@@ -53,8 +53,8 @@ program.command('validate').description('Validate CLI definitions').argument('[t
53
53
  program.command('verify').description('Validate + smoke test').argument('[target]').option('--smoke', 'Run smoke tests', false)
54
54
  .action(async (target, opts) => { const { verifyClis, renderVerifyReport } = await import('./verify.js'); const r = await verifyClis({ builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, target, smoke: opts.smoke }); console.log(renderVerifyReport(r)); process.exitCode = r.ok ? 0 : 1; });
55
55
 
56
- program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
57
- .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
56
// `explore` (alias `probe`): run the explorer against a URL and print its summary.
// `--auto` turns on interactive fuzzing; `--click` supplies labels to click before it.
program
  .command('explore')
  .alias('probe')
  .description('Explore a website: discover APIs, stores, and recommend strategies')
  .argument('<url>')
  .option('--site <name>')
  .option('--goal <text>')
  .option('--wait <s>', '', '3')
  .option('--auto', 'Enable interactive fuzzing (simulate clicks to trigger lazy APIs)')
  .option('--click <labels>', 'Comma-separated labels to click before fuzzing (e.g. "字幕,CC,评论")')
  .action(async (url, opts) => {
    const { exploreUrl, renderExploreSummary } = await import('./explore.js');
    // Drop empty entries ("a,,b", trailing comma, blank value): an empty label matches
    // every element's text via `includes('')`, which would click an arbitrary element.
    const clickLabels = opts.click
      ? opts.click
          .split(',')
          .map((s: string) => s.trim())
          .filter((s: string) => s.length > 0)
      : undefined;
    const summary = await exploreUrl(url, {
      BrowserFactory: PlaywrightMCP,
      site: opts.site,
      goal: opts.goal,
      waitSeconds: parseFloat(opts.wait),
      auto: opts.auto,
      clickLabels,
    });
    console.log(renderExploreSummary(summary));
  });
58
58
 
59
59
  program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3')
60
60
  .action(async (target, opts) => { const { synthesizeFromExplore, renderSynthesizeSummary } = await import('./synthesize.js'); console.log(renderSynthesizeSummary(synthesizeFromExplore(target, { top: parseInt(opts.top) }))); });
@@ -31,13 +31,10 @@ export async function stepWait(page: IPage, params: any, data: any, args: Record
31
31
  if (typeof params === 'number') await page.wait(params);
32
32
  else if (typeof params === 'object' && params) {
33
33
  if ('text' in params) {
34
- const timeout = params.timeout ?? 10;
35
- const start = Date.now();
36
- while ((Date.now() - start) / 1000 < timeout) {
37
- const snap = await page.snapshot({ raw: true });
38
- if (typeof snap === 'string' && snap.includes(params.text)) break;
39
- await page.wait(0.5);
40
- }
34
+ await page.wait({
35
+ text: String(render(params.text, { args, data })),
36
+ timeout: params.timeout
37
+ });
41
38
  } else if ('time' in params) await page.wait(Number(params.time));
42
39
  } else if (typeof params === 'string') await page.wait(Number(render(params, { args, data })));
43
40
  return data;
@@ -5,6 +5,23 @@
5
5
  import type { IPage } from '../../types.js';
6
6
  import { render } from '../template.js';
7
7
 
8
/**
 * Map `items` through the async `fn`, keeping at most `limit` calls in flight.
 *
 * Results are returned in input order regardless of completion order. If any call
 * rejects, the returned promise rejects with that error.
 *
 * @param items - Inputs to process.
 * @param limit - Maximum number of concurrent calls. Values below 1, fractional values
 *   and NaN are normalised to a whole number of at least 1, so every item is processed.
 * @param fn - Async mapper receiving the item and its index.
 * @returns The mapped results, index-aligned with `items`.
 */
async function mapConcurrent<T, R>(items: T[], limit: number, fn: (item: T, index: number) => Promise<R>): Promise<R[]> {
  const results: R[] = new Array(items.length);

  // A limit of 0, a negative, NaN or a fraction below 1 would otherwise start zero
  // workers and hand back an array of holes without ever calling `fn`.
  const requested = Math.floor(limit);
  const workerCount = Math.min(items.length, requested >= 1 ? requested : 1);

  // Shared cursor: each worker claims the next unprocessed index until none remain.
  let next = 0;

  async function worker(): Promise<void> {
    while (next < items.length) {
      const i = next++;
      results[i] = await fn(items[i], i);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
24
+
8
25
  /** Single URL fetch helper */
9
26
  async function fetchSingle(
10
27
  page: IPage | null, url: string, method: string,
@@ -48,12 +65,11 @@ export async function stepFetch(page: IPage | null, params: any, data: any, args
48
65
 
49
66
  // Per-item fetch when data is array and URL references item
50
67
  if (Array.isArray(data) && urlTemplate.includes('item')) {
51
- const results: any[] = [];
52
- for (let i = 0; i < data.length; i++) {
53
- const itemUrl = String(render(urlTemplate, { args, data, item: data[i], index: i }));
54
- results.push(await fetchSingle(page, itemUrl, method, queryParams, headers, args, data));
55
- }
56
- return results;
68
+ const concurrency = typeof params?.concurrency === 'number' ? params.concurrency : 5;
69
+ return mapConcurrent(data, concurrency, async (item, index) => {
70
+ const itemUrl = String(render(urlTemplate, { args, data, item, index }));
71
+ return fetchSingle(page, itemUrl, method, queryParams, headers, args, data);
72
+ });
57
73
  }
58
74
  const url = render(urlOrObj, { args, data });
59
75
  return fetchSingle(page, String(url), method, queryParams, headers, args, data);
@@ -42,6 +42,8 @@ export async function stepTap(page: IPage, params: any, data: any, args: Record<
42
42
  async () => {
43
43
  // ── 1. Setup capture proxy (fetch + XHR dual interception) ──
44
44
  let captured = null;
45
+ let captureResolve;
46
+ const capturePromise = new Promise(r => { captureResolve = r; });
45
47
  const capturePattern = ${JSON.stringify(capturePattern)};
46
48
 
47
49
  // Intercept fetch API
@@ -52,7 +54,7 @@ export async function stepTap(page: IPage, params: any, data: any, args: Record<
52
54
  const url = typeof fetchArgs[0] === 'string' ? fetchArgs[0]
53
55
  : fetchArgs[0] instanceof Request ? fetchArgs[0].url : String(fetchArgs[0]);
54
56
  if (capturePattern && url.includes(capturePattern) && !captured) {
55
- try { captured = await resp.clone().json(); } catch {}
57
+ try { captured = await resp.clone().json(); captureResolve(); } catch {}
56
58
  }
57
59
  } catch {}
58
60
  return resp;
@@ -71,13 +73,13 @@ export async function stepTap(page: IPage, params: any, data: any, args: Record<
71
73
  const origHandler = xhr.onreadystatechange;
72
74
  xhr.onreadystatechange = function() {
73
75
  if (xhr.readyState === 4 && !captured) {
74
- try { captured = JSON.parse(xhr.responseText); } catch {}
76
+ try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {}
75
77
  }
76
78
  if (origHandler) origHandler.apply(this, arguments);
77
79
  };
78
80
  const origOnload = xhr.onload;
79
81
  xhr.onload = function() {
80
- if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} }
82
+ if (!captured) { try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {} }
81
83
  if (origOnload) origOnload.apply(this, arguments);
82
84
  };
83
85
  }
@@ -117,9 +119,9 @@ export async function stepTap(page: IPage, params: any, data: any, args: Record<
117
119
  await ${actionCall};
118
120
 
119
121
  // ── 4. Wait for network response ──
120
- const deadline = Date.now() + ${timeout} * 1000;
121
- while (!captured && Date.now() < deadline) {
122
- await new Promise(r => setTimeout(r, 200));
122
+ if (!captured) {
123
+ const timeoutPromise = new Promise(r => setTimeout(r, ${timeout} * 1000));
124
+ await Promise.race([capturePromise, timeoutPromise]);
123
125
  }
124
126
  } finally {
125
127
  // ── 5. Always restore originals ──
package/src/types.ts CHANGED
@@ -12,7 +12,7 @@ export interface IPage {
12
12
  click(ref: string): Promise<void>;
13
13
  typeText(ref: string, text: string): Promise<void>;
14
14
  pressKey(key: string): Promise<void>;
15
- wait(seconds: number): Promise<void>;
15
+ wait(options: number | { text?: string; time?: number; timeout?: number }): Promise<void>;
16
16
  tabs(): Promise<any>;
17
17
  closeTab(index?: number): Promise<void>;
18
18
  newTab(): Promise<void>;