@jackwener/opencli 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +151 -75
- package/README.md +11 -8
- package/README.zh-CN.md +11 -8
- package/SKILL.md +42 -15
- package/dist/browser.d.ts +11 -1
- package/dist/browser.js +95 -3
- package/dist/clis/bilibili/dynamic.d.ts +1 -0
- package/dist/clis/bilibili/dynamic.js +33 -0
- package/dist/clis/bilibili/ranking.d.ts +1 -0
- package/dist/clis/bilibili/ranking.js +24 -0
- package/dist/clis/bilibili/subtitle.d.ts +1 -0
- package/dist/clis/bilibili/subtitle.js +86 -0
- package/dist/clis/reddit/frontpage.yaml +30 -0
- package/dist/clis/reddit/hot.yaml +3 -2
- package/dist/clis/reddit/search.yaml +34 -0
- package/dist/clis/reddit/subreddit.yaml +39 -0
- package/dist/clis/twitter/bookmarks.yaml +85 -0
- package/dist/clis/twitter/profile.d.ts +1 -0
- package/dist/clis/twitter/profile.js +56 -0
- package/dist/clis/twitter/search.d.ts +1 -0
- package/dist/clis/twitter/search.js +60 -0
- package/dist/clis/twitter/timeline.d.ts +1 -0
- package/dist/clis/twitter/timeline.js +47 -0
- package/dist/clis/xiaohongshu/user.d.ts +1 -0
- package/dist/clis/xiaohongshu/user.js +40 -0
- package/dist/clis/xueqiu/feed.yaml +53 -0
- package/dist/clis/xueqiu/hot-stock.yaml +49 -0
- package/dist/clis/xueqiu/hot.yaml +46 -0
- package/dist/clis/xueqiu/search.yaml +53 -0
- package/dist/clis/xueqiu/stock.yaml +67 -0
- package/dist/clis/xueqiu/watchlist.yaml +46 -0
- package/dist/clis/zhihu/hot.yaml +6 -2
- package/dist/clis/zhihu/search.yaml +3 -1
- package/dist/engine.d.ts +1 -1
- package/dist/engine.js +9 -1
- package/dist/explore.js +50 -0
- package/dist/main.d.ts +1 -1
- package/dist/main.js +12 -5
- package/dist/pipeline/steps/browser.js +4 -8
- package/dist/pipeline/steps/fetch.js +19 -6
- package/dist/pipeline/steps/intercept.js +56 -29
- package/dist/pipeline/steps/tap.js +8 -6
- package/dist/pipeline/template.js +3 -1
- package/dist/pipeline/template.test.js +6 -0
- package/dist/types.d.ts +11 -1
- package/package.json +1 -1
- package/src/browser.ts +101 -6
- package/src/clis/bilibili/dynamic.ts +34 -0
- package/src/clis/bilibili/ranking.ts +25 -0
- package/src/clis/bilibili/subtitle.ts +100 -0
- package/src/clis/reddit/frontpage.yaml +30 -0
- package/src/clis/reddit/hot.yaml +3 -2
- package/src/clis/reddit/search.yaml +34 -0
- package/src/clis/reddit/subreddit.yaml +39 -0
- package/src/clis/twitter/bookmarks.yaml +85 -0
- package/src/clis/twitter/profile.ts +61 -0
- package/src/clis/twitter/search.ts +65 -0
- package/src/clis/twitter/timeline.ts +50 -0
- package/src/clis/xiaohongshu/user.ts +45 -0
- package/src/clis/xueqiu/feed.yaml +53 -0
- package/src/clis/xueqiu/hot-stock.yaml +49 -0
- package/src/clis/xueqiu/hot.yaml +46 -0
- package/src/clis/xueqiu/search.yaml +53 -0
- package/src/clis/xueqiu/stock.yaml +67 -0
- package/src/clis/xueqiu/watchlist.yaml +46 -0
- package/src/clis/zhihu/hot.yaml +6 -2
- package/src/clis/zhihu/search.yaml +3 -1
- package/src/engine.ts +10 -1
- package/src/explore.ts +51 -0
- package/src/main.ts +11 -5
- package/src/pipeline/steps/browser.ts +4 -7
- package/src/pipeline/steps/fetch.ts +22 -6
- package/src/pipeline/steps/intercept.ts +58 -28
- package/src/pipeline/steps/tap.ts +8 -6
- package/src/pipeline/template.test.ts +6 -0
- package/src/pipeline/template.ts +3 -1
- package/src/types.ts +4 -1
- package/dist/clis/index.d.ts +0 -22
- package/dist/clis/index.js +0 -34
- package/src/clis/index.ts +0 -46
package/CLI-CREATOR.md
CHANGED
|
@@ -51,7 +51,17 @@ opencli bilibili hot -v # 查看已有命令的 pipeline 每步数据流
|
|
|
51
51
|
- **Request Headers**: Cookie? Bearer? 自定义签名头(X-s、X-t)?
|
|
52
52
|
- **Response Body**: JSON 结构,特别是数据在哪个路径(`data.items`、`data.list`)
|
|
53
53
|
|
|
54
|
-
### 1c.
|
|
54
|
+
### 1c. 高阶 API 发现捷径法则 (Heuristics)
|
|
55
|
+
|
|
56
|
+
在开始死磕复杂的抓包拦截之前,按照以下优先级进行尝试:
|
|
57
|
+
|
|
58
|
+
1. **后缀爆破法 (`.json`)**: 像 Reddit 这样复杂的网站,只要在其 URL 后加上 `.json`(例如 `/r/all.json`),就能在带 Cookie 的情况下直接利用 `fetch` 拿到极其干净的 REST 数据(Tier 2 Cookie 策略极速秒杀)。另外如功能完备的**雪球 (xueqiu)** 也可以走这种纯 API 的方式极简获取,成为你构建简单 YAML 的黄金标杆。
|
|
59
|
+
2. **全局状态查找法 (`__INITIAL_STATE__`)**: 许多服务端渲染 (SSR) 的网站(如小红书、Bilibili)会将首页或详情页的完整数据挂载到全局 window 对象上。与其去拦截网络请求,不如直接 `page.evaluate('() => window.__INITIAL_STATE__')` 获取整个数据树。
|
|
60
|
+
3. **主动交互触发法 (Active Interaction)**: 很多深层 API(如视频字幕、评论下的回复)是懒加载的。在静态抓包找不到数据时,尝试在 `evaluate` 步骤或手动打断点时,主动去**点击(Click)页面上的对应按钮**(如"CC"、"展开全部"),从而诱发隐藏的 Network Fetch。
|
|
61
|
+
4. **框架探测与 Store Action 截断**: 如果站点使用 Vue + Pinia,可以使用 `tap` 步骤调用 action,让前端框架代替你完成复杂的鉴权签名封装。
|
|
62
|
+
5. **底层 XHR/Fetch 拦截**: 最后手段,当上述都不行时,使用 TypeScript 适配器进行无侵入式的请求抓取。
|
|
63
|
+
|
|
64
|
+
### 1d. 框架检测
|
|
55
65
|
|
|
56
66
|
Explore 自动检测前端框架。如果需要手动确认:
|
|
57
67
|
|
|
@@ -110,9 +120,9 @@ opencli cascade https://api.example.com/hot
|
|
|
110
120
|
|
|
111
121
|
```
|
|
112
122
|
你的 pipeline 里有 evaluate 步骤(内嵌 JS 代码)?
|
|
113
|
-
→ ✅ 用 TypeScript (src/clis/<site>/<name>.ts)
|
|
123
|
+
→ ✅ 用 TypeScript (src/clis/<site>/<name>.ts),保存即自动动态注册
|
|
114
124
|
→ ❌ 纯声明式(navigate + tap + map + limit)?
|
|
115
|
-
→ ✅ 用 YAML (src/clis/<site>/<name>.yaml)
|
|
125
|
+
→ ✅ 用 YAML (src/clis/<site>/<name>.yaml),保存即自动注册
|
|
116
126
|
```
|
|
117
127
|
|
|
118
128
|
| 场景 | 选择 | 示例 |
|
|
@@ -310,7 +320,7 @@ pipeline:
|
|
|
310
320
|
|
|
311
321
|
适用于需要嵌入 JS 代码读取 Pinia state、XHR 拦截、GraphQL、分页、复杂数据转换等场景。
|
|
312
322
|
|
|
313
|
-
文件路径: `src/clis/<site>/<name>.ts
|
|
323
|
+
文件路径: `src/clis/<site>/<name>.ts`。文件将会在运行时被动态扫描并注册(切勿在 `index.ts` 中手动 `import`)。
|
|
314
324
|
|
|
315
325
|
#### Tier 3 — Header 认证(Twitter)
|
|
316
326
|
|
|
@@ -353,84 +363,83 @@ cli({
|
|
|
353
363
|
});
|
|
354
364
|
```
|
|
355
365
|
|
|
356
|
-
#### Tier 4 —
|
|
366
|
+
#### Tier 4 — XHR/Fetch 双重拦截 (Twitter/小红书 通用模式)
|
|
357
367
|
|
|
358
368
|
```typescript
|
|
359
|
-
// src/clis/xiaohongshu/
|
|
369
|
+
// src/clis/xiaohongshu/user.ts
|
|
360
370
|
import { cli, Strategy } from '../../registry.js';
|
|
361
371
|
|
|
362
372
|
cli({
|
|
363
373
|
site: 'xiaohongshu',
|
|
364
|
-
name: '
|
|
365
|
-
description: '
|
|
366
|
-
strategy: Strategy.
|
|
367
|
-
args: [{ name: '
|
|
368
|
-
columns: ['rank', 'title', '
|
|
374
|
+
name: 'user',
|
|
375
|
+
description: '获取用户笔记',
|
|
376
|
+
strategy: Strategy.INTERCEPT,
|
|
377
|
+
args: [{ name: 'id', required: true }],
|
|
378
|
+
columns: ['rank', 'title', 'likes', 'url'],
|
|
369
379
|
func: async (page, kwargs) => {
|
|
370
|
-
await page.goto(
|
|
371
|
-
await page.wait(
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
const x = this;
|
|
393
|
-
const orig = x.onreadystatechange;
|
|
394
|
-
x.onreadystatechange = function() {
|
|
395
|
-
if (x.readyState === 4 && !captured) {
|
|
396
|
-
try { captured = JSON.parse(x.responseText); } catch {}
|
|
397
|
-
}
|
|
398
|
-
if (orig) orig.apply(this, arguments);
|
|
399
|
-
};
|
|
400
|
-
}
|
|
401
|
-
return origSend.apply(this, arguments);
|
|
402
|
-
};
|
|
403
|
-
|
|
404
|
-
try {
|
|
405
|
-
// 触发 Store Action,让网站自己签名发请求
|
|
406
|
-
searchStore.mutateSearchValue('${kwargs.keyword}');
|
|
407
|
-
await searchStore.loadMore();
|
|
408
|
-
await new Promise(r => setTimeout(r, 800));
|
|
409
|
-
} finally {
|
|
410
|
-
// 恢复原始 XHR
|
|
411
|
-
XMLHttpRequest.prototype.open = origOpen;
|
|
412
|
-
XMLHttpRequest.prototype.send = origSend;
|
|
380
|
+
await page.goto(`https://www.xiaohongshu.com/user/profile/${kwargs.id}`);
|
|
381
|
+
await page.wait(5);
|
|
382
|
+
|
|
383
|
+
// XHR/Fetch 底层拦截:捕获所有包含 'v1/user/posted' 的请求
|
|
384
|
+
await page.installInterceptor('v1/user/posted');
|
|
385
|
+
|
|
386
|
+
// 触发后端 API:模拟人类用户向底部滚动2次
|
|
387
|
+
await page.autoScroll({ times: 2, delayMs: 2000 });
|
|
388
|
+
|
|
389
|
+
// 提取所有被拦截捕获的 JSON 响应体
|
|
390
|
+
const requests = await page.getInterceptedRequests();
|
|
391
|
+
if (!requests || requests.length === 0) return [];
|
|
392
|
+
|
|
393
|
+
let results = [];
|
|
394
|
+
for (const req of requests) {
|
|
395
|
+
if (req.data?.data?.notes) {
|
|
396
|
+
for (const note of req.data.data.notes) {
|
|
397
|
+
results.push({
|
|
398
|
+
title: note.display_title || '',
|
|
399
|
+
likes: note.interact_info?.liked_count || '0',
|
|
400
|
+
url: `https://www.xiaohongshu.com/explore/${note.note_id || note.id}`
|
|
401
|
+
});
|
|
413
402
|
}
|
|
403
|
+
}
|
|
404
|
+
}
|
|
414
405
|
|
|
415
|
-
|
|
416
|
-
return (captured.data?.items || []).map(i => ({
|
|
417
|
-
title: i.note_card?.display_title || '',
|
|
418
|
-
author: i.note_card?.user?.nickname || '',
|
|
419
|
-
likes: i.note_card?.interact_info?.liked_count || '0',
|
|
420
|
-
type: i.note_card?.type || '',
|
|
421
|
-
}));
|
|
422
|
-
})()
|
|
423
|
-
`);
|
|
424
|
-
|
|
425
|
-
if (!Array.isArray(data)) return [];
|
|
426
|
-
return data.slice(0, kwargs.limit || 20).map((item, i) => ({
|
|
406
|
+
return results.slice(0, 20).map((item, i) => ({
|
|
427
407
|
rank: i + 1, ...item,
|
|
428
408
|
}));
|
|
429
409
|
},
|
|
430
410
|
});
|
|
431
411
|
```
|
|
432
412
|
|
|
433
|
-
>
|
|
413
|
+
> **拦截核心思路**:不自己构造签名,而是利用 `installInterceptor` 劫持网站自己的 `XMLHttpRequest` 和 `fetch`,让网站发请求,我们直接在底层取出解析好的 `response.json()`。
|
|
414
|
+
|
|
415
|
+
#### 进阶场景 1: 级联请求 (Cascading Requests) 与鉴权绕过
|
|
416
|
+
|
|
417
|
+
部分 API 获取是非常复杂的连环请求(例如 B 站获取视频字幕:先需要 `bvid` 获取核心 `cid`,再通过 `cid` 获取包含签名/Wbi 的字幕列表拉取地址,最后 fetch 真实的 CDN 资源)。在此类场景中,你必须在一个 `evaluate` 块内部或者在 TypeScript Node 端编排整个请求链条:
|
|
418
|
+
|
|
419
|
+
```typescript
|
|
420
|
+
// 真实场景:B站获取视频字幕的级联获取思路
|
|
421
|
+
const subtitleUrls = await page.evaluate(async (bvid) => {
|
|
422
|
+
// Step 1: 拿 CID (通常可以通过页面全局状态极速提取)
|
|
423
|
+
const cid = window.__INITIAL_STATE__?.videoData?.cid;
|
|
424
|
+
|
|
425
|
+
// Step 2: 依据 BVID 和 CID 拿字幕配置 (可能需要携带 W_RID 签名或依赖浏览器当前登录状态 Cookie)
|
|
426
|
+
const res = await fetch(`/x/player/wbi/v2?bvid=${bvid}&cid=${cid}`, { credentials: 'include' });
|
|
427
|
+
const data = await res.json();
|
|
428
|
+
|
|
429
|
+
// Step 3: 风控拦截/未登录降级空值检测 (Anti-Bot Empty Value Detection) ⚠️ 极其重要
|
|
430
|
+
// 很多大厂 API 只要签名失败或无强登录 Cookie 依然会返回 HTTP 200,但把关键 URL 设为 ""
|
|
431
|
+
const firstSubUrl = data.data?.subtitle?.subtitles?.[0]?.subtitle_url;
|
|
432
|
+
if (!firstSubUrl) {
|
|
433
|
+
throw new Error('被风控降级或需登录:拿不到真实的 subtitle_url,请检查 Cookie 状态 (Tier 2/3)');
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
return firstSubUrl;
|
|
437
|
+
}, kwargs.bvid);
|
|
438
|
+
|
|
439
|
+
// Step 4: 拉取最终的 CDN 静态文件 (无鉴权)
|
|
440
|
+
const finalRes = await fetch(subtitleUrls.startsWith('//') ? 'https:' + subtitleUrls : subtitleUrls);
|
|
441
|
+
const subtitles = await finalRes.json();
|
|
442
|
+
```
|
|
434
443
|
|
|
435
444
|
---
|
|
436
445
|
|
|
@@ -537,11 +546,7 @@ opencli mysite hot -f csv > data.csv
|
|
|
537
546
|
|
|
538
547
|
### TS 适配器
|
|
539
548
|
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
```typescript
|
|
543
|
-
import './mysite/search.js';
|
|
544
|
-
```
|
|
549
|
+
放入 `src/clis/<site>/<name>.ts` 即自动加载模块,无需在 `index.ts` 中写入 `import`。
|
|
545
550
|
|
|
546
551
|
### 验证注册
|
|
547
552
|
|
|
@@ -558,6 +563,74 @@ git commit -m "feat(mysite): add hot and search adapters"
|
|
|
558
563
|
git push
|
|
559
564
|
```
|
|
560
565
|
|
|
566
|
+
## 设计哲学: Zero-Dependency jq
|
|
567
|
+
|
|
568
|
+
> 💡 **架构理念升级**: OpenCLI 的原生机制本质上内建了一个 **Zero-Dependency jq 数据处理流**。使用时不需要依赖系统命令级别的 `jq` 包,而是将所有的解析拍平动作放在 `evaluate` 块内的原生 JavaScript 里,再由外层 YAML 通过 `select`、`map` 等命令提取。这将彻底消灭跨操作系统下产生的第三方二进制库依赖。
|
|
569
|
+
|
|
570
|
+
---
|
|
571
|
+
|
|
572
|
+
## 进阶模式: 级联请求 (Cascading Requests)
|
|
573
|
+
|
|
574
|
+
当目标数据需要多步 API 链式获取时(如 `BVID → CID → 字幕列表 → 字幕内容`),必须使用 **TS 适配器**。YAML 无法处理这种多步逻辑。
|
|
575
|
+
|
|
576
|
+
### 模板代码
|
|
577
|
+
|
|
578
|
+
```typescript
|
|
579
|
+
import { cli, Strategy } from '../../registry.js';
|
|
580
|
+
import type { IPage } from '../../types.js';
|
|
581
|
+
import { apiGet } from '../../bilibili.js'; // 复用平台 SDK
|
|
582
|
+
|
|
583
|
+
cli({
|
|
584
|
+
site: 'bilibili',
|
|
585
|
+
name: 'subtitle',
|
|
586
|
+
strategy: Strategy.COOKIE,
|
|
587
|
+
args: [{ name: 'bvid', required: true }],
|
|
588
|
+
columns: ['index', 'from', 'to', 'content'],
|
|
589
|
+
func: async (page: IPage | null, kwargs: any) => {
|
|
590
|
+
if (!page) throw new Error('Requires browser');
|
|
591
|
+
|
|
592
|
+
// Step 1: 建立 Session
|
|
593
|
+
await page.goto(`https://www.bilibili.com/video/${kwargs.bvid}/`);
|
|
594
|
+
|
|
595
|
+
// Step 2: 从页面提取中间 ID (__INITIAL_STATE__)
|
|
596
|
+
const cid = await page.evaluate(`(async () => {
|
|
597
|
+
return window.__INITIAL_STATE__?.videoData?.cid;
|
|
598
|
+
})()`);
|
|
599
|
+
if (!cid) throw new Error('无法提取 CID');
|
|
600
|
+
|
|
601
|
+
// Step 3: 用中间 ID 调用下一级 API (自动 Wbi 签名)
|
|
602
|
+
const payload = await apiGet(page, '/x/player/wbi/v2', {
|
|
603
|
+
params: { bvid: kwargs.bvid, cid },
|
|
604
|
+
signed: true, // ← 自动生成 w_rid
|
|
605
|
+
});
|
|
606
|
+
|
|
607
|
+
// Step 4: 检测风控降级 (空值断言)
|
|
608
|
+
const subtitles = payload.data?.subtitle?.subtitles || [];
|
|
609
|
+
const url = subtitles[0]?.subtitle_url;
|
|
610
|
+
if (!url) throw new Error('subtitle_url 为空,疑似风控降级');
|
|
611
|
+
|
|
612
|
+
// Step 5: 拉取最终数据 (CDN JSON)
|
|
613
|
+
const items = await page.evaluate(`(async () => {
|
|
614
|
+
const res = await fetch(${JSON.stringify(url.startsWith('//') ? 'https:' + url : url)});
|
|
615
|
+
const json = await res.json();
|
|
616
|
+
return { data: json.body || json };
|
|
617
|
+
})()`);
|
|
618
|
+
|
|
619
|
+
return items.data.map((item, idx) => ({ ... }));
|
|
620
|
+
},
|
|
621
|
+
});
|
|
622
|
+
```
|
|
623
|
+
|
|
624
|
+
### 关键要点
|
|
625
|
+
|
|
626
|
+
| 步骤 | 注意事项 |
|
|
627
|
+
|------|----------|
|
|
628
|
+
| 提取中间 ID | 优先从 `__INITIAL_STATE__` 拿,避免额外 API 调用 |
|
|
629
|
+
| Wbi 签名 | B 站 `/wbi/` 接口**强制校验** `w_rid`,纯 `fetch` 会被 403 |
|
|
630
|
+
| 空值断言 | 即使 HTTP 200,核心字段可能为空串(风控降级) |
|
|
631
|
+
| CDN URL | 常以 `//` 开头,记得补 `https:` |
|
|
632
|
+
| `JSON.stringify` | 拼接 URL 到 evaluate 时必须用它转义,避免注入 |
|
|
633
|
+
|
|
561
634
|
---
|
|
562
635
|
|
|
563
636
|
## 常见陷阱
|
|
@@ -574,6 +647,8 @@ git push
|
|
|
574
647
|
| TS evaluate 格式 | `() => {}` 报 `result is not a function` | TS 中 `page.evaluate()` 必须用 IIFE:`(async () => { ... })()` |
|
|
575
648
|
| 页面异步加载 | evaluate 拿到空数据(store state 还没更新) | 在 evaluate 内用 polling 等待数据出现,或增加 `wait` 时间 |
|
|
576
649
|
| YAML 内嵌大段 JS | 调试困难,字符串转义问题 | 超过 10 行 JS 的命令改用 TS adapter |
|
|
650
|
+
| **风控被拦截(伪200)** | 获取到的 JSON 里核心数据是 `""` (空串) | 极易被误判。必须添加断言!无核心数据立刻要求升级鉴权 Tier 并重新配置 Cookie |
|
|
651
|
+
| **找不到 API** | `explore` 工具打分靠前的接口都拿不到深层数据 | 点击页面按钮诱发懒加载数据,再结合 `getInterceptedRequests` 获取 |
|
|
577
652
|
|
|
578
653
|
---
|
|
579
654
|
|
|
@@ -586,9 +661,10 @@ git push
|
|
|
586
661
|
opencli generate https://www.example.com --goal "hot"
|
|
587
662
|
|
|
588
663
|
# 或分步执行:
|
|
589
|
-
opencli explore https://www.example.com --site mysite
|
|
590
|
-
opencli
|
|
591
|
-
opencli
|
|
664
|
+
opencli explore https://www.example.com --site mysite # 发现 API
|
|
665
|
+
opencli explore https://www.example.com --auto --click "字幕,CC" # 模拟点击触发懒加载 API
|
|
666
|
+
opencli synthesize mysite # 生成候选 YAML
|
|
667
|
+
opencli verify mysite/hot --smoke # 冒烟测试
|
|
592
668
|
```
|
|
593
669
|
|
|
594
670
|
生成的候选 YAML 保存在 `.opencli/explore/mysite/candidates/`,可直接复制到 `src/clis/mysite/` 并微调。
|
package/README.md
CHANGED
|
@@ -7,14 +7,16 @@
|
|
|
7
7
|
|
|
8
8
|
[](https://www.npmjs.com/package/@jackwener/opencli)
|
|
9
9
|
|
|
10
|
-
A CLI tool that turns **any website** into a command-line interface. **
|
|
10
|
+
A CLI tool that turns **any website** into a command-line interface. **46 commands** across **17 sites** — bilibili, zhihu, xiaohongshu, twitter, reddit, xueqiu, github, v2ex, hackernews, bbc, weibo, boss, yahoo-finance, reuters, smzdm, ctrip, youtube — powered by browser session reuse and AI-native discovery.
|
|
11
11
|
|
|
12
12
|
## ✨ Highlights
|
|
13
13
|
|
|
14
14
|
- 🔐 **Account-safe** — Reuses Chrome's logged-in state; your credentials never leave the browser
|
|
15
15
|
- 🤖 **AI Agent ready** — `explore` discovers APIs, `synthesize` generates adapters, `cascade` finds auth strategies
|
|
16
|
-
-
|
|
17
|
-
-
|
|
16
|
+
- 🚀 **Dynamic Loader** — Simply drop `.ts` or `.yaml` adapters into the `clis/` folder for auto-registration
|
|
17
|
+
- 📝 **Dual-Engine Architecture**:
|
|
18
|
+
- **YAML Declarative Engine**: Most adapters are a minimal YAML pipeline of about 30 lines
|
|
19
|
+
- **Native Browser Injection Engine**: Advanced TypeScript utilities (`installInterceptor`, `autoScroll`) for XHR hijacking, GraphQL unwrapping, and store mutation
|
|
18
20
|
|
|
19
21
|
## 🚀 Quick Start
|
|
20
22
|
|
|
@@ -80,11 +82,12 @@ Public API commands (`hackernews`, `github search`, `v2ex`) need no browser at a
|
|
|
80
82
|
|
|
81
83
|
| Site | Commands | Mode |
|
|
82
84
|
|------|----------|------|
|
|
83
|
-
| **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` | 🔐 Browser |
|
|
85
|
+
| **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` `subtitle` `dynamic` `ranking` | 🔐 Browser |
|
|
84
86
|
| **zhihu** | `hot` `search` `question` | 🔐 Browser |
|
|
85
|
-
| **xiaohongshu** | `search` `notifications` `feed` | 🔐 Browser |
|
|
86
|
-
| **
|
|
87
|
-
| **
|
|
87
|
+
| **xiaohongshu** | `search` `notifications` `feed` `me` `user` | 🔐 Browser |
|
|
88
|
+
| **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` | 🔐 Browser |
|
|
89
|
+
| **twitter** | `trending` `bookmarks` `profile` `search` `timeline` | 🔐 Browser |
|
|
90
|
+
| **reddit** | `hot` `frontpage` `search` `subreddit` | 🔐 Browser |
|
|
88
91
|
| **weibo** | `hot` | 🔐 Browser |
|
|
89
92
|
| **boss** | `search` | 🔐 Browser |
|
|
90
93
|
| **youtube** | `search` | 🔐 Browser |
|
|
@@ -131,7 +134,7 @@ Explore outputs to `.opencli/explore/<site>/`:
|
|
|
131
134
|
|
|
132
135
|
## 🔧 Create New Commands
|
|
133
136
|
|
|
134
|
-
See **[
|
|
137
|
+
See **[CLI-CREATOR.md](./CLI-CREATOR.md)** for the full adapter guide (YAML pipeline + TypeScript).
|
|
135
138
|
|
|
136
139
|
## Releasing New Versions
|
|
137
140
|
|
package/README.zh-CN.md
CHANGED
|
@@ -11,11 +11,13 @@ OpenCLI 通过 Chrome 浏览器 + [Playwright MCP Bridge](https://github.com/nic
|
|
|
11
11
|
|
|
12
12
|
## ✨ 亮点
|
|
13
13
|
|
|
14
|
-
- 🌐 **
|
|
14
|
+
- 🌐 **46 个命令,17 个站点** — B站、知乎、小红书、Twitter、Reddit、雪球(xueqiu)、GitHub、V2EX、Hacker News、BBC、微博、BOSS直聘、Yahoo Finance、路透社、什么值得买、携程、YouTube
|
|
15
15
|
- 🔐 **零风控** — 复用 Chrome 登录态,无需存储任何凭证
|
|
16
16
|
- 🤖 **AI 原生** — `explore` 自动发现 API,`synthesize` 生成适配器,`cascade` 探测认证策略
|
|
17
|
-
-
|
|
18
|
-
-
|
|
17
|
+
- 🚀 **动态加载引擎** — 只需将 `.ts` 或 `.yaml` 适配器放入 `clis/` 文件夹即可自动注册生效
|
|
18
|
+
- 📝 **双引擎架构设计**:
|
|
19
|
+
- **YAML 声明式引擎**:大部分适配器只需极简的 ~30 行 YAML 声明
|
|
20
|
+
- **原生浏览器注入引擎**:提供高级 TS API(`installInterceptor`、`autoScroll`)轻松实现 XHR 劫持、GraphQL 解包及状态库注入
|
|
19
21
|
|
|
20
22
|
## 🚀 快速开始
|
|
21
23
|
|
|
@@ -81,11 +83,12 @@ npm install -g @jackwener/opencli@latest
|
|
|
81
83
|
|
|
82
84
|
| 站点 | 命令 | 模式 |
|
|
83
85
|
|------|------|------|
|
|
84
|
-
| **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` | 🔐 浏览器 |
|
|
86
|
+
| **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` `subtitle` `dynamic` `ranking` | 🔐 浏览器 |
|
|
85
87
|
| **zhihu** | `hot` `search` `question` | 🔐 浏览器 |
|
|
86
|
-
| **xiaohongshu** | `search` `notifications` `feed` | 🔐 浏览器 |
|
|
87
|
-
| **
|
|
88
|
-
| **
|
|
88
|
+
| **xiaohongshu** | `search` `notifications` `feed` `me` `user` | 🔐 浏览器 |
|
|
89
|
+
| **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` | 🔐 浏览器 |
|
|
90
|
+
| **twitter** | `trending` `bookmarks` `profile` `search` `timeline` | 🔐 浏览器 |
|
|
91
|
+
| **reddit** | `hot` `frontpage` `search` `subreddit` | 🔐 浏览器 |
|
|
89
92
|
| **weibo** | `hot` | 🔐 浏览器 |
|
|
90
93
|
| **boss** | `search` | 🔐 浏览器 |
|
|
91
94
|
| **youtube** | `search` | 🔐 浏览器 |
|
|
@@ -132,7 +135,7 @@ opencli cascade https://api.example.com/data
|
|
|
132
135
|
|
|
133
136
|
## 🔧 创建新命令
|
|
134
137
|
|
|
135
|
-
查看 **[
|
|
138
|
+
查看 **[CLI-CREATOR.md](./CLI-CREATOR.md)** 了解完整的适配器开发指南(YAML pipeline + TypeScript)。
|
|
136
139
|
|
|
137
140
|
## 版本发布
|
|
138
141
|
|
package/SKILL.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: opencli
|
|
3
3
|
description: "OpenCLI — Make any website your CLI. Zero risk, AI-powered, reuse Chrome login."
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
author: jackwener
|
|
6
|
-
tags: [cli, browser, web, mcp, playwright, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, AI, agent]
|
|
6
|
+
tags: [cli, browser, web, mcp, playwright, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, xueqiu, AI, agent]
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
# OpenCLI
|
|
@@ -49,6 +49,9 @@ opencli bilibili favorite # 我的收藏
|
|
|
49
49
|
opencli bilibili history --limit 20 # 观看历史
|
|
50
50
|
opencli bilibili feed --limit 10 # 动态时间线
|
|
51
51
|
opencli bilibili user-videos --uid 12345 # 用户投稿
|
|
52
|
+
opencli bilibili subtitle --bvid BV1xxx # 获取视频字幕 (支持 --lang zh-CN)
|
|
53
|
+
opencli bilibili dynamic --limit 10 # 动态
|
|
54
|
+
opencli bilibili ranking --limit 10 # 排行榜
|
|
52
55
|
|
|
53
56
|
# 知乎 (browser)
|
|
54
57
|
opencli zhihu hot --limit 10 # 知乎热榜
|
|
@@ -59,6 +62,14 @@ opencli zhihu question --id 34816524 # 问题详情和回答
|
|
|
59
62
|
opencli xiaohongshu search --keyword "美食" # 搜索笔记
|
|
60
63
|
opencli xiaohongshu notifications # 通知(mentions/likes/connections)
|
|
61
64
|
opencli xiaohongshu feed --limit 10 # 推荐 Feed
|
|
65
|
+
opencli xiaohongshu me # 我的信息
|
|
66
|
+
opencli xiaohongshu user --uid xxx # 用户主页
|
|
67
|
+
|
|
68
|
+
# 雪球 Xueqiu (browser)
|
|
69
|
+
opencli xueqiu hot-stock --limit 10 # 雪球热门股票榜
|
|
70
|
+
opencli xueqiu stock --symbol SH600519 # 查看股票实时行情
|
|
71
|
+
opencli xueqiu watchlist # 获取自选股/持仓列表
|
|
72
|
+
opencli xueqiu feed # 我的关注 timeline
|
|
62
73
|
|
|
63
74
|
# GitHub (trending=browser, search=public)
|
|
64
75
|
opencli github trending --limit 10 # GitHub Trending
|
|
@@ -66,10 +77,17 @@ opencli github search --keyword "cli" # 搜索仓库
|
|
|
66
77
|
|
|
67
78
|
# Twitter/X (browser)
|
|
68
79
|
opencli twitter trending --limit 10 # 热门话题
|
|
80
|
+
opencli twitter bookmarks --limit 20 # 获取收藏的书签推文
|
|
81
|
+
opencli twitter search --keyword "AI" # 搜索推文
|
|
82
|
+
opencli twitter profile --username elonmusk # 用户资料
|
|
83
|
+
opencli twitter timeline --limit 20 # 时间线
|
|
69
84
|
|
|
70
85
|
# Reddit (browser)
|
|
71
86
|
opencli reddit hot --limit 10 # 热门帖子
|
|
72
87
|
opencli reddit hot --subreddit programming # 指定子版块
|
|
88
|
+
opencli reddit frontpage --limit 10 # 首页
|
|
89
|
+
opencli reddit search --keyword "AI" # 搜索
|
|
90
|
+
opencli reddit subreddit --name rust # 子版块浏览
|
|
73
91
|
|
|
74
92
|
# V2EX (public)
|
|
75
93
|
opencli v2ex hot --limit 10 # 热门话题
|
|
@@ -128,6 +146,9 @@ opencli generate <url> --goal "hot"
|
|
|
128
146
|
# Strategy Cascade: auto-probe PUBLIC → COOKIE → HEADER
|
|
129
147
|
opencli cascade <api-url>
|
|
130
148
|
|
|
149
|
+
# Explore with interactive fuzzing (click buttons to trigger lazy APIs)
|
|
150
|
+
opencli explore <url> --auto --click "字幕,CC,评论"
|
|
151
|
+
|
|
131
152
|
# Verify: smoke-test a generated adapter
|
|
132
153
|
opencli verify <site/name> --smoke
|
|
133
154
|
```
|
|
@@ -209,7 +230,7 @@ pipeline:
|
|
|
209
230
|
|
|
210
231
|
### TypeScript Adapter (programmatic)
|
|
211
232
|
|
|
212
|
-
Create `src/clis/<site>/<name>.ts
|
|
233
|
+
Create `src/clis/<site>/<name>.ts`. It is discovered and loaded dynamically at runtime (do NOT import it manually in `index.ts`):
|
|
213
234
|
|
|
214
235
|
```typescript
|
|
215
236
|
import { cli, Strategy } from '../../registry.js';
|
|
@@ -217,27 +238,33 @@ import { cli, Strategy } from '../../registry.js';
|
|
|
217
238
|
cli({
|
|
218
239
|
site: 'mysite',
|
|
219
240
|
name: 'search',
|
|
220
|
-
strategy: Strategy.COOKIE
|
|
241
|
+
strategy: Strategy.INTERCEPT, // Or COOKIE
|
|
221
242
|
args: [{ name: 'keyword', required: true }],
|
|
222
243
|
columns: ['rank', 'title', 'url'],
|
|
223
244
|
func: async (page, kwargs) => {
|
|
224
|
-
await page.goto('https://www.mysite.com');
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
245
|
+
await page.goto('https://www.mysite.com/search');
|
|
246
|
+
|
|
247
|
+
// Inject native XHR/Fetch interceptor hook
|
|
248
|
+
await page.installInterceptor('/api/search');
|
|
249
|
+
|
|
250
|
+
// Auto scroll down to trigger lazy loading
|
|
251
|
+
await page.autoScroll({ times: 3, delayMs: 2000 });
|
|
252
|
+
|
|
253
|
+
// Retrieve intercepted JSON payloads
|
|
254
|
+
const requests = await page.getInterceptedRequests();
|
|
255
|
+
|
|
256
|
+
let results = [];
|
|
257
|
+
for (const req of requests) {
|
|
258
|
+
results.push(...req.data.items);
|
|
259
|
+
}
|
|
260
|
+
return results.map((item, i) => ({
|
|
234
261
|
rank: i + 1, title: item.title, url: item.url,
|
|
235
262
|
}));
|
|
236
263
|
},
|
|
237
264
|
});
|
|
238
265
|
```
|
|
239
266
|
|
|
240
|
-
**When to use TS**: XHR interception (
|
|
267
|
+
**When to use TS**: XHR interception (`page.installInterceptor`), infinite scrolling (`page.autoScroll`), cookie extraction, complex data transforms (like GraphQL unwrapping).
|
|
241
268
|
|
|
242
269
|
## Pipeline Steps
|
|
243
270
|
|
package/dist/browser.d.ts
CHANGED
|
@@ -23,7 +23,11 @@ export declare class Page implements IPage {
|
|
|
23
23
|
click(ref: string): Promise<void>;
|
|
24
24
|
typeText(ref: string, text: string): Promise<void>;
|
|
25
25
|
pressKey(key: string): Promise<void>;
|
|
26
|
-
wait(
|
|
26
|
+
wait(options: number | {
|
|
27
|
+
text?: string;
|
|
28
|
+
time?: number;
|
|
29
|
+
timeout?: number;
|
|
30
|
+
}): Promise<void>;
|
|
27
31
|
tabs(): Promise<any>;
|
|
28
32
|
closeTab(index?: number): Promise<void>;
|
|
29
33
|
newTab(): Promise<void>;
|
|
@@ -31,6 +35,12 @@ export declare class Page implements IPage {
|
|
|
31
35
|
networkRequests(includeStatic?: boolean): Promise<any>;
|
|
32
36
|
consoleMessages(level?: string): Promise<any>;
|
|
33
37
|
scroll(direction?: string, amount?: number): Promise<void>;
|
|
38
|
+
autoScroll(options?: {
|
|
39
|
+
times?: number;
|
|
40
|
+
delayMs?: number;
|
|
41
|
+
}): Promise<void>;
|
|
42
|
+
installInterceptor(pattern: string): Promise<void>;
|
|
43
|
+
getInterceptedRequests(): Promise<any[]>;
|
|
34
44
|
}
|
|
35
45
|
/**
|
|
36
46
|
* Playwright MCP process manager.
|
package/dist/browser.js
CHANGED
|
@@ -110,8 +110,14 @@ export class Page {
|
|
|
110
110
|
async pressKey(key) {
|
|
111
111
|
await this.call('tools/call', { name: 'browser_press_key', arguments: { key } });
|
|
112
112
|
}
|
|
113
|
-
async wait(
|
|
114
|
-
|
|
113
|
+
async wait(options) {
|
|
114
|
+
if (typeof options === 'number') {
|
|
115
|
+
await this.call('tools/call', { name: 'browser_wait_for', arguments: { time: options } });
|
|
116
|
+
}
|
|
117
|
+
else {
|
|
118
|
+
// Pass directly to native wait_for, which supports natively awaiting text strings without heavy DOM polling
|
|
119
|
+
await this.call('tools/call', { name: 'browser_wait_for', arguments: options });
|
|
120
|
+
}
|
|
115
121
|
}
|
|
116
122
|
async tabs() {
|
|
117
123
|
return this.call('tools/call', { name: 'browser_tabs', arguments: { action: 'list' } });
|
|
@@ -134,6 +140,88 @@ export class Page {
|
|
|
134
140
|
async scroll(direction = 'down', amount = 500) {
|
|
135
141
|
await this.call('tools/call', { name: 'browser_press_key', arguments: { key: direction === 'down' ? 'PageDown' : 'PageUp' } });
|
|
136
142
|
}
|
|
143
|
+
async autoScroll(options = {}) {
|
|
144
|
+
const times = options.times ?? 3;
|
|
145
|
+
const delayMs = options.delayMs ?? 2000;
|
|
146
|
+
const js = `
|
|
147
|
+
async () => {
|
|
148
|
+
const maxTimes = ${times};
|
|
149
|
+
const maxWaitMs = ${delayMs};
|
|
150
|
+
for (let i = 0; i < maxTimes; i++) {
|
|
151
|
+
const lastHeight = document.body.scrollHeight;
|
|
152
|
+
window.scrollTo(0, lastHeight);
|
|
153
|
+
await new Promise(resolve => {
|
|
154
|
+
let timeoutId;
|
|
155
|
+
const observer = new MutationObserver(() => {
|
|
156
|
+
if (document.body.scrollHeight > lastHeight) {
|
|
157
|
+
clearTimeout(timeoutId);
|
|
158
|
+
observer.disconnect();
|
|
159
|
+
setTimeout(resolve, 100); // Small debounce for rendering
|
|
160
|
+
}
|
|
161
|
+
});
|
|
162
|
+
observer.observe(document.body, { childList: true, subtree: true });
|
|
163
|
+
timeoutId = setTimeout(() => {
|
|
164
|
+
observer.disconnect();
|
|
165
|
+
resolve(null);
|
|
166
|
+
}, maxWaitMs);
|
|
167
|
+
});
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
`;
|
|
171
|
+
await this.evaluate(js);
|
|
172
|
+
}
|
|
173
|
+
async installInterceptor(pattern) {
|
|
174
|
+
const js = `
|
|
175
|
+
() => {
|
|
176
|
+
window.__opencli_xhr = window.__opencli_xhr || [];
|
|
177
|
+
window.__opencli_patterns = window.__opencli_patterns || [];
|
|
178
|
+
if (!window.__opencli_patterns.includes('${pattern}')) {
|
|
179
|
+
window.__opencli_patterns.push('${pattern}');
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
if (!window.__patched_xhr) {
|
|
183
|
+
const checkMatch = (url) => window.__opencli_patterns.some(p => url.includes(p));
|
|
184
|
+
|
|
185
|
+
const XHR = XMLHttpRequest.prototype;
|
|
186
|
+
const open = XHR.open;
|
|
187
|
+
const send = XHR.send;
|
|
188
|
+
XHR.open = function(method, url) {
|
|
189
|
+
this._url = url;
|
|
190
|
+
return open.call(this, method, url, ...Array.prototype.slice.call(arguments, 2));
|
|
191
|
+
};
|
|
192
|
+
XHR.send = function() {
|
|
193
|
+
this.addEventListener('load', function() {
|
|
194
|
+
if (checkMatch(this._url)) {
|
|
195
|
+
try { window.__opencli_xhr.push({url: this._url, data: JSON.parse(this.responseText)}); } catch(e){}
|
|
196
|
+
}
|
|
197
|
+
});
|
|
198
|
+
return send.apply(this, arguments);
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
const origFetch = window.fetch;
|
|
202
|
+
window.fetch = async function(...args) {
|
|
203
|
+
let u = typeof args[0] === 'string' ? args[0] : (args[0] && args[0].url) || '';
|
|
204
|
+
const res = await origFetch.apply(this, args);
|
|
205
|
+
setTimeout(async () => {
|
|
206
|
+
try {
|
|
207
|
+
if (checkMatch(u)) {
|
|
208
|
+
const clone = res.clone();
|
|
209
|
+
const j = await clone.json();
|
|
210
|
+
window.__opencli_xhr.push({url: u, data: j});
|
|
211
|
+
}
|
|
212
|
+
} catch(e) {}
|
|
213
|
+
}, 0);
|
|
214
|
+
return res;
|
|
215
|
+
};
|
|
216
|
+
window.__patched_xhr = true;
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
`;
|
|
220
|
+
await this.evaluate(js);
|
|
221
|
+
}
|
|
222
|
+
async getInterceptedRequests() {
|
|
223
|
+
return (await this.evaluate('() => window.__opencli_xhr')) || [];
|
|
224
|
+
}
|
|
137
225
|
}
|
|
138
226
|
/**
|
|
139
227
|
* Playwright MCP process manager.
|
|
@@ -153,7 +241,11 @@ export class PlaywrightMCP {
|
|
|
153
241
|
throw new Error('Playwright MCP server not found. Install: npm install -D @playwright/mcp');
|
|
154
242
|
return new Promise((resolve, reject) => {
|
|
155
243
|
const timer = setTimeout(() => reject(new Error(`Timed out connecting to browser (${timeout}s)`)), timeout * 1000);
|
|
156
|
-
|
|
244
|
+
const mcpArgs = [mcpPath, '--extension'];
|
|
245
|
+
if (process.env.OPENCLI_BROWSER_EXECUTABLE_PATH) {
|
|
246
|
+
mcpArgs.push('--executablePath', process.env.OPENCLI_BROWSER_EXECUTABLE_PATH);
|
|
247
|
+
}
|
|
248
|
+
this._proc = spawn('node', mcpArgs, {
|
|
157
249
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
158
250
|
env: { ...process.env, ...(process.env.PLAYWRIGHT_MCP_EXTENSION_TOKEN ? { PLAYWRIGHT_MCP_EXTENSION_TOKEN: process.env.PLAYWRIGHT_MCP_EXTENSION_TOKEN } : {}) },
|
|
159
251
|
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|