@jackwener/opencli 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +51 -72
- package/README.md +8 -5
- package/README.zh-CN.md +8 -5
- package/SKILL.md +27 -14
- package/dist/browser.d.ts +6 -0
- package/dist/browser.js +65 -1
- package/dist/clis/bilibili/dynamic.d.ts +1 -0
- package/dist/clis/bilibili/dynamic.js +33 -0
- package/dist/clis/bilibili/ranking.d.ts +1 -0
- package/dist/clis/bilibili/ranking.js +24 -0
- package/dist/clis/reddit/frontpage.yaml +30 -0
- package/dist/clis/reddit/hot.yaml +3 -2
- package/dist/clis/reddit/search.yaml +34 -0
- package/dist/clis/reddit/subreddit.yaml +39 -0
- package/dist/clis/twitter/bookmarks.yaml +85 -0
- package/dist/clis/twitter/profile.d.ts +1 -0
- package/dist/clis/twitter/profile.js +56 -0
- package/dist/clis/twitter/search.d.ts +1 -0
- package/dist/clis/twitter/search.js +60 -0
- package/dist/clis/twitter/timeline.d.ts +1 -0
- package/dist/clis/twitter/timeline.js +47 -0
- package/dist/clis/xiaohongshu/user.d.ts +1 -0
- package/dist/clis/xiaohongshu/user.js +40 -0
- package/dist/clis/xueqiu/feed.yaml +53 -0
- package/dist/clis/xueqiu/hot-stock.yaml +49 -0
- package/dist/clis/xueqiu/hot.yaml +46 -0
- package/dist/clis/xueqiu/search.yaml +53 -0
- package/dist/clis/xueqiu/stock.yaml +67 -0
- package/dist/clis/xueqiu/watchlist.yaml +46 -0
- package/dist/clis/zhihu/hot.yaml +6 -2
- package/dist/clis/zhihu/search.yaml +3 -1
- package/dist/engine.d.ts +1 -1
- package/dist/engine.js +9 -1
- package/dist/main.d.ts +1 -1
- package/dist/main.js +10 -3
- package/dist/pipeline/steps/intercept.js +56 -29
- package/dist/pipeline/template.js +3 -1
- package/dist/pipeline/template.test.js +6 -0
- package/dist/types.d.ts +6 -0
- package/package.json +1 -1
- package/src/browser.ts +72 -4
- package/src/clis/bilibili/dynamic.ts +34 -0
- package/src/clis/bilibili/ranking.ts +25 -0
- package/src/clis/reddit/frontpage.yaml +30 -0
- package/src/clis/reddit/hot.yaml +3 -2
- package/src/clis/reddit/search.yaml +34 -0
- package/src/clis/reddit/subreddit.yaml +39 -0
- package/src/clis/twitter/bookmarks.yaml +85 -0
- package/src/clis/twitter/profile.ts +61 -0
- package/src/clis/twitter/search.ts +65 -0
- package/src/clis/twitter/timeline.ts +50 -0
- package/src/clis/xiaohongshu/user.ts +45 -0
- package/src/clis/xueqiu/feed.yaml +53 -0
- package/src/clis/xueqiu/hot-stock.yaml +49 -0
- package/src/clis/xueqiu/hot.yaml +46 -0
- package/src/clis/xueqiu/search.yaml +53 -0
- package/src/clis/xueqiu/stock.yaml +67 -0
- package/src/clis/xueqiu/watchlist.yaml +46 -0
- package/src/clis/zhihu/hot.yaml +6 -2
- package/src/clis/zhihu/search.yaml +3 -1
- package/src/engine.ts +10 -1
- package/src/main.ts +9 -3
- package/src/pipeline/steps/intercept.ts +58 -28
- package/src/pipeline/template.test.ts +6 -0
- package/src/pipeline/template.ts +3 -1
- package/src/types.ts +3 -0
- package/dist/clis/index.d.ts +0 -22
- package/dist/clis/index.js +0 -34
- package/src/clis/index.ts +0 -46
package/CLI-CREATOR.md
CHANGED
|
@@ -51,7 +51,16 @@ opencli bilibili hot -v # 查看已有命令的 pipeline 每步数据流
|
|
|
51
51
|
- **Request Headers**: Cookie? Bearer? 自定义签名头(X-s、X-t)?
|
|
52
52
|
- **Response Body**: JSON 结构,特别是数据在哪个路径(`data.items`、`data.list`)
|
|
53
53
|
|
|
54
|
-
### 1c.
|
|
54
|
+
### 1c. 高阶 API 发现捷径法则 (Heuristics)
|
|
55
|
+
|
|
56
|
+
在开始死磕复杂的抓包拦截之前,按照以下优先级进行尝试:
|
|
57
|
+
|
|
58
|
+
1. **后缀爆破法 (`.json`)**: 像 Reddit 这样复杂的网站,只要在其 URL 后加上 `.json`(例如 `/r/all.json`),就能在带 Cookie 的情况下直接利用 `fetch` 拿到极其干净的 REST 数据(Tier 2 Cookie 策略极速秒杀)。另外如功能完备的**雪球 (xueqiu)** 也可以走这种纯 API 的方式极简获取,成为你构建简单 YAML 的黄金标杆。
|
|
59
|
+
2. **全局状态查找法 (`__INITIAL_STATE__`)**: 许多服务端渲染 (SSR) 的网站(如小红书、Bilibili)会将首页或详情页的完整数据挂载到全局 window 对象上。与其去拦截网络请求,不如直接 `page.evaluate('() => window.__INITIAL_STATE__')` 获取整个数据树。
|
|
60
|
+
3. **框架探测与 Store Action 截断**: 如果站点使用 Vue + Pinia,可以使用 `tap` 步骤调用 action,让前端框架代替你完成复杂的鉴权签名封装。
|
|
61
|
+
4. **底层 XHR/Fetch 拦截**: 最后手段,当上述都不行时,使用 TypeScript 适配器进行无侵入式的请求抓取。
|
|
62
|
+
|
|
63
|
+
### 1d. 框架检测
|
|
55
64
|
|
|
56
65
|
Explore 自动检测前端框架。如果需要手动确认:
|
|
57
66
|
|
|
@@ -110,9 +119,9 @@ opencli cascade https://api.example.com/hot
|
|
|
110
119
|
|
|
111
120
|
```
|
|
112
121
|
你的 pipeline 里有 evaluate 步骤(内嵌 JS 代码)?
|
|
113
|
-
→ ✅ 用 TypeScript (src/clis/<site>/<name>.ts)
|
|
122
|
+
→ ✅ 用 TypeScript (src/clis/<site>/<name>.ts),保存即自动动态注册
|
|
114
123
|
→ ❌ 纯声明式(navigate + tap + map + limit)?
|
|
115
|
-
→ ✅ 用 YAML (src/clis/<site>/<name>.yaml)
|
|
124
|
+
→ ✅ 用 YAML (src/clis/<site>/<name>.yaml),保存即自动注册
|
|
116
125
|
```
|
|
117
126
|
|
|
118
127
|
| 场景 | 选择 | 示例 |
|
|
@@ -310,7 +319,7 @@ pipeline:
|
|
|
310
319
|
|
|
311
320
|
适用于需要嵌入 JS 代码读取 Pinia state、XHR 拦截、GraphQL、分页、复杂数据转换等场景。
|
|
312
321
|
|
|
313
|
-
文件路径: `src/clis/<site>/<name>.ts
|
|
322
|
+
文件路径: `src/clis/<site>/<name>.ts`。文件将会在运行时被动态扫描并注册(切勿在 `index.ts` 中手动 `import`)。
|
|
314
323
|
|
|
315
324
|
#### Tier 3 — Header 认证(Twitter)
|
|
316
325
|
|
|
@@ -353,84 +362,54 @@ cli({
|
|
|
353
362
|
});
|
|
354
363
|
```
|
|
355
364
|
|
|
356
|
-
#### Tier 4 —
|
|
365
|
+
#### Tier 4 — XHR/Fetch 双重拦截 (Twitter/小红书 通用模式)
|
|
357
366
|
|
|
358
367
|
```typescript
|
|
359
|
-
// src/clis/xiaohongshu/
|
|
368
|
+
// src/clis/xiaohongshu/user.ts
|
|
360
369
|
import { cli, Strategy } from '../../registry.js';
|
|
361
370
|
|
|
362
371
|
cli({
|
|
363
372
|
site: 'xiaohongshu',
|
|
364
|
-
name: '
|
|
365
|
-
description: '
|
|
366
|
-
strategy: Strategy.
|
|
367
|
-
args: [{ name: '
|
|
368
|
-
columns: ['rank', 'title', '
|
|
373
|
+
name: 'user',
|
|
374
|
+
description: '获取用户笔记',
|
|
375
|
+
strategy: Strategy.INTERCEPT,
|
|
376
|
+
args: [{ name: 'id', required: true }],
|
|
377
|
+
columns: ['rank', 'title', 'likes', 'url'],
|
|
369
378
|
func: async (page, kwargs) => {
|
|
370
|
-
await page.goto(
|
|
371
|
-
await page.wait(
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
const x = this;
|
|
393
|
-
const orig = x.onreadystatechange;
|
|
394
|
-
x.onreadystatechange = function() {
|
|
395
|
-
if (x.readyState === 4 && !captured) {
|
|
396
|
-
try { captured = JSON.parse(x.responseText); } catch {}
|
|
397
|
-
}
|
|
398
|
-
if (orig) orig.apply(this, arguments);
|
|
399
|
-
};
|
|
400
|
-
}
|
|
401
|
-
return origSend.apply(this, arguments);
|
|
402
|
-
};
|
|
403
|
-
|
|
404
|
-
try {
|
|
405
|
-
// 触发 Store Action,让网站自己签名发请求
|
|
406
|
-
searchStore.mutateSearchValue('${kwargs.keyword}');
|
|
407
|
-
await searchStore.loadMore();
|
|
408
|
-
await new Promise(r => setTimeout(r, 800));
|
|
409
|
-
} finally {
|
|
410
|
-
// 恢复原始 XHR
|
|
411
|
-
XMLHttpRequest.prototype.open = origOpen;
|
|
412
|
-
XMLHttpRequest.prototype.send = origSend;
|
|
379
|
+
await page.goto(`https://www.xiaohongshu.com/user/profile/${kwargs.id}`);
|
|
380
|
+
await page.wait(5);
|
|
381
|
+
|
|
382
|
+
// XHR/Fetch 底层拦截:捕获所有包含 'v1/user/posted' 的请求
|
|
383
|
+
await page.installInterceptor('v1/user/posted');
|
|
384
|
+
|
|
385
|
+
// 触发后端 API:模拟人类用户向底部滚动2次
|
|
386
|
+
await page.autoScroll({ times: 2, delayMs: 2000 });
|
|
387
|
+
|
|
388
|
+
// 提取所有被拦截捕获的 JSON 响应体
|
|
389
|
+
const requests = await page.getInterceptedRequests();
|
|
390
|
+
if (!requests || requests.length === 0) return [];
|
|
391
|
+
|
|
392
|
+
let results = [];
|
|
393
|
+
for (const req of requests) {
|
|
394
|
+
if (req.data?.data?.notes) {
|
|
395
|
+
for (const note of req.data.data.notes) {
|
|
396
|
+
results.push({
|
|
397
|
+
title: note.display_title || '',
|
|
398
|
+
likes: note.interact_info?.liked_count || '0',
|
|
399
|
+
url: `https://www.xiaohongshu.com/explore/${note.note_id || note.id}`
|
|
400
|
+
});
|
|
413
401
|
}
|
|
402
|
+
}
|
|
403
|
+
}
|
|
414
404
|
|
|
415
|
-
|
|
416
|
-
return (captured.data?.items || []).map(i => ({
|
|
417
|
-
title: i.note_card?.display_title || '',
|
|
418
|
-
author: i.note_card?.user?.nickname || '',
|
|
419
|
-
likes: i.note_card?.interact_info?.liked_count || '0',
|
|
420
|
-
type: i.note_card?.type || '',
|
|
421
|
-
}));
|
|
422
|
-
})()
|
|
423
|
-
`);
|
|
424
|
-
|
|
425
|
-
if (!Array.isArray(data)) return [];
|
|
426
|
-
return data.slice(0, kwargs.limit || 20).map((item, i) => ({
|
|
405
|
+
return results.slice(0, 20).map((item, i) => ({
|
|
427
406
|
rank: i + 1, ...item,
|
|
428
407
|
}));
|
|
429
408
|
},
|
|
430
409
|
});
|
|
431
410
|
```
|
|
432
411
|
|
|
433
|
-
>
|
|
412
|
+
> **拦截核心思路**:不自己构造签名,而是利用 `installInterceptor` 劫持网站自己的 `XMLHttpRequest` 和 `fetch`,让网站发请求,我们直接在底层取出解析好的 `response.json()`。
|
|
434
413
|
|
|
435
414
|
---
|
|
436
415
|
|
|
@@ -537,11 +516,7 @@ opencli mysite hot -f csv > data.csv
|
|
|
537
516
|
|
|
538
517
|
### TS 适配器
|
|
539
518
|
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
```typescript
|
|
543
|
-
import './mysite/search.js';
|
|
544
|
-
```
|
|
519
|
+
放入 `src/clis/<site>/<name>.ts` 即自动加载模块,无需在 `index.ts` 中写入 `import`。
|
|
545
520
|
|
|
546
521
|
### 验证注册
|
|
547
522
|
|
|
@@ -558,6 +533,10 @@ git commit -m "feat(mysite): add hot and search adapters"
|
|
|
558
533
|
git push
|
|
559
534
|
```
|
|
560
535
|
|
|
536
|
+
## 设计哲学: Zero-Dependency jq
|
|
537
|
+
|
|
538
|
+
> 💡 **架构理念升级**: OpenCLI 的原生机制本质上内建了一个 **Zero-Dependency jq 数据处理流**。使用时不需要依赖系统命令级别的 `jq` 包,而是将所有的解析拍平动作放在 `evaluate` 块内的原生 JavaScript 里,再由外层 YAML 通过 `select`、`map` 等命令提取。这将彻底消灭跨操作系统下产生的第三方二进制库依赖。
|
|
539
|
+
|
|
561
540
|
---
|
|
562
541
|
|
|
563
542
|
## 常见陷阱
|
package/README.md
CHANGED
|
@@ -7,14 +7,16 @@
|
|
|
7
7
|
|
|
8
8
|
[](https://www.npmjs.com/package/@jackwener/opencli)
|
|
9
9
|
|
|
10
|
-
A CLI tool that turns **any website** into a command-line interface. **
|
|
10
|
+
A CLI tool that turns **any website** into a command-line interface. **35+ commands** across **17 sites** — bilibili, zhihu, xiaohongshu, twitter, reddit, xueqiu, github, v2ex, hackernews, bbc, weibo, boss, yahoo-finance, reuters, smzdm, ctrip, youtube — powered by browser session reuse and AI-native discovery.
|
|
11
11
|
|
|
12
12
|
## ✨ Highlights
|
|
13
13
|
|
|
14
14
|
- 🔐 **Account-safe** — Reuses Chrome's logged-in state; your credentials never leave the browser
|
|
15
15
|
- 🤖 **AI Agent ready** — `explore` discovers APIs, `synthesize` generates adapters, `cascade` finds auth strategies
|
|
16
|
-
-
|
|
17
|
-
-
|
|
16
|
+
- 🚀 **Dynamic Loader** — Simply drop `.ts` or `.yaml` adapters into the `clis/` folder for auto-registration
|
|
17
|
+
- 📝 **Dual-Engine Architecture**:
|
|
18
|
+
- **YAML Declarative Engine**: Most adapters are minimal ~30 lines of YAML pipeline
|
|
19
|
+
- **Native Browser Injection Engine**: Advanced TypeScript utilities (`installInterceptor`, `autoScroll`) for XHR hijacking, GraphQL unwrapping, and store mutation
|
|
18
20
|
|
|
19
21
|
## 🚀 Quick Start
|
|
20
22
|
|
|
@@ -83,7 +85,8 @@ Public API commands (`hackernews`, `github search`, `v2ex`) need no browser at a
|
|
|
83
85
|
| **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` | 🔐 Browser |
|
|
84
86
|
| **zhihu** | `hot` `search` `question` | 🔐 Browser |
|
|
85
87
|
| **xiaohongshu** | `search` `notifications` `feed` | 🔐 Browser |
|
|
86
|
-
| **
|
|
88
|
+
| **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` | 🔐 Browser |
|
|
89
|
+
| **twitter** | `trending` `bookmarks` | 🔐 Browser |
|
|
87
90
|
| **reddit** | `hot` | 🔐 Browser |
|
|
88
91
|
| **weibo** | `hot` | 🔐 Browser |
|
|
89
92
|
| **boss** | `search` | 🔐 Browser |
|
|
@@ -131,7 +134,7 @@ Explore outputs to `.opencli/explore/<site>/`:
|
|
|
131
134
|
|
|
132
135
|
## 🔧 Create New Commands
|
|
133
136
|
|
|
134
|
-
See **[
|
|
137
|
+
See **[CLI-CREATOR.md](./CLI-CREATOR.md)** for the full adapter guide (YAML pipeline + TypeScript).
|
|
135
138
|
|
|
136
139
|
## Releasing New Versions
|
|
137
140
|
|
package/README.zh-CN.md
CHANGED
|
@@ -11,11 +11,13 @@ OpenCLI 通过 Chrome 浏览器 + [Playwright MCP Bridge](https://github.com/nic
|
|
|
11
11
|
|
|
12
12
|
## ✨ 亮点
|
|
13
13
|
|
|
14
|
-
- 🌐 **
|
|
14
|
+
- 🌐 **35+ 命令,17 个站点** — B站、知乎、小红书、Twitter、Reddit、雪球(xueqiu)、GitHub、V2EX、Hacker News、BBC、微博、BOSS直聘、Yahoo Finance、路透社、什么值得买、携程、YouTube
|
|
15
15
|
- 🔐 **零风控** — 复用 Chrome 登录态,无需存储任何凭证
|
|
16
16
|
- 🤖 **AI 原生** — `explore` 自动发现 API,`synthesize` 生成适配器,`cascade` 探测认证策略
|
|
17
|
-
-
|
|
18
|
-
-
|
|
17
|
+
- 🚀 **动态加载引擎** — 只需将 `.ts` 或 `.yaml` 适配器放入 `clis/` 文件夹即可自动注册生效
|
|
18
|
+
- 📝 **双引擎架构设计**:
|
|
19
|
+
- **YAML 声明式引擎**:大部分适配器只需极简的 ~30 行 YAML 声明
|
|
20
|
+
- **原生浏览器注入引擎**:提供高级 TS API(`installInterceptor`、`autoScroll`)轻松实现 XHR 劫持、GraphQL 解包及状态库注入
|
|
19
21
|
|
|
20
22
|
## 🚀 快速开始
|
|
21
23
|
|
|
@@ -84,7 +86,8 @@ npm install -g @jackwener/opencli@latest
|
|
|
84
86
|
| **bilibili** | `hot` `search` `me` `favorite` `history` `feed` `user-videos` | 🔐 浏览器 |
|
|
85
87
|
| **zhihu** | `hot` `search` `question` | 🔐 浏览器 |
|
|
86
88
|
| **xiaohongshu** | `search` `notifications` `feed` | 🔐 浏览器 |
|
|
87
|
-
| **
|
|
89
|
+
| **xueqiu** | `feed` `hot-stock` `hot` `search` `stock` `watchlist` | 🔐 浏览器 |
|
|
90
|
+
| **twitter** | `trending` `bookmarks` | 🔐 浏览器 |
|
|
88
91
|
| **reddit** | `hot` | 🔐 浏览器 |
|
|
89
92
|
| **weibo** | `hot` | 🔐 浏览器 |
|
|
90
93
|
| **boss** | `search` | 🔐 浏览器 |
|
|
@@ -132,7 +135,7 @@ opencli cascade https://api.example.com/data
|
|
|
132
135
|
|
|
133
136
|
## 🔧 创建新命令
|
|
134
137
|
|
|
135
|
-
查看 **[
|
|
138
|
+
查看 **[CLI-CREATOR.md](./CLI-CREATOR.md)** 了解完整的适配器开发指南(YAML pipeline + TypeScript)。
|
|
136
139
|
|
|
137
140
|
## 版本发布
|
|
138
141
|
|
package/SKILL.md
CHANGED
|
@@ -3,7 +3,7 @@ name: opencli
|
|
|
3
3
|
description: "OpenCLI — Make any website your CLI. Zero risk, AI-powered, reuse Chrome login."
|
|
4
4
|
version: 0.1.0
|
|
5
5
|
author: jackwener
|
|
6
|
-
tags: [cli, browser, web, mcp, playwright, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, AI, agent]
|
|
6
|
+
tags: [cli, browser, web, mcp, playwright, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, xueqiu, AI, agent]
|
|
7
7
|
---
|
|
8
8
|
|
|
9
9
|
# OpenCLI
|
|
@@ -60,12 +60,19 @@ opencli xiaohongshu search --keyword "美食" # 搜索笔记
|
|
|
60
60
|
opencli xiaohongshu notifications # 通知(mentions/likes/connections)
|
|
61
61
|
opencli xiaohongshu feed --limit 10 # 推荐 Feed
|
|
62
62
|
|
|
63
|
+
# 雪球 Xueqiu (browser)
|
|
64
|
+
opencli xueqiu hot-stock --limit 10 # 雪球热门股票榜
|
|
65
|
+
opencli xueqiu stock --symbol SH600519 # 查看股票实时行情
|
|
66
|
+
opencli xueqiu watchlist # 获取自选股/持仓列表
|
|
67
|
+
opencli xueqiu feed # 我的关注 timeline
|
|
68
|
+
|
|
63
69
|
# GitHub (trending=browser, search=public)
|
|
64
70
|
opencli github trending --limit 10 # GitHub Trending
|
|
65
71
|
opencli github search --keyword "cli" # 搜索仓库
|
|
66
72
|
|
|
67
73
|
# Twitter/X (browser)
|
|
68
74
|
opencli twitter trending --limit 10 # 热门话题
|
|
75
|
+
opencli twitter bookmarks --limit 20 # 获取收藏的书签推文
|
|
69
76
|
|
|
70
77
|
# Reddit (browser)
|
|
71
78
|
opencli reddit hot --limit 10 # 热门帖子
|
|
@@ -209,7 +216,7 @@ pipeline:
|
|
|
209
216
|
|
|
210
217
|
### TypeScript Adapter (programmatic)
|
|
211
218
|
|
|
212
|
-
Create `src/clis/<site>/<name>.ts
|
|
219
|
+
Create `src/clis/<site>/<name>.ts`. It is discovered and loaded dynamically at runtime (do NOT import it manually in `index.ts`):
|
|
213
220
|
|
|
214
221
|
```typescript
|
|
215
222
|
import { cli, Strategy } from '../../registry.js';
|
|
@@ -217,27 +224,33 @@ import { cli, Strategy } from '../../registry.js';
|
|
|
217
224
|
cli({
|
|
218
225
|
site: 'mysite',
|
|
219
226
|
name: 'search',
|
|
220
|
-
strategy: Strategy.COOKIE
|
|
227
|
+
strategy: Strategy.INTERCEPT, // Or COOKIE
|
|
221
228
|
args: [{ name: 'keyword', required: true }],
|
|
222
229
|
columns: ['rank', 'title', 'url'],
|
|
223
230
|
func: async (page, kwargs) => {
|
|
224
|
-
await page.goto('https://www.mysite.com');
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
231
|
+
await page.goto('https://www.mysite.com/search');
|
|
232
|
+
|
|
233
|
+
// Inject native XHR/Fetch interceptor hook
|
|
234
|
+
await page.installInterceptor('/api/search');
|
|
235
|
+
|
|
236
|
+
// Auto scroll down to trigger lazy loading
|
|
237
|
+
await page.autoScroll({ times: 3, delayMs: 2000 });
|
|
238
|
+
|
|
239
|
+
// Retrieve intercepted JSON payloads
|
|
240
|
+
const requests = await page.getInterceptedRequests();
|
|
241
|
+
|
|
242
|
+
let results = [];
|
|
243
|
+
for (const req of requests) {
|
|
244
|
+
results.push(...req.data.items);
|
|
245
|
+
}
|
|
246
|
+
return results.map((item, i) => ({
|
|
234
247
|
rank: i + 1, title: item.title, url: item.url,
|
|
235
248
|
}));
|
|
236
249
|
},
|
|
237
250
|
});
|
|
238
251
|
```
|
|
239
252
|
|
|
240
|
-
**When to use TS**: XHR interception (
|
|
253
|
+
**When to use TS**: XHR interception (`page.installInterceptor`), infinite scrolling (`page.autoScroll`), cookie extraction, complex data transforms (like GraphQL unwrapping).
|
|
241
254
|
|
|
242
255
|
## Pipeline Steps
|
|
243
256
|
|
package/dist/browser.d.ts
CHANGED
|
@@ -31,6 +31,12 @@ export declare class Page implements IPage {
|
|
|
31
31
|
networkRequests(includeStatic?: boolean): Promise<any>;
|
|
32
32
|
consoleMessages(level?: string): Promise<any>;
|
|
33
33
|
scroll(direction?: string, amount?: number): Promise<void>;
|
|
34
|
+
autoScroll(options?: {
|
|
35
|
+
times?: number;
|
|
36
|
+
delayMs?: number;
|
|
37
|
+
}): Promise<void>;
|
|
38
|
+
installInterceptor(pattern: string): Promise<void>;
|
|
39
|
+
getInterceptedRequests(): Promise<any[]>;
|
|
34
40
|
}
|
|
35
41
|
/**
|
|
36
42
|
* Playwright MCP process manager.
|
package/dist/browser.js
CHANGED
|
@@ -134,6 +134,66 @@ export class Page {
|
|
|
134
134
|
/**
 * Scroll the page by sending a single key press through the MCP
 * `browser_press_key` tool.
 *
 * @param {string} [direction='down'] - `'down'` sends `PageDown`; any other
 *   value sends `PageUp`.
 * @param {number} [amount=500] - NOTE(review): accepted but never read in
 *   this method, so the scroll distance is whatever one key press produces.
 * @returns {Promise<void>}
 */
async scroll(direction = 'down', amount = 500) {
    await this.call('tools/call', { name: 'browser_press_key', arguments: { key: direction === 'down' ? 'PageDown' : 'PageUp' } });
}
|
|
137
|
+
async autoScroll(options = {}) {
|
|
138
|
+
const times = options.times ?? 3;
|
|
139
|
+
const delayMs = options.delayMs ?? 2000;
|
|
140
|
+
for (let i = 0; i < times; i++) {
|
|
141
|
+
await this.evaluate('() => window.scrollTo(0, document.body.scrollHeight)');
|
|
142
|
+
await this.wait(delayMs / 1000);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
async installInterceptor(pattern) {
|
|
146
|
+
const js = `
|
|
147
|
+
() => {
|
|
148
|
+
window.__opencli_xhr = window.__opencli_xhr || [];
|
|
149
|
+
window.__opencli_patterns = window.__opencli_patterns || [];
|
|
150
|
+
if (!window.__opencli_patterns.includes('${pattern}')) {
|
|
151
|
+
window.__opencli_patterns.push('${pattern}');
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
if (!window.__patched_xhr) {
|
|
155
|
+
const checkMatch = (url) => window.__opencli_patterns.some(p => url.includes(p));
|
|
156
|
+
|
|
157
|
+
const XHR = XMLHttpRequest.prototype;
|
|
158
|
+
const open = XHR.open;
|
|
159
|
+
const send = XHR.send;
|
|
160
|
+
XHR.open = function(method, url) {
|
|
161
|
+
this._url = url;
|
|
162
|
+
return open.call(this, method, url, ...Array.prototype.slice.call(arguments, 2));
|
|
163
|
+
};
|
|
164
|
+
XHR.send = function() {
|
|
165
|
+
this.addEventListener('load', function() {
|
|
166
|
+
if (checkMatch(this._url)) {
|
|
167
|
+
try { window.__opencli_xhr.push({url: this._url, data: JSON.parse(this.responseText)}); } catch(e){}
|
|
168
|
+
}
|
|
169
|
+
});
|
|
170
|
+
return send.apply(this, arguments);
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
const origFetch = window.fetch;
|
|
174
|
+
window.fetch = async function(...args) {
|
|
175
|
+
let u = typeof args[0] === 'string' ? args[0] : (args[0] && args[0].url) || '';
|
|
176
|
+
const res = await origFetch.apply(this, args);
|
|
177
|
+
setTimeout(async () => {
|
|
178
|
+
try {
|
|
179
|
+
if (checkMatch(u)) {
|
|
180
|
+
const clone = res.clone();
|
|
181
|
+
const j = await clone.json();
|
|
182
|
+
window.__opencli_xhr.push({url: u, data: j});
|
|
183
|
+
}
|
|
184
|
+
} catch(e) {}
|
|
185
|
+
}, 0);
|
|
186
|
+
return res;
|
|
187
|
+
};
|
|
188
|
+
window.__patched_xhr = true;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
`;
|
|
192
|
+
await this.evaluate(js);
|
|
193
|
+
}
|
|
194
|
+
async getInterceptedRequests() {
|
|
195
|
+
return (await this.evaluate('() => window.__opencli_xhr')) || [];
|
|
196
|
+
}
|
|
137
197
|
}
|
|
138
198
|
/**
|
|
139
199
|
* Playwright MCP process manager.
|
|
@@ -153,7 +213,11 @@ export class PlaywrightMCP {
|
|
|
153
213
|
throw new Error('Playwright MCP server not found. Install: npm install -D @playwright/mcp');
|
|
154
214
|
return new Promise((resolve, reject) => {
|
|
155
215
|
const timer = setTimeout(() => reject(new Error(`Timed out connecting to browser (${timeout}s)`)), timeout * 1000);
|
|
156
|
-
|
|
216
|
+
const mcpArgs = [mcpPath, '--extension'];
|
|
217
|
+
if (process.env.OPENCLI_BROWSER_EXECUTABLE_PATH) {
|
|
218
|
+
mcpArgs.push('--executablePath', process.env.OPENCLI_BROWSER_EXECUTABLE_PATH);
|
|
219
|
+
}
|
|
220
|
+
this._proc = spawn('node', mcpArgs, {
|
|
157
221
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
158
222
|
env: { ...process.env, ...(process.env.PLAYWRIGHT_MCP_EXTENSION_TOKEN ? { PLAYWRIGHT_MCP_EXTENSION_TOKEN: process.env.PLAYWRIGHT_MCP_EXTENSION_TOKEN } : {}) },
|
|
159
223
|
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import { apiGet } from '../../bilibili.js';
|
|
3
|
+
// Registers `bilibili dynamic`: lists entries from the dynamic feed endpoint.
cli({
    site: 'bilibili',
    name: 'dynamic',
    description: 'Get Bilibili user dynamic feed',
    domain: 'www.bilibili.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'limit', type: 'int', default: 15 },
    ],
    columns: ['id', 'author', 'text', 'likes', 'url'],
    func: async (page, kwargs) => {
        const response = await apiGet(page, '/x/polymer/web-dynamic/v1/feed/all', { params: {}, signed: false });
        const feed = response?.data?.items ?? [];
        const rows = [];
        for (const entry of feed.slice(0, Number(kwargs.limit))) {
            const dynamicModule = entry.modules?.module_dynamic;
            // Prefer the description text; fall back to the archive title, then to ''.
            const text = dynamicModule?.desc?.text || dynamicModule?.major?.archive?.title || '';
            const dynamicId = entry.id_str;
            rows.push({
                id: dynamicId ?? '',
                author: entry.modules?.module_author?.name ?? '',
                text,
                likes: entry.modules?.module_stat?.like?.count ?? 0,
                url: dynamicId ? `https://t.bilibili.com/${dynamicId}` : '',
            });
        }
        return rows;
    },
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import { apiGet } from '../../bilibili.js';
|
|
3
|
+
// Registers `bilibili ranking`: lists videos from the ranking endpoint.
cli({
    site: 'bilibili',
    name: 'ranking',
    description: 'Get Bilibili video ranking board',
    domain: 'www.bilibili.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'limit', type: 'int', default: 20 },
    ],
    columns: ['rank', 'title', 'author', 'score', 'url'],
    func: async (page, kwargs) => {
        const response = await apiGet(page, '/x/web-interface/ranking/v2', { params: { rid: 0, type: 'all' }, signed: false });
        const videos = response?.data?.list ?? [];
        const selected = videos.slice(0, Number(kwargs.limit));
        return selected.map((video, index) => {
            const link = video.bvid ? `https://www.bilibili.com/video/${video.bvid}` : '';
            return {
                rank: index + 1,
                title: video.title ?? '',
                author: video.owner?.name ?? '',
                // The `score` column is filled from the view count.
                score: video.stat?.view ?? 0,
                url: link,
            };
        });
    },
});
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Adapter for `reddit frontpage`: lists posts from /r/all.
site: reddit
name: frontpage
description: Reddit Frontpage / r/all
domain: reddit.com
strategy: cookie
browser: true

args:
  limit:
    type: int
    default: 15

columns: [title, subreddit, author, upvotes, comments, url]

pipeline:
  # Open reddit.com first so the relative fetch below is same-origin.
  - navigate: https://www.reddit.com
  # Request the JSON listing from inside the page; credentials: 'include'
  # sends the browser's cookies with the request.
  - evaluate: |
      (async () => {
        const res = await fetch('/r/all.json?limit=${{ args.limit }}', { credentials: 'include' });
        const j = await res.json();
        return j?.data?.children || [];
      })()
  # Each listing child keeps its post fields under `data`.
  - map:
      title: ${{ item.data.title }}
      subreddit: ${{ item.data.subreddit_name_prefixed }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
|
|
@@ -18,9 +18,10 @@ pipeline:
|
|
|
18
18
|
|
|
19
19
|
- evaluate: |
|
|
20
20
|
(async () => {
|
|
21
|
-
const sub =
|
|
21
|
+
const sub = ${{ args.subreddit | json }};
|
|
22
22
|
const path = sub ? '/r/' + sub + '/hot.json' : '/hot.json';
|
|
23
|
-
const
|
|
23
|
+
const limit = ${{ args.limit }};
|
|
24
|
+
const res = await fetch(path + '?limit=' + limit + '&raw_json=1', {
|
|
24
25
|
credentials: 'include'
|
|
25
26
|
});
|
|
26
27
|
const d = await res.json();
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Adapter for `reddit search`: searches posts via /search.json.
site: reddit
name: search
description: Search Reddit Posts
domain: reddit.com
strategy: cookie
browser: true

args:
  query:
    type: string
    required: true
  limit:
    type: int
    default: 15

columns: [title, subreddit, author, upvotes, comments, url]

pipeline:
  # Open reddit.com first so the relative fetch below is same-origin.
  - navigate: https://www.reddit.com
  - evaluate: |
      (async () => {
        // `| json` renders the query as a JS string literal, so quotes or
        // backslashes in the search text cannot break or inject into this script.
        const q = encodeURIComponent(${{ args.query | json }});
        const res = await fetch('/search.json?q=' + q + '&limit=${{ args.limit }}', { credentials: 'include' });
        const j = await res.json();
        return j?.data?.children || [];
      })()
  # Each listing child keeps its post fields under `data`.
  - map:
      title: ${{ item.data.title }}
      subreddit: ${{ item.data.subreddit_name_prefixed }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Adapter for `reddit subreddit`: lists posts from one subreddit.
site: reddit
name: subreddit
description: Get posts from a specific Subreddit
domain: reddit.com
strategy: cookie
browser: true

args:
  name:
    type: string
    required: true
  sort:
    type: string
    default: hot
    description: "Sorting method: hot, new, top, rising"
  limit:
    type: int
    default: 15

columns: [title, author, upvotes, comments, url]

pipeline:
  # Open reddit.com first so the relative fetch below is same-origin.
  - navigate: https://www.reddit.com
  - evaluate: |
      (async () => {
        // `| json` renders each argument as a JS string literal, so quotes or
        // backslashes in user input cannot break or inject into this script.
        let sub = ${{ args.name | json }};
        // Accept both "programming" and "r/programming".
        if (sub.startsWith('r/')) sub = sub.slice(2);
        const sort = ${{ args.sort | json }};
        const res = await fetch('/r/' + sub + '/' + sort + '.json?limit=${{ args.limit }}', { credentials: 'include' });
        const j = await res.json();
        return j?.data?.children || [];
      })()
  # Each listing child keeps its post fields under `data`.
  - map:
      title: ${{ item.data.title }}
      author: ${{ item.data.author }}
      upvotes: ${{ item.data.score }}
      comments: ${{ item.data.num_comments }}
      url: https://www.reddit.com${{ item.data.permalink }}
  - limit: ${{ args.limit }}
|