@jackwener/opencli 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +190 -5
- package/README.md +6 -6
- package/README.zh-CN.md +6 -6
- package/SKILL.md +19 -3
- package/dist/browser.d.ts +5 -1
- package/dist/browser.js +33 -5
- package/dist/build-manifest.d.ts +11 -0
- package/dist/build-manifest.js +101 -0
- package/dist/cli-manifest.json +1273 -0
- package/dist/clis/bilibili/following.d.ts +1 -0
- package/dist/clis/bilibili/following.js +41 -0
- package/dist/clis/bilibili/subtitle.d.ts +1 -0
- package/dist/clis/bilibili/subtitle.js +86 -0
- package/dist/engine.d.ts +13 -0
- package/dist/engine.js +122 -17
- package/dist/explore.js +50 -0
- package/dist/main.js +2 -2
- package/dist/pipeline/steps/browser.js +4 -8
- package/dist/pipeline/steps/fetch.js +74 -5
- package/dist/pipeline/steps/tap.js +8 -6
- package/dist/registry.d.ts +3 -0
- package/dist/types.d.ts +5 -1
- package/package.json +3 -2
- package/src/browser.ts +33 -6
- package/src/build-manifest.ts +133 -0
- package/src/clis/bilibili/following.ts +50 -0
- package/src/clis/bilibili/subtitle.ts +100 -0
- package/src/engine.ts +123 -17
- package/src/explore.ts +51 -0
- package/src/main.ts +2 -2
- package/src/pipeline/steps/browser.ts +4 -7
- package/src/pipeline/steps/fetch.ts +83 -5
- package/src/pipeline/steps/tap.ts +8 -6
- package/src/registry.ts +3 -0
- package/src/types.ts +1 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import { fetchJson, getSelfUid, resolveUid } from '../../bilibili.js';
|
|
3
|
+
/**
 * `bilibili following` — list the accounts a Bilibili user follows.
 *
 * Args:
 *   uid   — optional target user; resolved through `resolveUid`. When omitted the
 *           logged-in user is used (`getSelfUid`).
 *   page  — 1-based page number (default 1).
 *   limit — page size, capped at 50 (default 50).
 *
 * Output columns: mid, name, sign, following, fans.
 * Throws when no browser page is supplied or when the API answers with a non-zero `code`.
 */
cli({
    site: 'bilibili',
    name: 'following',
    description: '获取 Bilibili 用户的关注列表',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'uid', required: false, help: '目标用户 ID(默认为当前登录用户)' },
        { name: 'page', type: 'int', required: false, default: 1, help: '页码' },
        { name: 'limit', type: 'int', required: false, default: 50, help: '每页数量 (最大 50)' },
    ],
    columns: ['mid', 'name', 'sign', 'following', 'fans'],
    func: async (page, kwargs) => {
        if (!page)
            throw new Error('Requires browser');
        // Coerce a CLI value to an integer >= 1, falling back when it is missing,
        // non-numeric, zero or negative (previously such values reached the API as-is).
        const toPositiveInt = (value, fallback) => {
            const n = Math.trunc(Number(value));
            return Number.isFinite(n) && n >= 1 ? n : fallback;
        };
        // 1. Resolve UID (default to self)
        const uid = kwargs.uid
            ? await resolveUid(page, kwargs.uid)
            : await getSelfUid(page);
        const pn = toPositiveInt(kwargs.page, 1);
        const ps = Math.min(toPositiveInt(kwargs.limit, 50), 50);
        // 2. Fetch following list (per the original author: standard cookie API, no Wbi signing).
        //    URLSearchParams percent-encodes every value, so an unexpected uid cannot
        //    inject extra query parameters.
        const query = new URLSearchParams({
            vmid: String(uid),
            pn: String(pn),
            ps: String(ps),
            order: 'desc',
        });
        const payload = await fetchJson(page, `https://api.bilibili.com/x/relation/followings?${query}`);
        if (payload.code !== 0) {
            throw new Error(`获取关注列表失败: ${payload.message} (${payload.code})`);
        }
        const list = payload.data?.list || [];
        if (list.length === 0) {
            // Single placeholder row so the table still reports the total for an empty page.
            return [{ mid: '-', name: `共 ${payload.data?.total ?? 0} 人关注,当前页无数据`, sign: '', following: '', fans: '' }];
        }
        // 3. Map to output
        return list.map((u) => ({
            mid: u.mid,
            name: u.uname,
            sign: (u.sign || '').slice(0, 40), // keep the signature column narrow
            following: u.attribute === 6 ? '互相关注' : '已关注',
            // NOTE(review): the `fans` column carries the official-verification description,
            // not a follower count — confirm this mapping is intended.
            fans: u.official_verify?.desc || '',
        }));
    },
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { cli, Strategy } from '../../registry.js';
|
|
2
|
+
import { apiGet } from '../../bilibili.js';
|
|
3
|
+
/**
 * `bilibili subtitle` — dump one subtitle track of a Bilibili video as timed rows.
 *
 * Flow: open the video page, read the video's CID from `window.__INITIAL_STATE__`,
 * request player info through `apiGet` with `signed: true`, pick a subtitle track,
 * download that track's JSON from inside the page, and map it to rows.
 *
 * Args:
 *   bvid — required video id.
 *   lang — optional subtitle language code; when omitted, or when no track has that
 *          code, the first available track is used.
 *
 * Output columns: index, from, to, content.
 * Throws when no browser page is supplied, the CID cannot be read, the API returns a
 * non-zero `code`, the video has no subtitle tracks, the chosen track has an empty
 * URL, or the downloaded file is not the expected JSON.
 */
cli({
    site: 'bilibili',
    name: 'subtitle',
    description: '获取 Bilibili 视频的字幕',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'bvid', required: true },
        { name: 'lang', required: false, help: '字幕语言代码 (如 zh-CN, en-US, ai-zh),默认取第一个' },
    ],
    columns: ['index', 'from', 'to', 'content'],
    func: async (page, kwargs) => {
        if (!page)
            throw new Error('Requires browser');
        const { bvid, lang } = kwargs;
        // 1. Visit the video page first (establishes the authenticated session; the
        //    video itself does not have to finish loading).
        await page.goto(`https://www.bilibili.com/video/${bvid}/`);
        // 2. Read the CID from the page's __INITIAL_STATE__.
        const cid = await page.evaluate(`(async () => {
      const state = window.__INITIAL_STATE__ || {};
      return state?.videoData?.cid;
    })()`);
        if (!cid) {
            throw new Error('无法在页面中提取到当前视频的 CID,请检查页面是否正常加载。');
        }
        // 3. Fetch the subtitle list via apiGet with Wbi signing enabled. Per the original
        //    author, an unsigned in-page fetch of the /wbi/ endpoint is rejected with a 403 HTML page.
        const payload = await apiGet(page, '/x/player/wbi/v2', {
            params: { bvid, cid },
            signed: true, // enable automatic wbi_sign signing
        });
        if (payload.code !== 0) {
            throw new Error(`获取视频播放信息失败: ${payload.message} (${payload.code})`);
        }
        const tracks = payload.data?.subtitle?.subtitles || [];
        if (tracks.length === 0) {
            throw new Error('此视频没有发现外挂或智能字幕。');
        }
        // 4. Choose the requested language, falling back to the first track.
        const chosen = (lang && tracks.find((s) => s.lan === lang)) || tracks[0];
        const rawUrl = chosen.subtitle_url;
        if (!rawUrl) {
            throw new Error('[风控拦截/未登录] 获取到的 subtitle_url 为空!请确保 CLI 已成功登录且风控未封锁此账号。');
        }
        // Protocol-relative URLs ("//host/...") need an explicit scheme.
        let finalUrl = rawUrl;
        if (rawUrl.startsWith('//')) {
            finalUrl = 'https:' + rawUrl;
        }
        // 5. Download and parse the subtitle JSON inside the page.
        const fetchJs = `
      (async () => {
        const url = ${JSON.stringify(finalUrl)};
        const res = await fetch(url);
        const text = await res.text();

        if (text.startsWith('<!DOCTYPE') || text.startsWith('<html')) {
          return { error: 'HTML', text: text.substring(0, 100), url };
        }

        try {
          const subJson = JSON.parse(text);
          // B站真实返回格式是 { font_size: 0.4, font_color: "#FFFFFF", background_alpha: 0.5, background_color: "#9C27B0", Stroke: "none", type: "json" , body: [{from: 0, to: 0, content: ""}] }
          if (Array.isArray(subJson?.body)) return { success: true, data: subJson.body };
          if (Array.isArray(subJson)) return { success: true, data: subJson };
          return { error: 'UNKNOWN_JSON', data: subJson };
        } catch (e) {
          return { error: 'PARSE_FAILED', text: text.substring(0, 100) };
        }
      })()
    `;
        const result = await page.evaluate(fetchJs);
        if (result?.error) {
            throw new Error(`字幕获取失败: ${result.error}${result.text ? ' — ' + result.text : ''}`);
        }
        const cues = result?.data || [];
        if (!Array.isArray(cues)) {
            throw new Error('解析到的字幕列表对象不符合数组格式');
        }
        // 6. Map cues to output rows; times are rendered with two decimals and an "s" suffix.
        return cues.map((cue, position) => {
            const start = Number(cue.from || 0).toFixed(2);
            const end = Number(cue.to || 0).toFixed(2);
            return {
                index: position + 1,
                from: `${start}s`,
                to: `${end}s`,
                content: cue.content,
            };
        });
    },
});
|
package/dist/engine.d.ts
CHANGED
|
@@ -1,7 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CLI discovery: finds YAML/TS CLI definitions and registers them.
|
|
3
|
+
*
|
|
4
|
+
* Supports two modes:
|
|
5
|
+
* 1. FAST PATH (manifest): If a pre-compiled cli-manifest.json exists,
|
|
6
|
+
* registers all YAML commands instantly without runtime YAML parsing.
|
|
7
|
+
* TS modules are loaded lazily only when their command is executed.
|
|
8
|
+
* 2. FALLBACK (filesystem scan): Traditional runtime discovery for development.
|
|
3
9
|
*/
|
|
4
10
|
import { type CliCommand } from './registry.js';
|
|
5
11
|
import type { IPage } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* Discover and register CLI commands.
|
|
14
|
+
* Uses pre-compiled manifest when available for instant startup.
|
|
15
|
+
*/
|
|
6
16
|
export declare function discoverClis(...dirs: string[]): Promise<void>;
|
|
17
|
+
/**
|
|
18
|
+
* Execute a CLI command. Handles lazy-loading of TS modules.
|
|
19
|
+
*/
|
|
7
20
|
export declare function executeCommand(cmd: CliCommand, page: IPage | null, kwargs: Record<string, any>, debug?: boolean): Promise<any>;
|
package/dist/engine.js
CHANGED
|
@@ -1,31 +1,110 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* CLI discovery: finds YAML/TS CLI definitions and registers them.
|
|
3
|
+
*
|
|
4
|
+
* Supports two modes:
|
|
5
|
+
* 1. FAST PATH (manifest): If a pre-compiled cli-manifest.json exists,
|
|
6
|
+
* registers all YAML commands instantly without runtime YAML parsing.
|
|
7
|
+
* TS modules are loaded lazily only when their command is executed.
|
|
8
|
+
* 2. FALLBACK (filesystem scan): Traditional runtime discovery for development.
|
|
3
9
|
*/
|
|
4
10
|
import * as fs from 'node:fs';
|
|
5
11
|
import * as path from 'node:path';
|
|
6
12
|
import yaml from 'js-yaml';
|
|
7
13
|
import { Strategy, registerCommand } from './registry.js';
|
|
8
14
|
import { executePipeline } from './pipeline.js';
|
|
15
|
+
/** Set of TS module paths that have been loaded */
|
|
16
|
+
const _loadedModules = new Set();
|
|
17
|
+
/**
 * Discover and register CLI commands from each directory in `dirs`.
 *
 * For every directory, a pre-compiled `cli-manifest.json` located one level above it
 * is tried first. When that manifest is missing, or cannot be loaded, the directory
 * is scanned at runtime instead, so a broken manifest no longer leaves the directory
 * without any commands.
 *
 * @param dirs Directories that contain one sub-directory per site.
 */
export async function discoverClis(...dirs) {
    for (const dir of dirs) {
        // Fast path: try manifest first (production / post-build)
        const manifestPath = path.resolve(dir, '..', 'cli-manifest.json');
        if (fs.existsSync(manifestPath) && loadFromManifest(manifestPath, dir)) {
            continue; // Manifest registered everything for this directory
        }
        // Fallback: runtime filesystem scan (development, or unusable manifest)
        await discoverClisFromFs(dir);
    }
}
/**
 * Fast path: register commands from a pre-compiled manifest.
 *
 * Entries of type `yaml` carry their pipeline inline and are registered as-is.
 * Entries of type `ts` that have a `modulePath` are registered as stubs flagged with
 * `_lazy` / `_modulePath`; the module itself is not imported here. Other entries are
 * ignored.
 *
 * All entries are converted before any is registered, so a malformed entry cannot
 * leave a half-registered manifest behind.
 *
 * @param manifestPath Path of the manifest JSON file.
 * @param clisDir Directory that `modulePath` values are resolved against.
 * @returns `true` when the manifest was read and its commands registered; `false`
 *          (after writing a warning to stderr) when reading, parsing or registering failed.
 */
function loadFromManifest(manifestPath, clisDir) {
    try {
        const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
        if (!Array.isArray(manifest))
            throw new Error('manifest root is not an array');
        const commands = [];
        for (const entry of manifest) {
            const isYaml = entry.type === 'yaml';
            const isTs = entry.type === 'ts' && Boolean(entry.modulePath);
            if (!isYaml && !isTs)
                continue;
            // Fields common to both kinds. A missing strategy defaults to "cookie" for
            // YAML entries too (previously only TS entries had this default and a YAML
            // entry without `strategy` aborted the whole manifest).
            const shared = {
                site: entry.site,
                name: entry.name,
                description: entry.description ?? '',
                domain: entry.domain,
                strategy: Strategy[String(entry.strategy ?? 'cookie').toUpperCase()] ?? Strategy.COOKIE,
                args: entry.args ?? [],
                columns: entry.columns,
                timeoutSeconds: entry.timeout,
            };
            if (isYaml) {
                // YAML pipelines fully inlined in manifest — register directly
                commands.push({
                    ...shared,
                    browser: entry.browser,
                    pipeline: entry.pipeline,
                    source: `manifest:${entry.site}/${entry.name}`,
                });
            }
            else {
                // TS adapters: register a lightweight stub.
                // The actual module is loaded lazily on first executeCommand().
                const modulePath = path.resolve(clisDir, entry.modulePath);
                commands.push({
                    ...shared,
                    browser: entry.browser ?? true,
                    source: modulePath,
                    // Mark as lazy — executeCommand will load the module before running
                    _lazy: true,
                    _modulePath: modulePath,
                });
            }
        }
        for (const cmd of commands)
            registerCommand(cmd);
        return true;
    }
    catch (err) {
        process.stderr.write(`Warning: failed to load manifest ${manifestPath}: ${err.message}\n`);
        return false;
    }
}
|
|
88
|
+
/**
|
|
89
|
+
* Fallback: traditional filesystem scan (used during development with tsx).
|
|
90
|
+
*/
|
|
91
|
+
async function discoverClisFromFs(dir) {
|
|
92
|
+
if (!fs.existsSync(dir))
|
|
93
|
+
return;
|
|
94
|
+
const promises = [];
|
|
95
|
+
for (const site of fs.readdirSync(dir)) {
|
|
96
|
+
const siteDir = path.join(dir, site);
|
|
97
|
+
if (!fs.statSync(siteDir).isDirectory())
|
|
13
98
|
continue;
|
|
14
|
-
for (const
|
|
15
|
-
const
|
|
16
|
-
if (
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
}
|
|
23
|
-
else if (file.endsWith('.js')) {
|
|
24
|
-
// Dynamic import of compiled adapter modules
|
|
25
|
-
promises.push(import(`file://${filePath}`).catch((err) => {
|
|
26
|
-
process.stderr.write(`Warning: failed to load module ${filePath}: ${err.message}\n`);
|
|
27
|
-
}));
|
|
28
|
-
}
|
|
99
|
+
for (const file of fs.readdirSync(siteDir)) {
|
|
100
|
+
const filePath = path.join(siteDir, file);
|
|
101
|
+
if (file.endsWith('.yaml') || file.endsWith('.yml')) {
|
|
102
|
+
registerYamlCli(filePath, site);
|
|
103
|
+
}
|
|
104
|
+
else if (file.endsWith('.js') && !file.endsWith('.d.js')) {
|
|
105
|
+
promises.push(import(`file://${filePath}`).catch((err) => {
|
|
106
|
+
process.stderr.write(`Warning: failed to load module ${filePath}: ${err.message}\n`);
|
|
107
|
+
}));
|
|
29
108
|
}
|
|
30
109
|
}
|
|
31
110
|
}
|
|
@@ -74,7 +153,33 @@ function registerYamlCli(filePath, defaultSite) {
|
|
|
74
153
|
process.stderr.write(`Warning: failed to load ${filePath}: ${err.message}\n`);
|
|
75
154
|
}
|
|
76
155
|
}
|
|
156
|
+
/**
|
|
157
|
+
* Execute a CLI command. Handles lazy-loading of TS modules.
|
|
158
|
+
*/
|
|
77
159
|
export async function executeCommand(cmd, page, kwargs, debug = false) {
|
|
160
|
+
// Lazy-load TS module on first execution
|
|
161
|
+
if (cmd._lazy && cmd._modulePath) {
|
|
162
|
+
const modulePath = cmd._modulePath;
|
|
163
|
+
if (!_loadedModules.has(modulePath)) {
|
|
164
|
+
try {
|
|
165
|
+
await import(`file://${modulePath}`);
|
|
166
|
+
_loadedModules.add(modulePath);
|
|
167
|
+
}
|
|
168
|
+
catch (err) {
|
|
169
|
+
throw new Error(`Failed to load adapter module ${modulePath}: ${err.message}`);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
// After loading, the module's cli() call will have updated the registry
|
|
173
|
+
// with the real func/pipeline. Re-fetch the command.
|
|
174
|
+
const { getRegistry, fullName } = await import('./registry.js');
|
|
175
|
+
const updated = getRegistry().get(fullName(cmd));
|
|
176
|
+
if (updated && updated.func) {
|
|
177
|
+
return updated.func(page, kwargs, debug);
|
|
178
|
+
}
|
|
179
|
+
if (updated && updated.pipeline) {
|
|
180
|
+
return executePipeline(page, updated.pipeline, { args: kwargs, debug });
|
|
181
|
+
}
|
|
182
|
+
}
|
|
78
183
|
if (cmd.func) {
|
|
79
184
|
return cmd.func(page, kwargs, debug);
|
|
80
185
|
}
|
package/dist/explore.js
CHANGED
|
@@ -175,6 +175,9 @@ function scoreEndpoint(ep) {
|
|
|
175
175
|
s += 2;
|
|
176
176
|
if (ep.status === 200)
|
|
177
177
|
s += 2;
|
|
178
|
+
// Anti-Bot Empty Value Detection: penalize JSON endpoints returning empty data
|
|
179
|
+
if (ep.responseAnalysis && ep.responseAnalysis.itemCount === 0 && ep.contentType.includes('json'))
|
|
180
|
+
s -= 3;
|
|
178
181
|
return s;
|
|
179
182
|
}
|
|
180
183
|
function inferCapabilityName(url, goal) {
|
|
@@ -266,6 +269,28 @@ const STORE_DISCOVER_JS = `
|
|
|
266
269
|
return stores;
|
|
267
270
|
}
|
|
268
271
|
`;
|
|
272
|
+
// ── Auto-Interaction (Fuzzing) ─────────────────────────────────────────────
|
|
273
|
+
/**
 * Source text of an async arrow function that is evaluated inside the page for blind
 * interaction fuzzing.
 *
 * The script selects buttons, tabs and placeholder links, keeps the first 15 matches,
 * dispatches a synthetic bubbling `click` on each one whose bounding box has non-zero
 * width and height, waits 300 ms after every click, and resolves to the number of
 * elements clicked. Errors raised for an individual element are ignored.
 */
const INTERACT_FUZZ_JS = `
  async () => {
    const sleep = ms => new Promise(r => setTimeout(r, ms));
    const clickables = Array.from(document.querySelectorAll(
      'button, [role="button"], [role="tab"], .tab, .btn, a[href="javascript:void(0)"], a[href="#"]'
    )).slice(0, 15); // limit to 15 to avoid endless loops

    let clicked = 0;
    for (const el of clickables) {
      try {
        const rect = el.getBoundingClientRect();
        if (rect.width > 0 && rect.height > 0) {
          el.dispatchEvent(new MouseEvent('click', { bubbles: true, cancelable: true, view: window }));
          clicked++;
          await sleep(300); // give it time to trigger network
        }
      } catch {}
    }
    return clicked;
  }
`;
|
|
269
294
|
// ── Main explore function ──────────────────────────────────────────────────
|
|
270
295
|
export async function exploreUrl(url, opts) {
|
|
271
296
|
const waitSeconds = opts.waitSeconds ?? 3.0;
|
|
@@ -283,6 +308,31 @@ export async function exploreUrl(url, opts) {
|
|
|
283
308
|
catch { }
|
|
284
309
|
await page.wait(1);
|
|
285
310
|
}
|
|
311
|
+
// Step 2.5: Interactive Fuzzing (if requested)
|
|
312
|
+
if (opts.auto) {
|
|
313
|
+
try {
|
|
314
|
+
// First: targeted clicks by label (e.g. "字幕", "CC", "评论")
|
|
315
|
+
if (opts.clickLabels?.length) {
|
|
316
|
+
for (const label of opts.clickLabels) {
|
|
317
|
+
const safeLabel = label.replace(/'/g, "\\'");
|
|
318
|
+
await page.evaluate(`
|
|
319
|
+
(() => {
|
|
320
|
+
const el = [...document.querySelectorAll('button, [role="button"], [role="tab"], a, span')]
|
|
321
|
+
.find(e => e.textContent && e.textContent.trim().includes('${safeLabel}'));
|
|
322
|
+
if (el) el.click();
|
|
323
|
+
})()
|
|
324
|
+
`);
|
|
325
|
+
await page.wait(1);
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
// Then: blind fuzzing on generic interactive elements
|
|
329
|
+
const clicks = await page.evaluate(INTERACT_FUZZ_JS);
|
|
330
|
+
await page.wait(2); // wait for XHRs to settle
|
|
331
|
+
}
|
|
332
|
+
catch (e) {
|
|
333
|
+
// fuzzing is best-effort, don't fail the whole explore
|
|
334
|
+
}
|
|
335
|
+
}
|
|
286
336
|
// Step 3: Read page metadata
|
|
287
337
|
const metadata = await readPageMetadata(page);
|
|
288
338
|
// Step 4: Capture network traffic
|
package/dist/main.js
CHANGED
|
@@ -56,8 +56,8 @@ program.command('validate').description('Validate CLI definitions').argument('[t
|
|
|
56
56
|
.action(async (target) => { const { validateClisWithTarget, renderValidationReport } = await import('./validate.js'); console.log(renderValidationReport(validateClisWithTarget([BUILTIN_CLIS, USER_CLIS], target))); });
|
|
57
57
|
program.command('verify').description('Validate + smoke test').argument('[target]').option('--smoke', 'Run smoke tests', false)
|
|
58
58
|
.action(async (target, opts) => { const { verifyClis, renderVerifyReport } = await import('./verify.js'); const r = await verifyClis({ builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, target, smoke: opts.smoke }); console.log(renderVerifyReport(r)); process.exitCode = r.ok ? 0 : 1; });
|
|
59
|
-
program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
|
|
60
|
-
.action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
|
|
59
|
+
program.command('explore').alias('probe').description('Explore a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3').option('--auto', 'Enable interactive fuzzing (simulate clicks to trigger lazy APIs)').option('--click <labels>', 'Comma-separated labels to click before fuzzing (e.g. "字幕,CC,评论")')
|
|
60
|
+
.action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); const clickLabels = opts.click ? opts.click.split(',').map((s) => s.trim()) : undefined; console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait), auto: opts.auto, clickLabels }))); });
|
|
61
61
|
program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3')
|
|
62
62
|
.action(async (target, opts) => { const { synthesizeFromExplore, renderSynthesizeSummary } = await import('./synthesize.js'); console.log(renderSynthesizeSummary(synthesizeFromExplore(target, { top: parseInt(opts.top) }))); });
|
|
63
63
|
program.command('generate').description('One-shot: explore → synthesize → register').argument('<url>').option('--goal <text>').option('--site <name>')
|
|
@@ -27,14 +27,10 @@ export async function stepWait(page, params, data, args) {
|
|
|
27
27
|
await page.wait(params);
|
|
28
28
|
else if (typeof params === 'object' && params) {
|
|
29
29
|
if ('text' in params) {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if (typeof snap === 'string' && snap.includes(params.text))
|
|
35
|
-
break;
|
|
36
|
-
await page.wait(0.5);
|
|
37
|
-
}
|
|
30
|
+
await page.wait({
|
|
31
|
+
text: String(render(params.text, { args, data })),
|
|
32
|
+
timeout: params.timeout
|
|
33
|
+
});
|
|
38
34
|
}
|
|
39
35
|
else if ('time' in params)
|
|
40
36
|
await page.wait(Number(params.time));
|
|
@@ -2,6 +2,20 @@
|
|
|
2
2
|
* Pipeline step: fetch — HTTP API requests.
|
|
3
3
|
*/
|
|
4
4
|
import { render } from '../template.js';
|
|
5
|
+
/**
 * Map `fn` over `items` with a bounded number of calls in flight.
 *
 * A fixed pool of workers pulls the next unclaimed index until the list is exhausted,
 * so results are stored at the index of their input regardless of completion order.
 *
 * @param items Input array.
 * @param limit Maximum number of concurrent `fn` calls. Fractions are floored; a value
 *              below 1 or a non-number is treated as 1 (previously such a limit
 *              started no workers and silently returned an array of holes).
 * @param fn Async mapper called as `fn(item, index)`.
 * @returns Array of mapped values in input order. Rejects with the first error thrown by `fn`.
 */
async function mapConcurrent(items, limit, fn) {
    const results = new Array(items.length);
    if (items.length === 0)
        return results;
    const requested = Math.floor(Number(limit));
    // NaN fails the comparison and falls through to 1 as well.
    const workerCount = Math.min(requested >= 1 ? requested : 1, items.length);
    let next = 0;
    async function worker() {
        while (next < items.length) {
            // Claim the index synchronously, before awaiting, so no two workers share one.
            const i = next++;
            results[i] = await fn(items[i], i);
        }
    }
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
}
|
|
5
19
|
/** Single URL fetch helper */
|
|
6
20
|
async function fetchSingle(page, url, method, queryParams, headers, args, data) {
|
|
7
21
|
const renderedParams = {};
|
|
@@ -30,6 +44,42 @@ async function fetchSingle(page, url, method, queryParams, headers, args, data)
|
|
|
30
44
|
}
|
|
31
45
|
`);
|
|
32
46
|
}
|
|
47
|
+
/**
 * Batch fetch: run every request inside the browser through a single `evaluate()` call.
 *
 * The URLs, method and headers are serialised into the script text with
 * `JSON.stringify`, so quotes or backslashes in any of them cannot break or alter the
 * generated script. Inside the page a fixed pool of workers fetches the URLs with
 * `credentials: "include"` and parses each response as JSON.
 *
 * @param page Page object exposing `evaluate(script)`.
 * @param urls Fully rendered request URLs.
 * @param method HTTP method.
 * @param headers Rendered request headers.
 * @param concurrency Maximum number of simultaneous fetches. Fractions are floored; a
 *                    value below 1 or a non-number is treated as 1 (previously such a
 *                    value started no workers and returned an array of holes).
 * @returns Whatever `page.evaluate` resolves to: one entry per URL in input order,
 *          either the parsed JSON body or `{ error: message }` when the request or
 *          the JSON parsing failed.
 */
async function fetchBatchInBrowser(page, urls, method, headers, concurrency) {
    const requested = Math.floor(Number(concurrency));
    const workerCount = requested >= 1 && Number.isFinite(requested) ? requested : 1;
    return page.evaluate(`
    async () => {
      const urls = ${JSON.stringify(urls)};
      const method = ${JSON.stringify(method)};
      const headers = ${JSON.stringify(headers)};
      const concurrency = ${workerCount};

      const results = new Array(urls.length);
      let idx = 0;

      async function worker() {
        while (idx < urls.length) {
          const i = idx++;
          try {
            const resp = await fetch(urls[i], { method, headers, credentials: "include" });
            results[i] = await resp.json();
          } catch (e) {
            results[i] = { error: e.message };
          }
        }
      }

      const workers = Array.from({ length: Math.min(concurrency, urls.length) }, () => worker());
      await Promise.all(workers);
      return results;
    }
  `);
}
|
|
33
83
|
export async function stepFetch(page, params, data, args) {
|
|
34
84
|
const urlOrObj = typeof params === 'string' ? params : (params?.url ?? '');
|
|
35
85
|
const method = params?.method ?? 'GET';
|
|
@@ -38,12 +88,31 @@ export async function stepFetch(page, params, data, args) {
|
|
|
38
88
|
const urlTemplate = String(urlOrObj);
|
|
39
89
|
// Per-item fetch when data is array and URL references item
|
|
40
90
|
if (Array.isArray(data) && urlTemplate.includes('item')) {
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
91
|
+
const concurrency = typeof params?.concurrency === 'number' ? params.concurrency : 5;
|
|
92
|
+
// Render all URLs upfront
|
|
93
|
+
const renderedHeaders = {};
|
|
94
|
+
for (const [k, v] of Object.entries(headers))
|
|
95
|
+
renderedHeaders[k] = String(render(v, { args, data }));
|
|
96
|
+
const renderedParams = {};
|
|
97
|
+
for (const [k, v] of Object.entries(queryParams))
|
|
98
|
+
renderedParams[k] = String(render(v, { args, data }));
|
|
99
|
+
const urls = data.map((item, index) => {
|
|
100
|
+
let url = String(render(urlTemplate, { args, data, item, index }));
|
|
101
|
+
if (Object.keys(renderedParams).length > 0) {
|
|
102
|
+
const qs = new URLSearchParams(renderedParams).toString();
|
|
103
|
+
url = `${url}${url.includes('?') ? '&' : '?'}${qs}`;
|
|
104
|
+
}
|
|
105
|
+
return url;
|
|
106
|
+
});
|
|
107
|
+
// BATCH IPC: if browser is available, batch all fetches into a single evaluate() call
|
|
108
|
+
if (page !== null) {
|
|
109
|
+
return fetchBatchInBrowser(page, urls, method.toUpperCase(), renderedHeaders, concurrency);
|
|
45
110
|
}
|
|
46
|
-
|
|
111
|
+
// Non-browser: use concurrent pool (already optimized)
|
|
112
|
+
return mapConcurrent(data, concurrency, async (item, index) => {
|
|
113
|
+
const itemUrl = String(render(urlTemplate, { args, data, item, index }));
|
|
114
|
+
return fetchSingle(null, itemUrl, method, queryParams, headers, args, data);
|
|
115
|
+
});
|
|
47
116
|
}
|
|
48
117
|
const url = render(urlOrObj, { args, data });
|
|
49
118
|
return fetchSingle(page, String(url), method, queryParams, headers, args, data);
|
|
@@ -36,6 +36,8 @@ export async function stepTap(page, params, data, args) {
|
|
|
36
36
|
async () => {
|
|
37
37
|
// ── 1. Setup capture proxy (fetch + XHR dual interception) ──
|
|
38
38
|
let captured = null;
|
|
39
|
+
let captureResolve;
|
|
40
|
+
const capturePromise = new Promise(r => { captureResolve = r; });
|
|
39
41
|
const capturePattern = ${JSON.stringify(capturePattern)};
|
|
40
42
|
|
|
41
43
|
// Intercept fetch API
|
|
@@ -46,7 +48,7 @@ export async function stepTap(page, params, data, args) {
|
|
|
46
48
|
const url = typeof fetchArgs[0] === 'string' ? fetchArgs[0]
|
|
47
49
|
: fetchArgs[0] instanceof Request ? fetchArgs[0].url : String(fetchArgs[0]);
|
|
48
50
|
if (capturePattern && url.includes(capturePattern) && !captured) {
|
|
49
|
-
try { captured = await resp.clone().json(); } catch {}
|
|
51
|
+
try { captured = await resp.clone().json(); captureResolve(); } catch {}
|
|
50
52
|
}
|
|
51
53
|
} catch {}
|
|
52
54
|
return resp;
|
|
@@ -65,13 +67,13 @@ export async function stepTap(page, params, data, args) {
|
|
|
65
67
|
const origHandler = xhr.onreadystatechange;
|
|
66
68
|
xhr.onreadystatechange = function() {
|
|
67
69
|
if (xhr.readyState === 4 && !captured) {
|
|
68
|
-
try { captured = JSON.parse(xhr.responseText); } catch {}
|
|
70
|
+
try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {}
|
|
69
71
|
}
|
|
70
72
|
if (origHandler) origHandler.apply(this, arguments);
|
|
71
73
|
};
|
|
72
74
|
const origOnload = xhr.onload;
|
|
73
75
|
xhr.onload = function() {
|
|
74
|
-
if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} }
|
|
76
|
+
if (!captured) { try { captured = JSON.parse(xhr.responseText); captureResolve(); } catch {} }
|
|
75
77
|
if (origOnload) origOnload.apply(this, arguments);
|
|
76
78
|
};
|
|
77
79
|
}
|
|
@@ -111,9 +113,9 @@ export async function stepTap(page, params, data, args) {
|
|
|
111
113
|
await ${actionCall};
|
|
112
114
|
|
|
113
115
|
// ── 4. Wait for network response ──
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
await
|
|
116
|
+
if (!captured) {
|
|
117
|
+
const timeoutPromise = new Promise(r => setTimeout(r, ${timeout} * 1000));
|
|
118
|
+
await Promise.race([capturePromise, timeoutPromise]);
|
|
117
119
|
}
|
|
118
120
|
} finally {
|
|
119
121
|
// ── 5. Always restore originals ──
|
package/dist/registry.d.ts
CHANGED
package/dist/types.d.ts
CHANGED
|
@@ -16,7 +16,11 @@ export interface IPage {
|
|
|
16
16
|
click(ref: string): Promise<void>;
|
|
17
17
|
typeText(ref: string, text: string): Promise<void>;
|
|
18
18
|
pressKey(key: string): Promise<void>;
|
|
19
|
-
wait(
|
|
19
|
+
wait(options: number | {
|
|
20
|
+
text?: string;
|
|
21
|
+
time?: number;
|
|
22
|
+
timeout?: number;
|
|
23
|
+
}): Promise<void>;
|
|
20
24
|
tabs(): Promise<any>;
|
|
21
25
|
closeTab(index?: number): Promise<void>;
|
|
22
26
|
newTab(): Promise<void>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@jackwener/opencli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
},
|
|
13
13
|
"scripts": {
|
|
14
14
|
"dev": "tsx src/main.ts",
|
|
15
|
-
"build": "tsc && npm run clean-yaml && npm run copy-yaml",
|
|
15
|
+
"build": "tsc && npm run clean-yaml && npm run copy-yaml && npm run build-manifest",
|
|
16
|
+
"build-manifest": "node dist/build-manifest.js || true",
|
|
16
17
|
"clean-yaml": "find dist/clis -name '*.yaml' -o -name '*.yml' 2>/dev/null | xargs rm -f",
|
|
17
18
|
"copy-yaml": "find src/clis -name '*.yaml' -o -name '*.yml' | while read f; do d=\"dist/${f#src/}\"; mkdir -p \"$(dirname \"$d\")\"; cp \"$f\" \"$d\"; done",
|
|
18
19
|
"start": "node dist/main.js",
|