@jackwener/opencli 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/README.zh-CN.md +1 -1
- package/SKILL.md +7 -4
- package/dist/browser.d.ts +7 -3
- package/dist/browser.js +25 -92
- package/dist/browser.test.js +18 -1
- package/dist/cascade.d.ts +1 -1
- package/dist/cascade.js +42 -75
- package/dist/constants.d.ts +13 -0
- package/dist/constants.js +30 -0
- package/dist/engine.js +3 -3
- package/dist/engine.test.d.ts +4 -0
- package/dist/engine.test.js +67 -0
- package/dist/explore.js +1 -15
- package/dist/interceptor.d.ts +42 -0
- package/dist/interceptor.js +138 -0
- package/dist/main.js +1 -4
- package/dist/output.js +0 -5
- package/dist/pipeline/steps/intercept.js +4 -54
- package/dist/pipeline/steps/tap.js +11 -51
- package/dist/registry.d.ts +3 -1
- package/dist/registry.test.d.ts +4 -0
- package/dist/registry.test.js +90 -0
- package/dist/runtime.d.ts +15 -1
- package/dist/runtime.js +11 -6
- package/dist/synthesize.js +5 -5
- package/dist/validate.js +21 -0
- package/dist/verify.d.ts +7 -0
- package/dist/verify.js +7 -1
- package/dist/version.d.ts +4 -0
- package/dist/version.js +16 -0
- package/package.json +1 -1
- package/src/browser.test.ts +20 -1
- package/src/browser.ts +25 -87
- package/src/cascade.ts +47 -75
- package/src/constants.ts +35 -0
- package/src/engine.test.ts +77 -0
- package/src/engine.ts +5 -5
- package/src/explore.ts +2 -15
- package/src/interceptor.ts +153 -0
- package/src/main.ts +1 -5
- package/src/output.ts +0 -4
- package/src/pipeline/executor.ts +15 -15
- package/src/pipeline/steps/intercept.ts +4 -55
- package/src/pipeline/steps/tap.ts +12 -51
- package/src/registry.test.ts +106 -0
- package/src/registry.ts +4 -1
- package/src/runtime.ts +22 -8
- package/src/synthesize.ts +5 -5
- package/src/validate.ts +22 -0
- package/src/verify.ts +10 -1
- package/src/version.ts +18 -0
package/README.md
CHANGED
|
@@ -132,7 +132,7 @@ npm install -g @jackwener/opencli@latest
|
|
|
132
132
|
| **smzdm** | `search` | 🔐 Browser |
|
|
133
133
|
| **ctrip** | `search` | 🔐 Browser |
|
|
134
134
|
| **github** | `search` | 🌐 Public |
|
|
135
|
-
| **v2ex** | `hot` `latest` `topic` | 🌐 Public |
|
|
135
|
+
| **v2ex** | `hot` `latest` `topic` `daily` `me` `notifications` | 🌐 Public / 🔐 Browser |
|
|
136
136
|
| **hackernews** | `top` | 🌐 Public |
|
|
137
137
|
| **bbc** | `news` | 🌐 Public |
|
|
138
138
|
|
package/README.zh-CN.md
CHANGED
|
@@ -132,7 +132,7 @@ npm install -g @jackwener/opencli@latest
|
|
|
132
132
|
| **smzdm** | `search` | 🔐 浏览器 |
|
|
133
133
|
| **ctrip** | `search` | 🔐 浏览器 |
|
|
134
134
|
| **github** | `search` | 🌐 公共 API |
|
|
135
|
-
| **v2ex** | `hot` `latest` `topic` | 🌐 公共 API |
|
|
135
|
+
| **v2ex** | `hot` `latest` `topic` `daily` `me` `notifications` | 🌐 公共 API / 🔐 浏览器 |
|
|
136
136
|
| **hackernews** | `top` | 🌐 公共 API |
|
|
137
137
|
| **bbc** | `news` | 🌐 公共 API |
|
|
138
138
|
|
package/SKILL.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: opencli
|
|
3
3
|
description: "OpenCLI — Make any website your CLI. Zero risk, AI-powered, reuse Chrome login."
|
|
4
|
-
version: 0.5.
|
|
4
|
+
version: 0.5.1
|
|
5
5
|
author: jackwener
|
|
6
6
|
tags: [cli, browser, web, mcp, playwright, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, xueqiu, AI, agent]
|
|
7
7
|
---
|
|
@@ -95,10 +95,13 @@ opencli reddit frontpage --limit 10 # 首页
|
|
|
95
95
|
opencli reddit search --keyword "AI" # 搜索
|
|
96
96
|
opencli reddit subreddit --name rust # 子版块浏览
|
|
97
97
|
|
|
98
|
-
# V2EX (public)
|
|
98
|
+
# V2EX (public + browser)
|
|
99
99
|
opencli v2ex hot --limit 10 # 热门话题
|
|
100
100
|
opencli v2ex latest --limit 10 # 最新话题
|
|
101
101
|
opencli v2ex topic --id 1024 # 主题详情
|
|
102
|
+
opencli v2ex daily # 每日签到 (browser)
|
|
103
|
+
opencli v2ex me # 我的信息 (browser)
|
|
104
|
+
opencli v2ex notifications --limit 10 # 通知 (browser)
|
|
102
105
|
|
|
103
106
|
# Hacker News (public)
|
|
104
107
|
opencli hackernews top --limit 10 # Top stories
|
|
@@ -156,8 +159,8 @@ opencli cascade <api-url>
|
|
|
156
159
|
# Explore with interactive fuzzing (click buttons to trigger lazy APIs)
|
|
157
160
|
opencli explore <url> --auto --click "字幕,CC,评论"
|
|
158
161
|
|
|
159
|
-
# Verify:
|
|
160
|
-
opencli verify
|
|
162
|
+
# Verify: validate adapter definitions
|
|
163
|
+
opencli verify
|
|
161
164
|
```
|
|
162
165
|
|
|
163
166
|
## Output Formats
|
package/dist/browser.d.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Browser interaction via Playwright MCP Bridge extension.
|
|
3
3
|
* Connects to an existing Chrome browser through the extension.
|
|
4
4
|
*/
|
|
5
|
+
import { withTimeoutMs } from './runtime.js';
|
|
5
6
|
type ConnectFailureKind = 'missing-token' | 'extension-timeout' | 'extension-not-installed' | 'mcp-init' | 'process-exit' | 'unknown';
|
|
6
7
|
type PlaywrightMCPState = 'idle' | 'connecting' | 'connected' | 'closing' | 'closed';
|
|
7
8
|
type ConnectFailureInput = {
|
|
@@ -29,7 +30,6 @@ export declare class Page implements IPage {
|
|
|
29
30
|
call(method: string, params?: Record<string, any>): Promise<any>;
|
|
30
31
|
goto(url: string): Promise<void>;
|
|
31
32
|
evaluate(js: string): Promise<any>;
|
|
32
|
-
private normalizeEval;
|
|
33
33
|
snapshot(opts?: {
|
|
34
34
|
interactive?: boolean;
|
|
35
35
|
compact?: boolean;
|
|
@@ -90,12 +90,16 @@ declare function diffTabIndexes(initialIdentities: string[], currentTabs: Array<
|
|
|
90
90
|
identity: string;
|
|
91
91
|
}>): number[];
|
|
92
92
|
declare function appendLimited(current: string, chunk: string, limit: number): string;
|
|
93
|
-
declare function
|
|
93
|
+
declare function buildMcpArgs(input: {
|
|
94
|
+
mcpPath: string;
|
|
95
|
+
executablePath?: string | null;
|
|
96
|
+
}): string[];
|
|
94
97
|
export declare const __test__: {
|
|
95
98
|
createJsonRpcRequest: typeof createJsonRpcRequest;
|
|
96
99
|
extractTabEntries: typeof extractTabEntries;
|
|
97
100
|
diffTabIndexes: typeof diffTabIndexes;
|
|
98
101
|
appendLimited: typeof appendLimited;
|
|
99
|
-
|
|
102
|
+
buildMcpArgs: typeof buildMcpArgs;
|
|
103
|
+
withTimeoutMs: typeof withTimeoutMs;
|
|
100
104
|
};
|
|
101
105
|
export {};
|
package/dist/browser.js
CHANGED
|
@@ -9,14 +9,10 @@ import * as fs from 'node:fs';
|
|
|
9
9
|
import * as os from 'node:os';
|
|
10
10
|
import * as path from 'node:path';
|
|
11
11
|
import { formatSnapshot } from './snapshotFormatter.js';
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
}
|
|
17
|
-
catch {
|
|
18
|
-
return '0.0.0';
|
|
19
|
-
} })();
|
|
12
|
+
import { PKG_VERSION } from './version.js';
|
|
13
|
+
import { normalizeEvaluateSource } from './pipeline/template.js';
|
|
14
|
+
import { generateInterceptorJs, generateReadInterceptedJs } from './interceptor.js';
|
|
15
|
+
import { withTimeoutMs } from './runtime.js';
|
|
20
16
|
const CONNECT_TIMEOUT = parseInt(process.env.OPENCLI_BROWSER_CONNECT_TIMEOUT ?? '30', 10);
|
|
21
17
|
const STDERR_BUFFER_LIMIT = 16 * 1024;
|
|
22
18
|
const INITIAL_TABS_TIMEOUT_MS = 1500;
|
|
@@ -126,26 +122,9 @@ export class Page {
|
|
|
126
122
|
}
|
|
127
123
|
async evaluate(js) {
|
|
128
124
|
// Normalize IIFE format to function format expected by MCP browser_evaluate
|
|
129
|
-
const normalized =
|
|
125
|
+
const normalized = normalizeEvaluateSource(js);
|
|
130
126
|
return this.call('tools/call', { name: 'browser_evaluate', arguments: { function: normalized } });
|
|
131
127
|
}
|
|
132
|
-
normalizeEval(source) {
|
|
133
|
-
const s = source.trim();
|
|
134
|
-
if (!s)
|
|
135
|
-
return '() => undefined';
|
|
136
|
-
// IIFE: (async () => {...})() → wrap as () => (...)
|
|
137
|
-
if (s.startsWith('(') && s.endsWith(')()'))
|
|
138
|
-
return `() => (${s})`;
|
|
139
|
-
// Already a function/arrow
|
|
140
|
-
if (/^(async\s+)?\([^)]*\)\s*=>/.test(s))
|
|
141
|
-
return s;
|
|
142
|
-
if (/^(async\s+)?[A-Za-z_][A-Za-z0-9_]*\s*=>/.test(s))
|
|
143
|
-
return s;
|
|
144
|
-
if (s.startsWith('function ') || s.startsWith('async function '))
|
|
145
|
-
return s;
|
|
146
|
-
// Raw expression → wrap
|
|
147
|
-
return `() => (${s})`;
|
|
148
|
-
}
|
|
149
128
|
async snapshot(opts = {}) {
|
|
150
129
|
const raw = await this.call('tools/call', { name: 'browser_snapshot', arguments: {} });
|
|
151
130
|
if (opts.raw)
|
|
@@ -224,56 +203,14 @@ export class Page {
|
|
|
224
203
|
await this.evaluate(js);
|
|
225
204
|
}
|
|
226
205
|
async installInterceptor(pattern) {
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
if (!window.__opencli_patterns.includes('${pattern}')) {
|
|
232
|
-
window.__opencli_patterns.push('${pattern}');
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
if (!window.__patched_xhr) {
|
|
236
|
-
const checkMatch = (url) => window.__opencli_patterns.some(p => url.includes(p));
|
|
237
|
-
|
|
238
|
-
const XHR = XMLHttpRequest.prototype;
|
|
239
|
-
const open = XHR.open;
|
|
240
|
-
const send = XHR.send;
|
|
241
|
-
XHR.open = function(method, url) {
|
|
242
|
-
this._url = url;
|
|
243
|
-
return open.call(this, method, url, ...Array.prototype.slice.call(arguments, 2));
|
|
244
|
-
};
|
|
245
|
-
XHR.send = function() {
|
|
246
|
-
this.addEventListener('load', function() {
|
|
247
|
-
if (checkMatch(this._url)) {
|
|
248
|
-
try { window.__opencli_xhr.push({url: this._url, data: JSON.parse(this.responseText)}); } catch(e){}
|
|
249
|
-
}
|
|
250
|
-
});
|
|
251
|
-
return send.apply(this, arguments);
|
|
252
|
-
};
|
|
253
|
-
|
|
254
|
-
const origFetch = window.fetch;
|
|
255
|
-
window.fetch = async function(...args) {
|
|
256
|
-
let u = typeof args[0] === 'string' ? args[0] : (args[0] && args[0].url) || '';
|
|
257
|
-
const res = await origFetch.apply(this, args);
|
|
258
|
-
setTimeout(async () => {
|
|
259
|
-
try {
|
|
260
|
-
if (checkMatch(u)) {
|
|
261
|
-
const clone = res.clone();
|
|
262
|
-
const j = await clone.json();
|
|
263
|
-
window.__opencli_xhr.push({url: u, data: j});
|
|
264
|
-
}
|
|
265
|
-
} catch(e) {}
|
|
266
|
-
}, 0);
|
|
267
|
-
return res;
|
|
268
|
-
};
|
|
269
|
-
window.__patched_xhr = true;
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
`;
|
|
273
|
-
await this.evaluate(js);
|
|
206
|
+
await this.evaluate(generateInterceptorJs(JSON.stringify(pattern), {
|
|
207
|
+
arrayName: '__opencli_xhr',
|
|
208
|
+
patchGuard: '__opencli_interceptor_patched',
|
|
209
|
+
}));
|
|
274
210
|
}
|
|
275
211
|
async getInterceptedRequests() {
|
|
276
|
-
|
|
212
|
+
const result = await this.evaluate(generateReadInterceptedJs('__opencli_xhr'));
|
|
213
|
+
return result || [];
|
|
277
214
|
}
|
|
278
215
|
}
|
|
279
216
|
/**
|
|
@@ -402,13 +339,13 @@ export class PlaywrightMCP {
|
|
|
402
339
|
stderr: stderrBuffer,
|
|
403
340
|
}));
|
|
404
341
|
}, timeout * 1000);
|
|
405
|
-
const mcpArgs =
|
|
342
|
+
const mcpArgs = buildMcpArgs({
|
|
343
|
+
mcpPath,
|
|
344
|
+
executablePath: process.env.OPENCLI_BROWSER_EXECUTABLE_PATH,
|
|
345
|
+
});
|
|
406
346
|
if (process.env.OPENCLI_VERBOSE) {
|
|
407
347
|
console.error(`[opencli] Extension token: ${extensionToken ? `configured (fingerprint ${tokenFingerprint})` : 'missing'}`);
|
|
408
348
|
}
|
|
409
|
-
if (process.env.OPENCLI_BROWSER_EXECUTABLE_PATH) {
|
|
410
|
-
mcpArgs.push('--executablePath', process.env.OPENCLI_BROWSER_EXECUTABLE_PATH);
|
|
411
|
-
}
|
|
412
349
|
debugLog(`Spawning node ${mcpArgs.join(' ')}`);
|
|
413
350
|
this._proc = spawn('node', mcpArgs, {
|
|
414
351
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
@@ -485,7 +422,7 @@ export class PlaywrightMCP {
|
|
|
485
422
|
this._proc?.stdin?.write(initializedMsg);
|
|
486
423
|
// Use tabs as a readiness probe and for tab cleanup bookkeeping.
|
|
487
424
|
debugLog('Fetching initial tabs count...');
|
|
488
|
-
|
|
425
|
+
withTimeoutMs(page.tabs(), INITIAL_TABS_TIMEOUT_MS, 'Timed out fetching initial tabs').then((tabs) => {
|
|
489
426
|
debugLog(`Tabs response: ${typeof tabs === 'string' ? tabs : JSON.stringify(tabs)}`);
|
|
490
427
|
this._initialTabIdentities = extractTabIdentities(tabs);
|
|
491
428
|
settleSuccess(page);
|
|
@@ -510,7 +447,7 @@ export class PlaywrightMCP {
|
|
|
510
447
|
// Extension mode opens bridge/session tabs that we can clean up best-effort.
|
|
511
448
|
if (this._page && this._proc && !this._proc.killed) {
|
|
512
449
|
try {
|
|
513
|
-
const tabs = await
|
|
450
|
+
const tabs = await withTimeoutMs(this._page.tabs(), TAB_CLEANUP_TIMEOUT_MS, 'Timed out fetching tabs during cleanup');
|
|
514
451
|
const tabEntries = extractTabEntries(tabs);
|
|
515
452
|
const tabsToClose = diffTabIndexes(this._initialTabIdentities, tabEntries);
|
|
516
453
|
for (const index of tabsToClose) {
|
|
@@ -621,24 +558,20 @@ function appendLimited(current, chunk, limit) {
|
|
|
621
558
|
return next;
|
|
622
559
|
return next.slice(-limit);
|
|
623
560
|
}
|
|
624
|
-
function
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
}, (error) => {
|
|
631
|
-
clearTimeout(timer);
|
|
632
|
-
reject(error);
|
|
633
|
-
});
|
|
634
|
-
});
|
|
561
|
+
function buildMcpArgs(input) {
|
|
562
|
+
const args = [input.mcpPath, '--extension'];
|
|
563
|
+
if (input.executablePath) {
|
|
564
|
+
args.push('--executable-path', input.executablePath);
|
|
565
|
+
}
|
|
566
|
+
return args;
|
|
635
567
|
}
|
|
636
568
|
export const __test__ = {
|
|
637
569
|
createJsonRpcRequest,
|
|
638
570
|
extractTabEntries,
|
|
639
571
|
diffTabIndexes,
|
|
640
572
|
appendLimited,
|
|
641
|
-
|
|
573
|
+
buildMcpArgs,
|
|
574
|
+
withTimeoutMs,
|
|
642
575
|
};
|
|
643
576
|
function findMcpServerPath() {
|
|
644
577
|
if (_cachedMcpServerPath !== undefined)
|
package/dist/browser.test.js
CHANGED
|
@@ -34,8 +34,25 @@ describe('browser helpers', () => {
|
|
|
34
34
|
it('keeps only the tail of stderr buffers', () => {
|
|
35
35
|
expect(__test__.appendLimited('12345', '67890', 8)).toBe('34567890');
|
|
36
36
|
});
|
|
37
|
+
it('builds Playwright MCP args with kebab-case executable path', () => {
|
|
38
|
+
expect(__test__.buildMcpArgs({
|
|
39
|
+
mcpPath: '/tmp/cli.js',
|
|
40
|
+
executablePath: '/mnt/c/Program Files/Google/Chrome/Application/chrome.exe',
|
|
41
|
+
})).toEqual([
|
|
42
|
+
'/tmp/cli.js',
|
|
43
|
+
'--extension',
|
|
44
|
+
'--executable-path',
|
|
45
|
+
'/mnt/c/Program Files/Google/Chrome/Application/chrome.exe',
|
|
46
|
+
]);
|
|
47
|
+
expect(__test__.buildMcpArgs({
|
|
48
|
+
mcpPath: '/tmp/cli.js',
|
|
49
|
+
})).toEqual([
|
|
50
|
+
'/tmp/cli.js',
|
|
51
|
+
'--extension',
|
|
52
|
+
]);
|
|
53
|
+
});
|
|
37
54
|
it('times out slow promises', async () => {
|
|
38
|
-
await expect(__test__.
|
|
55
|
+
await expect(__test__.withTimeoutMs(new Promise(() => { }), 10, 'timeout')).rejects.toThrow('timeout');
|
|
39
56
|
});
|
|
40
57
|
});
|
|
41
58
|
describe('PlaywrightMCP state', () => {
|
package/dist/cascade.d.ts
CHANGED
|
@@ -28,7 +28,7 @@ interface CascadeResult {
|
|
|
28
28
|
* Probe an endpoint with a specific strategy.
|
|
29
29
|
* Returns whether the probe succeeded and basic response info.
|
|
30
30
|
*/
|
|
31
|
-
export declare function probeEndpoint(page: IPage, url: string, strategy: Strategy,
|
|
31
|
+
export declare function probeEndpoint(page: IPage, url: string, strategy: Strategy, _opts?: {
|
|
32
32
|
timeout?: number;
|
|
33
33
|
}): Promise<ProbeResult>;
|
|
34
34
|
/**
|
package/dist/cascade.js
CHANGED
|
@@ -18,34 +18,54 @@ const CASCADE_ORDER = [
|
|
|
18
18
|
Strategy.INTERCEPT,
|
|
19
19
|
Strategy.UI,
|
|
20
20
|
];
|
|
21
|
+
/**
|
|
22
|
+
* Build the JavaScript source for a fetch probe.
|
|
23
|
+
* Shared logic for PUBLIC, COOKIE, and HEADER strategies.
|
|
24
|
+
*/
|
|
25
|
+
function buildFetchProbeJs(url, opts) {
|
|
26
|
+
const credentialsLine = opts.credentials ? `credentials: 'include',` : '';
|
|
27
|
+
const headerSetup = opts.extractCsrf
|
|
28
|
+
? `
|
|
29
|
+
const cookies = document.cookie.split(';').map(c => c.trim());
|
|
30
|
+
const csrf = cookies.find(c => c.startsWith('ct0=') || c.startsWith('csrf_token=') || c.startsWith('_csrf='))?.split('=').slice(1).join('=');
|
|
31
|
+
const headers = {};
|
|
32
|
+
if (csrf) { headers['X-Csrf-Token'] = csrf; headers['X-XSRF-Token'] = csrf; }
|
|
33
|
+
`
|
|
34
|
+
: 'const headers = {};';
|
|
35
|
+
return `
|
|
36
|
+
async () => {
|
|
37
|
+
try {
|
|
38
|
+
${headerSetup}
|
|
39
|
+
const resp = await fetch(${JSON.stringify(url)}, {
|
|
40
|
+
${credentialsLine}
|
|
41
|
+
headers
|
|
42
|
+
});
|
|
43
|
+
const status = resp.status;
|
|
44
|
+
if (!resp.ok) return { status, ok: false };
|
|
45
|
+
const text = await resp.text();
|
|
46
|
+
let hasData = false;
|
|
47
|
+
try {
|
|
48
|
+
const json = JSON.parse(text);
|
|
49
|
+
hasData = !!json && (Array.isArray(json) ? json.length > 0 :
|
|
50
|
+
typeof json === 'object' && Object.keys(json).length > 0);
|
|
51
|
+
// Check for API-level error codes (common in Chinese sites)
|
|
52
|
+
if (json.code !== undefined && json.code !== 0) hasData = false;
|
|
53
|
+
} catch {}
|
|
54
|
+
return { status, ok: true, hasData, preview: text.slice(0, 200) };
|
|
55
|
+
} catch (e) { return { ok: false, error: e.message }; }
|
|
56
|
+
}
|
|
57
|
+
`;
|
|
58
|
+
}
|
|
21
59
|
/**
|
|
22
60
|
* Probe an endpoint with a specific strategy.
|
|
23
61
|
* Returns whether the probe succeeded and basic response info.
|
|
24
62
|
*/
|
|
25
|
-
export async function probeEndpoint(page, url, strategy,
|
|
63
|
+
export async function probeEndpoint(page, url, strategy, _opts = {}) {
|
|
26
64
|
const result = { strategy, success: false };
|
|
27
65
|
try {
|
|
28
66
|
switch (strategy) {
|
|
29
67
|
case Strategy.PUBLIC: {
|
|
30
|
-
|
|
31
|
-
const js = `
|
|
32
|
-
async () => {
|
|
33
|
-
try {
|
|
34
|
-
const resp = await fetch(${JSON.stringify(url)});
|
|
35
|
-
const status = resp.status;
|
|
36
|
-
if (!resp.ok) return { status, ok: false };
|
|
37
|
-
const text = await resp.text();
|
|
38
|
-
let hasData = false;
|
|
39
|
-
try {
|
|
40
|
-
const json = JSON.parse(text);
|
|
41
|
-
hasData = !!json && (Array.isArray(json) ? json.length > 0 :
|
|
42
|
-
typeof json === 'object' && Object.keys(json).length > 0);
|
|
43
|
-
} catch {}
|
|
44
|
-
return { status, ok: true, hasData, preview: text.slice(0, 200) };
|
|
45
|
-
} catch (e) { return { ok: false, error: e.message }; }
|
|
46
|
-
}
|
|
47
|
-
`;
|
|
48
|
-
const resp = await page.evaluate(js);
|
|
68
|
+
const resp = await page.evaluate(buildFetchProbeJs(url, {}));
|
|
49
69
|
result.statusCode = resp?.status;
|
|
50
70
|
result.success = resp?.ok && resp?.hasData;
|
|
51
71
|
result.hasData = resp?.hasData;
|
|
@@ -53,27 +73,7 @@ export async function probeEndpoint(page, url, strategy, opts = {}) {
|
|
|
53
73
|
break;
|
|
54
74
|
}
|
|
55
75
|
case Strategy.COOKIE: {
|
|
56
|
-
|
|
57
|
-
const js = `
|
|
58
|
-
async () => {
|
|
59
|
-
try {
|
|
60
|
-
const resp = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
|
|
61
|
-
const status = resp.status;
|
|
62
|
-
if (!resp.ok) return { status, ok: false };
|
|
63
|
-
const text = await resp.text();
|
|
64
|
-
let hasData = false;
|
|
65
|
-
try {
|
|
66
|
-
const json = JSON.parse(text);
|
|
67
|
-
hasData = !!json && (Array.isArray(json) ? json.length > 0 :
|
|
68
|
-
typeof json === 'object' && Object.keys(json).length > 0);
|
|
69
|
-
// Check for API-level error codes (common in Chinese sites)
|
|
70
|
-
if (json.code !== undefined && json.code !== 0) hasData = false;
|
|
71
|
-
} catch {}
|
|
72
|
-
return { status, ok: true, hasData, preview: text.slice(0, 200) };
|
|
73
|
-
} catch (e) { return { ok: false, error: e.message }; }
|
|
74
|
-
}
|
|
75
|
-
`;
|
|
76
|
-
const resp = await page.evaluate(js);
|
|
76
|
+
const resp = await page.evaluate(buildFetchProbeJs(url, { credentials: true }));
|
|
77
77
|
result.statusCode = resp?.status;
|
|
78
78
|
result.success = resp?.ok && resp?.hasData;
|
|
79
79
|
result.hasData = resp?.hasData;
|
|
@@ -81,39 +81,7 @@ export async function probeEndpoint(page, url, strategy, opts = {}) {
|
|
|
81
81
|
break;
|
|
82
82
|
}
|
|
83
83
|
case Strategy.HEADER: {
|
|
84
|
-
|
|
85
|
-
const js = `
|
|
86
|
-
async () => {
|
|
87
|
-
try {
|
|
88
|
-
// Try to extract CSRF tokens from cookies
|
|
89
|
-
const cookies = document.cookie.split(';').map(c => c.trim());
|
|
90
|
-
const csrf = cookies.find(c => c.startsWith('ct0=') || c.startsWith('csrf_token=') || c.startsWith('_csrf='))?.split('=').slice(1).join('=');
|
|
91
|
-
|
|
92
|
-
const headers = {};
|
|
93
|
-
if (csrf) {
|
|
94
|
-
headers['X-Csrf-Token'] = csrf;
|
|
95
|
-
headers['X-XSRF-Token'] = csrf;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
const resp = await fetch(${JSON.stringify(url)}, {
|
|
99
|
-
credentials: 'include',
|
|
100
|
-
headers
|
|
101
|
-
});
|
|
102
|
-
const status = resp.status;
|
|
103
|
-
if (!resp.ok) return { status, ok: false };
|
|
104
|
-
const text = await resp.text();
|
|
105
|
-
let hasData = false;
|
|
106
|
-
try {
|
|
107
|
-
const json = JSON.parse(text);
|
|
108
|
-
hasData = !!json && (Array.isArray(json) ? json.length > 0 :
|
|
109
|
-
typeof json === 'object' && Object.keys(json).length > 0);
|
|
110
|
-
if (json.code !== undefined && json.code !== 0) hasData = false;
|
|
111
|
-
} catch {}
|
|
112
|
-
return { status, ok: true, hasData, preview: text.slice(0, 200) };
|
|
113
|
-
} catch (e) { return { ok: false, error: e.message }; }
|
|
114
|
-
}
|
|
115
|
-
`;
|
|
116
|
-
const resp = await page.evaluate(js);
|
|
84
|
+
const resp = await page.evaluate(buildFetchProbeJs(url, { credentials: true, extractCsrf: true }));
|
|
117
85
|
result.statusCode = resp?.status;
|
|
118
86
|
result.success = resp?.ok && resp?.hasData;
|
|
119
87
|
result.hasData = resp?.hasData;
|
|
@@ -123,7 +91,6 @@ export async function probeEndpoint(page, url, strategy, opts = {}) {
|
|
|
123
91
|
case Strategy.INTERCEPT:
|
|
124
92
|
case Strategy.UI:
|
|
125
93
|
// These require specific implementation per-site
|
|
126
|
-
// Mark as needing manual implementation
|
|
127
94
|
result.success = false;
|
|
128
95
|
result.error = `Strategy ${strategy} requires site-specific implementation`;
|
|
129
96
|
break;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared constants used across explore, synthesize, and pipeline modules.
|
|
3
|
+
*/
|
|
4
|
+
/** URL query params that are volatile/ephemeral and should be stripped from patterns */
|
|
5
|
+
export declare const VOLATILE_PARAMS: Set<string>;
|
|
6
|
+
/** Search-related query parameter names */
|
|
7
|
+
export declare const SEARCH_PARAMS: Set<string>;
|
|
8
|
+
/** Pagination-related query parameter names */
|
|
9
|
+
export declare const PAGINATION_PARAMS: Set<string>;
|
|
10
|
+
/** Limit/page-size query parameter names */
|
|
11
|
+
export declare const LIMIT_PARAMS: Set<string>;
|
|
12
|
+
/** Field role → common API field names mapping */
|
|
13
|
+
export declare const FIELD_ROLES: Record<string, string[]>;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared constants used across explore, synthesize, and pipeline modules.
|
|
3
|
+
*/
|
|
4
|
+
/** URL query params that are volatile/ephemeral and should be stripped from patterns */
|
|
5
|
+
export const VOLATILE_PARAMS = new Set([
|
|
6
|
+
'w_rid', 'wts', '_', 'callback', 'timestamp', 't', 'nonce', 'sign',
|
|
7
|
+
]);
|
|
8
|
+
/** Search-related query parameter names */
|
|
9
|
+
export const SEARCH_PARAMS = new Set([
|
|
10
|
+
'q', 'query', 'keyword', 'search', 'wd', 'kw', 'search_query', 'w',
|
|
11
|
+
]);
|
|
12
|
+
/** Pagination-related query parameter names */
|
|
13
|
+
export const PAGINATION_PARAMS = new Set([
|
|
14
|
+
'page', 'pn', 'offset', 'cursor', 'next', 'page_num',
|
|
15
|
+
]);
|
|
16
|
+
/** Limit/page-size query parameter names */
|
|
17
|
+
export const LIMIT_PARAMS = new Set([
|
|
18
|
+
'limit', 'count', 'size', 'per_page', 'page_size', 'ps', 'num',
|
|
19
|
+
]);
|
|
20
|
+
/** Field role → common API field names mapping */
|
|
21
|
+
export const FIELD_ROLES = {
|
|
22
|
+
title: ['title', 'name', 'text', 'content', 'desc', 'description', 'headline', 'subject'],
|
|
23
|
+
url: ['url', 'uri', 'link', 'href', 'permalink', 'jump_url', 'web_url', 'share_url'],
|
|
24
|
+
author: ['author', 'username', 'user_name', 'nickname', 'nick', 'owner', 'creator', 'up_name', 'uname'],
|
|
25
|
+
score: ['score', 'hot', 'heat', 'likes', 'like_count', 'view_count', 'views', 'play', 'favorite_count', 'reply_count'],
|
|
26
|
+
time: ['time', 'created_at', 'publish_time', 'pub_time', 'date', 'ctime', 'mtime', 'pubdate', 'created'],
|
|
27
|
+
id: ['id', 'aid', 'bvid', 'mid', 'uid', 'oid', 'note_id', 'item_id'],
|
|
28
|
+
cover: ['cover', 'pic', 'image', 'thumbnail', 'poster', 'avatar'],
|
|
29
|
+
category: ['category', 'tag', 'type', 'tname', 'channel', 'section'],
|
|
30
|
+
};
|
package/dist/engine.js
CHANGED
|
@@ -73,7 +73,6 @@ function loadFromManifest(manifestPath, clisDir) {
|
|
|
73
73
|
columns: entry.columns,
|
|
74
74
|
timeoutSeconds: entry.timeout,
|
|
75
75
|
source: modulePath,
|
|
76
|
-
// Mark as lazy — executeCommand will load the module before running
|
|
77
76
|
_lazy: true,
|
|
78
77
|
_modulePath: modulePath,
|
|
79
78
|
};
|
|
@@ -158,8 +157,9 @@ function registerYamlCli(filePath, defaultSite) {
|
|
|
158
157
|
*/
|
|
159
158
|
export async function executeCommand(cmd, page, kwargs, debug = false) {
|
|
160
159
|
// Lazy-load TS module on first execution
|
|
161
|
-
|
|
162
|
-
|
|
160
|
+
const internal = cmd;
|
|
161
|
+
if (internal._lazy && internal._modulePath) {
|
|
162
|
+
const modulePath = internal._modulePath;
|
|
163
163
|
if (!_loadedModules.has(modulePath)) {
|
|
164
164
|
try {
|
|
165
165
|
await import(`file://${modulePath}`);
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for engine.ts: CLI discovery and command execution.
|
|
3
|
+
*/
|
|
4
|
+
import { describe, it, expect } from 'vitest';
|
|
5
|
+
import { discoverClis, executeCommand } from './engine.js';
|
|
6
|
+
import { cli, Strategy } from './registry.js';
|
|
7
|
+
describe('discoverClis', () => {
|
|
8
|
+
it('handles non-existent directories gracefully', async () => {
|
|
9
|
+
// Should not throw for missing directories
|
|
10
|
+
await expect(discoverClis('/tmp/nonexistent-opencli-test-dir')).resolves.not.toThrow();
|
|
11
|
+
});
|
|
12
|
+
});
|
|
13
|
+
describe('executeCommand', () => {
|
|
14
|
+
it('executes a command with func', async () => {
|
|
15
|
+
const cmd = cli({
|
|
16
|
+
site: 'test-engine',
|
|
17
|
+
name: 'func-test',
|
|
18
|
+
description: 'test command with func',
|
|
19
|
+
browser: false,
|
|
20
|
+
strategy: Strategy.PUBLIC,
|
|
21
|
+
func: async (_page, kwargs) => {
|
|
22
|
+
return [{ title: kwargs.query ?? 'default' }];
|
|
23
|
+
},
|
|
24
|
+
});
|
|
25
|
+
const result = await executeCommand(cmd, null, { query: 'hello' });
|
|
26
|
+
expect(result).toEqual([{ title: 'hello' }]);
|
|
27
|
+
});
|
|
28
|
+
it('executes a command with pipeline', async () => {
|
|
29
|
+
const cmd = cli({
|
|
30
|
+
site: 'test-engine',
|
|
31
|
+
name: 'pipe-test',
|
|
32
|
+
description: 'test command with pipeline',
|
|
33
|
+
browser: false,
|
|
34
|
+
strategy: Strategy.PUBLIC,
|
|
35
|
+
pipeline: [
|
|
36
|
+
{ evaluate: '() => [{ n: 1 }, { n: 2 }, { n: 3 }]' },
|
|
37
|
+
{ limit: '2' },
|
|
38
|
+
],
|
|
39
|
+
});
|
|
40
|
+
// Pipeline commands require page for evaluate step, so we'll test the error path
|
|
41
|
+
await expect(executeCommand(cmd, null, {})).rejects.toThrow();
|
|
42
|
+
});
|
|
43
|
+
it('throws for command with no func or pipeline', async () => {
|
|
44
|
+
const cmd = cli({
|
|
45
|
+
site: 'test-engine',
|
|
46
|
+
name: 'empty-test',
|
|
47
|
+
description: 'empty command',
|
|
48
|
+
browser: false,
|
|
49
|
+
});
|
|
50
|
+
await expect(executeCommand(cmd, null, {})).rejects.toThrow('has no func or pipeline');
|
|
51
|
+
});
|
|
52
|
+
it('passes debug flag to func', async () => {
|
|
53
|
+
let receivedDebug = false;
|
|
54
|
+
const cmd = cli({
|
|
55
|
+
site: 'test-engine',
|
|
56
|
+
name: 'debug-test',
|
|
57
|
+
description: 'debug test',
|
|
58
|
+
browser: false,
|
|
59
|
+
func: async (_page, _kwargs, debug) => {
|
|
60
|
+
receivedDebug = debug ?? false;
|
|
61
|
+
return [];
|
|
62
|
+
},
|
|
63
|
+
});
|
|
64
|
+
await executeCommand(cmd, null, {}, true);
|
|
65
|
+
expect(receivedDebug).toBe(true);
|
|
66
|
+
});
|
|
67
|
+
});
|
package/dist/explore.js
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import * as fs from 'node:fs';
|
|
9
9
|
import * as path from 'node:path';
|
|
10
10
|
import { DEFAULT_BROWSER_EXPLORE_TIMEOUT, browserSession, runWithTimeout } from './runtime.js';
|
|
11
|
+
import { VOLATILE_PARAMS, SEARCH_PARAMS, PAGINATION_PARAMS, LIMIT_PARAMS, FIELD_ROLES } from './constants.js';
|
|
11
12
|
// โโ Site name detection โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
12
13
|
const KNOWN_SITE_ALIASES = {
|
|
13
14
|
'x.com': 'twitter', 'twitter.com': 'twitter',
|
|
@@ -39,21 +40,6 @@ export function detectSiteName(url) {
|
|
|
39
40
|
export function slugify(value) {
|
|
40
41
|
return value.trim().toLowerCase().replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-|-$/g, '') || 'site';
|
|
41
42
|
}
|
|
42
|
-
// โโ Field & capability inference โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
43
|
-
const FIELD_ROLES = {
|
|
44
|
-
title: ['title', 'name', 'text', 'content', 'desc', 'description', 'headline', 'subject'],
|
|
45
|
-
url: ['url', 'uri', 'link', 'href', 'permalink', 'jump_url', 'web_url', 'share_url'],
|
|
46
|
-
author: ['author', 'username', 'user_name', 'nickname', 'nick', 'owner', 'creator', 'up_name', 'uname'],
|
|
47
|
-
score: ['score', 'hot', 'heat', 'likes', 'like_count', 'view_count', 'views', 'play', 'favorite_count', 'reply_count'],
|
|
48
|
-
time: ['time', 'created_at', 'publish_time', 'pub_time', 'date', 'ctime', 'mtime', 'pubdate', 'created'],
|
|
49
|
-
id: ['id', 'aid', 'bvid', 'mid', 'uid', 'oid', 'note_id', 'item_id'],
|
|
50
|
-
cover: ['cover', 'pic', 'image', 'thumbnail', 'poster', 'avatar'],
|
|
51
|
-
category: ['category', 'tag', 'type', 'tname', 'channel', 'section'],
|
|
52
|
-
};
|
|
53
|
-
const SEARCH_PARAMS = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'search_query', 'w']);
|
|
54
|
-
const PAGINATION_PARAMS = new Set(['page', 'pn', 'offset', 'cursor', 'next', 'page_num']);
|
|
55
|
-
const LIMIT_PARAMS = new Set(['limit', 'count', 'size', 'per_page', 'page_size', 'ps', 'num']);
|
|
56
|
-
const VOLATILE_PARAMS = new Set(['w_rid', 'wts', '_', 'callback', 'timestamp', 't', 'nonce', 'sign']);
|
|
57
43
|
/**
|
|
58
44
|
* Parse raw network output from Playwright MCP.
|
|
59
45
|
* Handles text format: [GET] url => [200]
|