@jackwener/opencli 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +1 -1
  2. package/README.zh-CN.md +1 -1
  3. package/SKILL.md +7 -4
  4. package/dist/browser.d.ts +7 -3
  5. package/dist/browser.js +25 -92
  6. package/dist/browser.test.js +18 -1
  7. package/dist/cascade.d.ts +1 -1
  8. package/dist/cascade.js +42 -75
  9. package/dist/constants.d.ts +13 -0
  10. package/dist/constants.js +30 -0
  11. package/dist/engine.js +3 -3
  12. package/dist/engine.test.d.ts +4 -0
  13. package/dist/engine.test.js +67 -0
  14. package/dist/explore.js +1 -15
  15. package/dist/interceptor.d.ts +42 -0
  16. package/dist/interceptor.js +138 -0
  17. package/dist/main.js +1 -4
  18. package/dist/output.js +0 -5
  19. package/dist/pipeline/steps/intercept.js +4 -54
  20. package/dist/pipeline/steps/tap.js +11 -51
  21. package/dist/registry.d.ts +3 -1
  22. package/dist/registry.test.d.ts +4 -0
  23. package/dist/registry.test.js +90 -0
  24. package/dist/runtime.d.ts +15 -1
  25. package/dist/runtime.js +11 -6
  26. package/dist/synthesize.js +5 -5
  27. package/dist/validate.js +21 -0
  28. package/dist/verify.d.ts +7 -0
  29. package/dist/verify.js +7 -1
  30. package/dist/version.d.ts +4 -0
  31. package/dist/version.js +16 -0
  32. package/package.json +1 -1
  33. package/src/browser.test.ts +20 -1
  34. package/src/browser.ts +25 -87
  35. package/src/cascade.ts +47 -75
  36. package/src/constants.ts +35 -0
  37. package/src/engine.test.ts +77 -0
  38. package/src/engine.ts +5 -5
  39. package/src/explore.ts +2 -15
  40. package/src/interceptor.ts +153 -0
  41. package/src/main.ts +1 -5
  42. package/src/output.ts +0 -4
  43. package/src/pipeline/executor.ts +15 -15
  44. package/src/pipeline/steps/intercept.ts +4 -55
  45. package/src/pipeline/steps/tap.ts +12 -51
  46. package/src/registry.test.ts +106 -0
  47. package/src/registry.ts +4 -1
  48. package/src/runtime.ts +22 -8
  49. package/src/synthesize.ts +5 -5
  50. package/src/validate.ts +22 -0
  51. package/src/verify.ts +10 -1
  52. package/src/version.ts +18 -0
package/README.md CHANGED
@@ -132,7 +132,7 @@ npm install -g @jackwener/opencli@latest
132
132
  | **smzdm** | `search` | 🔐 Browser |
133
133
  | **ctrip** | `search` | 🔐 Browser |
134
134
  | **github** | `search` | 🌐 Public |
135
- | **v2ex** | `hot` `latest` `topic` | 🌐 Public |
135
+ | **v2ex** | `hot` `latest` `topic` `daily` `me` `notifications` | 🌐 Public / 🔐 Browser |
136
136
  | **hackernews** | `top` | 🌐 Public |
137
137
  | **bbc** | `news` | 🌐 Public |
138
138
 
package/README.zh-CN.md CHANGED
@@ -132,7 +132,7 @@ npm install -g @jackwener/opencli@latest
132
132
  | **smzdm** | `search` | 🔐 浏览器 |
133
133
  | **ctrip** | `search` | 🔐 浏览器 |
134
134
  | **github** | `search` | 🌐 公共 API |
135
- | **v2ex** | `hot` `latest` `topic` | 🌐 公共 API |
135
+ | **v2ex** | `hot` `latest` `topic` `daily` `me` `notifications` | 🌐 公共 API / 🔐 浏览器 |
136
136
  | **hackernews** | `top` | 🌐 公共 API |
137
137
  | **bbc** | `news` | 🌐 公共 API |
138
138
 
package/SKILL.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: opencli
3
3
  description: "OpenCLI — Make any website your CLI. Zero risk, AI-powered, reuse Chrome login."
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  author: jackwener
6
6
  tags: [cli, browser, web, mcp, playwright, bilibili, zhihu, twitter, github, v2ex, hackernews, reddit, xiaohongshu, xueqiu, AI, agent]
7
7
  ---
@@ -95,10 +95,13 @@ opencli reddit frontpage --limit 10 # 首页
95
95
  opencli reddit search --keyword "AI" # 搜索
96
96
  opencli reddit subreddit --name rust # 子版块浏览
97
97
 
98
- # V2EX (public)
98
+ # V2EX (public + browser)
99
99
  opencli v2ex hot --limit 10 # 热门话题
100
100
  opencli v2ex latest --limit 10 # 最新话题
101
101
  opencli v2ex topic --id 1024 # 主题详情
102
+ opencli v2ex daily # 每日签到 (browser)
103
+ opencli v2ex me # 我的信息 (browser)
104
+ opencli v2ex notifications --limit 10 # 通知 (browser)
102
105
 
103
106
  # Hacker News (public)
104
107
  opencli hackernews top --limit 10 # Top stories
@@ -156,8 +159,8 @@ opencli cascade <api-url>
156
159
  # Explore with interactive fuzzing (click buttons to trigger lazy APIs)
157
160
  opencli explore <url> --auto --click "字幕,CC,评论"
158
161
 
159
- # Verify: smoke-test a generated adapter
160
- opencli verify <site/name> --smoke
162
+ # Verify: validate adapter definitions
163
+ opencli verify
161
164
  ```
162
165
 
163
166
  ## Output Formats
package/dist/browser.d.ts CHANGED
@@ -2,6 +2,7 @@
2
2
  * Browser interaction via Playwright MCP Bridge extension.
3
3
  * Connects to an existing Chrome browser through the extension.
4
4
  */
5
+ import { withTimeoutMs } from './runtime.js';
5
6
  type ConnectFailureKind = 'missing-token' | 'extension-timeout' | 'extension-not-installed' | 'mcp-init' | 'process-exit' | 'unknown';
6
7
  type PlaywrightMCPState = 'idle' | 'connecting' | 'connected' | 'closing' | 'closed';
7
8
  type ConnectFailureInput = {
@@ -29,7 +30,6 @@ export declare class Page implements IPage {
29
30
  call(method: string, params?: Record<string, any>): Promise<any>;
30
31
  goto(url: string): Promise<void>;
31
32
  evaluate(js: string): Promise<any>;
32
- private normalizeEval;
33
33
  snapshot(opts?: {
34
34
  interactive?: boolean;
35
35
  compact?: boolean;
@@ -90,12 +90,16 @@ declare function diffTabIndexes(initialIdentities: string[], currentTabs: Array<
90
90
  identity: string;
91
91
  }>): number[];
92
92
  declare function appendLimited(current: string, chunk: string, limit: number): string;
93
- declare function withTimeout<T>(promise: Promise<T>, timeoutMs: number, message: string): Promise<T>;
93
+ declare function buildMcpArgs(input: {
94
+ mcpPath: string;
95
+ executablePath?: string | null;
96
+ }): string[];
94
97
  export declare const __test__: {
95
98
  createJsonRpcRequest: typeof createJsonRpcRequest;
96
99
  extractTabEntries: typeof extractTabEntries;
97
100
  diffTabIndexes: typeof diffTabIndexes;
98
101
  appendLimited: typeof appendLimited;
99
- withTimeout: typeof withTimeout;
102
+ buildMcpArgs: typeof buildMcpArgs;
103
+ withTimeoutMs: typeof withTimeoutMs;
100
104
  };
101
105
  export {};
package/dist/browser.js CHANGED
@@ -9,14 +9,10 @@ import * as fs from 'node:fs';
9
9
  import * as os from 'node:os';
10
10
  import * as path from 'node:path';
11
11
  import { formatSnapshot } from './snapshotFormatter.js';
12
- // Read version from package.json (single source of truth)
13
- const __browser_dirname = path.dirname(fileURLToPath(import.meta.url));
14
- const PKG_VERSION = (() => { try {
15
- return JSON.parse(fs.readFileSync(path.resolve(__browser_dirname, '..', 'package.json'), 'utf-8')).version;
16
- }
17
- catch {
18
- return '0.0.0';
19
- } })();
12
+ import { PKG_VERSION } from './version.js';
13
+ import { normalizeEvaluateSource } from './pipeline/template.js';
14
+ import { generateInterceptorJs, generateReadInterceptedJs } from './interceptor.js';
15
+ import { withTimeoutMs } from './runtime.js';
20
16
  const CONNECT_TIMEOUT = parseInt(process.env.OPENCLI_BROWSER_CONNECT_TIMEOUT ?? '30', 10);
21
17
  const STDERR_BUFFER_LIMIT = 16 * 1024;
22
18
  const INITIAL_TABS_TIMEOUT_MS = 1500;
@@ -126,26 +122,9 @@ export class Page {
126
122
  }
127
123
  async evaluate(js) {
128
124
  // Normalize IIFE format to function format expected by MCP browser_evaluate
129
- const normalized = this.normalizeEval(js);
125
+ const normalized = normalizeEvaluateSource(js);
130
126
  return this.call('tools/call', { name: 'browser_evaluate', arguments: { function: normalized } });
131
127
  }
132
- normalizeEval(source) {
133
- const s = source.trim();
134
- if (!s)
135
- return '() => undefined';
136
- // IIFE: (async () => {...})() → wrap as () => (...)
137
- if (s.startsWith('(') && s.endsWith(')()'))
138
- return `() => (${s})`;
139
- // Already a function/arrow
140
- if (/^(async\s+)?\([^)]*\)\s*=>/.test(s))
141
- return s;
142
- if (/^(async\s+)?[A-Za-z_][A-Za-z0-9_]*\s*=>/.test(s))
143
- return s;
144
- if (s.startsWith('function ') || s.startsWith('async function '))
145
- return s;
146
- // Raw expression → wrap
147
- return `() => (${s})`;
148
- }
149
128
  async snapshot(opts = {}) {
150
129
  const raw = await this.call('tools/call', { name: 'browser_snapshot', arguments: {} });
151
130
  if (opts.raw)
@@ -224,56 +203,14 @@ export class Page {
224
203
  await this.evaluate(js);
225
204
  }
226
205
  async installInterceptor(pattern) {
227
- const js = `
228
- () => {
229
- window.__opencli_xhr = window.__opencli_xhr || [];
230
- window.__opencli_patterns = window.__opencli_patterns || [];
231
- if (!window.__opencli_patterns.includes('${pattern}')) {
232
- window.__opencli_patterns.push('${pattern}');
233
- }
234
-
235
- if (!window.__patched_xhr) {
236
- const checkMatch = (url) => window.__opencli_patterns.some(p => url.includes(p));
237
-
238
- const XHR = XMLHttpRequest.prototype;
239
- const open = XHR.open;
240
- const send = XHR.send;
241
- XHR.open = function(method, url) {
242
- this._url = url;
243
- return open.call(this, method, url, ...Array.prototype.slice.call(arguments, 2));
244
- };
245
- XHR.send = function() {
246
- this.addEventListener('load', function() {
247
- if (checkMatch(this._url)) {
248
- try { window.__opencli_xhr.push({url: this._url, data: JSON.parse(this.responseText)}); } catch(e){}
249
- }
250
- });
251
- return send.apply(this, arguments);
252
- };
253
-
254
- const origFetch = window.fetch;
255
- window.fetch = async function(...args) {
256
- let u = typeof args[0] === 'string' ? args[0] : (args[0] && args[0].url) || '';
257
- const res = await origFetch.apply(this, args);
258
- setTimeout(async () => {
259
- try {
260
- if (checkMatch(u)) {
261
- const clone = res.clone();
262
- const j = await clone.json();
263
- window.__opencli_xhr.push({url: u, data: j});
264
- }
265
- } catch(e) {}
266
- }, 0);
267
- return res;
268
- };
269
- window.__patched_xhr = true;
270
- }
271
- }
272
- `;
273
- await this.evaluate(js);
206
+ await this.evaluate(generateInterceptorJs(JSON.stringify(pattern), {
207
+ arrayName: '__opencli_xhr',
208
+ patchGuard: '__opencli_interceptor_patched',
209
+ }));
274
210
  }
275
211
  async getInterceptedRequests() {
276
- return (await this.evaluate('() => window.__opencli_xhr')) || [];
212
+ const result = await this.evaluate(generateReadInterceptedJs('__opencli_xhr'));
213
+ return result || [];
277
214
  }
278
215
  }
279
216
  /**
@@ -402,13 +339,13 @@ export class PlaywrightMCP {
402
339
  stderr: stderrBuffer,
403
340
  }));
404
341
  }, timeout * 1000);
405
- const mcpArgs = [mcpPath, '--extension'];
342
+ const mcpArgs = buildMcpArgs({
343
+ mcpPath,
344
+ executablePath: process.env.OPENCLI_BROWSER_EXECUTABLE_PATH,
345
+ });
406
346
  if (process.env.OPENCLI_VERBOSE) {
407
347
  console.error(`[opencli] Extension token: ${extensionToken ? `configured (fingerprint ${tokenFingerprint})` : 'missing'}`);
408
348
  }
409
- if (process.env.OPENCLI_BROWSER_EXECUTABLE_PATH) {
410
- mcpArgs.push('--executablePath', process.env.OPENCLI_BROWSER_EXECUTABLE_PATH);
411
- }
412
349
  debugLog(`Spawning node ${mcpArgs.join(' ')}`);
413
350
  this._proc = spawn('node', mcpArgs, {
414
351
  stdio: ['pipe', 'pipe', 'pipe'],
@@ -485,7 +422,7 @@ export class PlaywrightMCP {
485
422
  this._proc?.stdin?.write(initializedMsg);
486
423
  // Use tabs as a readiness probe and for tab cleanup bookkeeping.
487
424
  debugLog('Fetching initial tabs count...');
488
- withTimeout(page.tabs(), INITIAL_TABS_TIMEOUT_MS, 'Timed out fetching initial tabs').then((tabs) => {
425
+ withTimeoutMs(page.tabs(), INITIAL_TABS_TIMEOUT_MS, 'Timed out fetching initial tabs').then((tabs) => {
489
426
  debugLog(`Tabs response: ${typeof tabs === 'string' ? tabs : JSON.stringify(tabs)}`);
490
427
  this._initialTabIdentities = extractTabIdentities(tabs);
491
428
  settleSuccess(page);
@@ -510,7 +447,7 @@ export class PlaywrightMCP {
510
447
  // Extension mode opens bridge/session tabs that we can clean up best-effort.
511
448
  if (this._page && this._proc && !this._proc.killed) {
512
449
  try {
513
- const tabs = await withTimeout(this._page.tabs(), TAB_CLEANUP_TIMEOUT_MS, 'Timed out fetching tabs during cleanup');
450
+ const tabs = await withTimeoutMs(this._page.tabs(), TAB_CLEANUP_TIMEOUT_MS, 'Timed out fetching tabs during cleanup');
514
451
  const tabEntries = extractTabEntries(tabs);
515
452
  const tabsToClose = diffTabIndexes(this._initialTabIdentities, tabEntries);
516
453
  for (const index of tabsToClose) {
@@ -621,24 +558,20 @@ function appendLimited(current, chunk, limit) {
621
558
  return next;
622
559
  return next.slice(-limit);
623
560
  }
624
- function withTimeout(promise, timeoutMs, message) {
625
- return new Promise((resolve, reject) => {
626
- const timer = setTimeout(() => reject(new Error(message)), timeoutMs);
627
- promise.then((value) => {
628
- clearTimeout(timer);
629
- resolve(value);
630
- }, (error) => {
631
- clearTimeout(timer);
632
- reject(error);
633
- });
634
- });
561
+ function buildMcpArgs(input) {
562
+ const args = [input.mcpPath, '--extension'];
563
+ if (input.executablePath) {
564
+ args.push('--executable-path', input.executablePath);
565
+ }
566
+ return args;
635
567
  }
636
568
  export const __test__ = {
637
569
  createJsonRpcRequest,
638
570
  extractTabEntries,
639
571
  diffTabIndexes,
640
572
  appendLimited,
641
- withTimeout,
573
+ buildMcpArgs,
574
+ withTimeoutMs,
642
575
  };
643
576
  function findMcpServerPath() {
644
577
  if (_cachedMcpServerPath !== undefined)
@@ -34,8 +34,25 @@ describe('browser helpers', () => {
34
34
  it('keeps only the tail of stderr buffers', () => {
35
35
  expect(__test__.appendLimited('12345', '67890', 8)).toBe('34567890');
36
36
  });
37
+ it('builds Playwright MCP args with kebab-case executable path', () => {
38
+ expect(__test__.buildMcpArgs({
39
+ mcpPath: '/tmp/cli.js',
40
+ executablePath: '/mnt/c/Program Files/Google/Chrome/Application/chrome.exe',
41
+ })).toEqual([
42
+ '/tmp/cli.js',
43
+ '--extension',
44
+ '--executable-path',
45
+ '/mnt/c/Program Files/Google/Chrome/Application/chrome.exe',
46
+ ]);
47
+ expect(__test__.buildMcpArgs({
48
+ mcpPath: '/tmp/cli.js',
49
+ })).toEqual([
50
+ '/tmp/cli.js',
51
+ '--extension',
52
+ ]);
53
+ });
37
54
  it('times out slow promises', async () => {
38
- await expect(__test__.withTimeout(new Promise(() => { }), 10, 'timeout')).rejects.toThrow('timeout');
55
+ await expect(__test__.withTimeoutMs(new Promise(() => { }), 10, 'timeout')).rejects.toThrow('timeout');
39
56
  });
40
57
  });
41
58
  describe('PlaywrightMCP state', () => {
package/dist/cascade.d.ts CHANGED
@@ -28,7 +28,7 @@ interface CascadeResult {
28
28
  * Probe an endpoint with a specific strategy.
29
29
  * Returns whether the probe succeeded and basic response info.
30
30
  */
31
- export declare function probeEndpoint(page: IPage, url: string, strategy: Strategy, opts?: {
31
+ export declare function probeEndpoint(page: IPage, url: string, strategy: Strategy, _opts?: {
32
32
  timeout?: number;
33
33
  }): Promise<ProbeResult>;
34
34
  /**
package/dist/cascade.js CHANGED
@@ -18,34 +18,54 @@ const CASCADE_ORDER = [
18
18
  Strategy.INTERCEPT,
19
19
  Strategy.UI,
20
20
  ];
21
+ /**
22
+ * Build the JavaScript source for a fetch probe.
23
+ * Shared logic for PUBLIC, COOKIE, and HEADER strategies.
24
+ */
25
+ function buildFetchProbeJs(url, opts) {
26
+ const credentialsLine = opts.credentials ? `credentials: 'include',` : '';
27
+ const headerSetup = opts.extractCsrf
28
+ ? `
29
+ const cookies = document.cookie.split(';').map(c => c.trim());
30
+ const csrf = cookies.find(c => c.startsWith('ct0=') || c.startsWith('csrf_token=') || c.startsWith('_csrf='))?.split('=').slice(1).join('=');
31
+ const headers = {};
32
+ if (csrf) { headers['X-Csrf-Token'] = csrf; headers['X-XSRF-Token'] = csrf; }
33
+ `
34
+ : 'const headers = {};';
35
+ return `
36
+ async () => {
37
+ try {
38
+ ${headerSetup}
39
+ const resp = await fetch(${JSON.stringify(url)}, {
40
+ ${credentialsLine}
41
+ headers
42
+ });
43
+ const status = resp.status;
44
+ if (!resp.ok) return { status, ok: false };
45
+ const text = await resp.text();
46
+ let hasData = false;
47
+ try {
48
+ const json = JSON.parse(text);
49
+ hasData = !!json && (Array.isArray(json) ? json.length > 0 :
50
+ typeof json === 'object' && Object.keys(json).length > 0);
51
+ // Check for API-level error codes (common in Chinese sites)
52
+ if (json.code !== undefined && json.code !== 0) hasData = false;
53
+ } catch {}
54
+ return { status, ok: true, hasData, preview: text.slice(0, 200) };
55
+ } catch (e) { return { ok: false, error: e.message }; }
56
+ }
57
+ `;
58
+ }
21
59
  /**
22
60
  * Probe an endpoint with a specific strategy.
23
61
  * Returns whether the probe succeeded and basic response info.
24
62
  */
25
- export async function probeEndpoint(page, url, strategy, opts = {}) {
63
+ export async function probeEndpoint(page, url, strategy, _opts = {}) {
26
64
  const result = { strategy, success: false };
27
65
  try {
28
66
  switch (strategy) {
29
67
  case Strategy.PUBLIC: {
30
- // Try direct fetch without browser (no credentials)
31
- const js = `
32
- async () => {
33
- try {
34
- const resp = await fetch(${JSON.stringify(url)});
35
- const status = resp.status;
36
- if (!resp.ok) return { status, ok: false };
37
- const text = await resp.text();
38
- let hasData = false;
39
- try {
40
- const json = JSON.parse(text);
41
- hasData = !!json && (Array.isArray(json) ? json.length > 0 :
42
- typeof json === 'object' && Object.keys(json).length > 0);
43
- } catch {}
44
- return { status, ok: true, hasData, preview: text.slice(0, 200) };
45
- } catch (e) { return { ok: false, error: e.message }; }
46
- }
47
- `;
48
- const resp = await page.evaluate(js);
68
+ const resp = await page.evaluate(buildFetchProbeJs(url, {}));
49
69
  result.statusCode = resp?.status;
50
70
  result.success = resp?.ok && resp?.hasData;
51
71
  result.hasData = resp?.hasData;
@@ -53,27 +73,7 @@ export async function probeEndpoint(page, url, strategy, opts = {}) {
53
73
  break;
54
74
  }
55
75
  case Strategy.COOKIE: {
56
- // Fetch with credentials: 'include' (uses browser cookies)
57
- const js = `
58
- async () => {
59
- try {
60
- const resp = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
61
- const status = resp.status;
62
- if (!resp.ok) return { status, ok: false };
63
- const text = await resp.text();
64
- let hasData = false;
65
- try {
66
- const json = JSON.parse(text);
67
- hasData = !!json && (Array.isArray(json) ? json.length > 0 :
68
- typeof json === 'object' && Object.keys(json).length > 0);
69
- // Check for API-level error codes (common in Chinese sites)
70
- if (json.code !== undefined && json.code !== 0) hasData = false;
71
- } catch {}
72
- return { status, ok: true, hasData, preview: text.slice(0, 200) };
73
- } catch (e) { return { ok: false, error: e.message }; }
74
- }
75
- `;
76
- const resp = await page.evaluate(js);
76
+ const resp = await page.evaluate(buildFetchProbeJs(url, { credentials: true }));
77
77
  result.statusCode = resp?.status;
78
78
  result.success = resp?.ok && resp?.hasData;
79
79
  result.hasData = resp?.hasData;
@@ -81,39 +81,7 @@ export async function probeEndpoint(page, url, strategy, opts = {}) {
81
81
  break;
82
82
  }
83
83
  case Strategy.HEADER: {
84
- // Fetch with credentials + try to extract common auth headers
85
- const js = `
86
- async () => {
87
- try {
88
- // Try to extract CSRF tokens from cookies
89
- const cookies = document.cookie.split(';').map(c => c.trim());
90
- const csrf = cookies.find(c => c.startsWith('ct0=') || c.startsWith('csrf_token=') || c.startsWith('_csrf='))?.split('=').slice(1).join('=');
91
-
92
- const headers = {};
93
- if (csrf) {
94
- headers['X-Csrf-Token'] = csrf;
95
- headers['X-XSRF-Token'] = csrf;
96
- }
97
-
98
- const resp = await fetch(${JSON.stringify(url)}, {
99
- credentials: 'include',
100
- headers
101
- });
102
- const status = resp.status;
103
- if (!resp.ok) return { status, ok: false };
104
- const text = await resp.text();
105
- let hasData = false;
106
- try {
107
- const json = JSON.parse(text);
108
- hasData = !!json && (Array.isArray(json) ? json.length > 0 :
109
- typeof json === 'object' && Object.keys(json).length > 0);
110
- if (json.code !== undefined && json.code !== 0) hasData = false;
111
- } catch {}
112
- return { status, ok: true, hasData, preview: text.slice(0, 200) };
113
- } catch (e) { return { ok: false, error: e.message }; }
114
- }
115
- `;
116
- const resp = await page.evaluate(js);
84
+ const resp = await page.evaluate(buildFetchProbeJs(url, { credentials: true, extractCsrf: true }));
117
85
  result.statusCode = resp?.status;
118
86
  result.success = resp?.ok && resp?.hasData;
119
87
  result.hasData = resp?.hasData;
@@ -123,7 +91,6 @@ export async function probeEndpoint(page, url, strategy, opts = {}) {
123
91
  case Strategy.INTERCEPT:
124
92
  case Strategy.UI:
125
93
  // These require specific implementation per-site
126
- // Mark as needing manual implementation
127
94
  result.success = false;
128
95
  result.error = `Strategy ${strategy} requires site-specific implementation`;
129
96
  break;
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Shared constants used across explore, synthesize, and pipeline modules.
3
+ */
4
+ /** URL query params that are volatile/ephemeral and should be stripped from patterns */
5
+ export declare const VOLATILE_PARAMS: Set<string>;
6
+ /** Search-related query parameter names */
7
+ export declare const SEARCH_PARAMS: Set<string>;
8
+ /** Pagination-related query parameter names */
9
+ export declare const PAGINATION_PARAMS: Set<string>;
10
+ /** Limit/page-size query parameter names */
11
+ export declare const LIMIT_PARAMS: Set<string>;
12
+ /** Field role → common API field names mapping */
13
+ export declare const FIELD_ROLES: Record<string, string[]>;
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Shared constants used across explore, synthesize, and pipeline modules.
3
+ */
4
+ /** URL query params that are volatile/ephemeral and should be stripped from patterns */
5
+ export const VOLATILE_PARAMS = new Set([
6
+ 'w_rid', 'wts', '_', 'callback', 'timestamp', 't', 'nonce', 'sign',
7
+ ]);
8
+ /** Search-related query parameter names */
9
+ export const SEARCH_PARAMS = new Set([
10
+ 'q', 'query', 'keyword', 'search', 'wd', 'kw', 'search_query', 'w',
11
+ ]);
12
+ /** Pagination-related query parameter names */
13
+ export const PAGINATION_PARAMS = new Set([
14
+ 'page', 'pn', 'offset', 'cursor', 'next', 'page_num',
15
+ ]);
16
+ /** Limit/page-size query parameter names */
17
+ export const LIMIT_PARAMS = new Set([
18
+ 'limit', 'count', 'size', 'per_page', 'page_size', 'ps', 'num',
19
+ ]);
20
+ /** Field role → common API field names mapping */
21
+ export const FIELD_ROLES = {
22
+ title: ['title', 'name', 'text', 'content', 'desc', 'description', 'headline', 'subject'],
23
+ url: ['url', 'uri', 'link', 'href', 'permalink', 'jump_url', 'web_url', 'share_url'],
24
+ author: ['author', 'username', 'user_name', 'nickname', 'nick', 'owner', 'creator', 'up_name', 'uname'],
25
+ score: ['score', 'hot', 'heat', 'likes', 'like_count', 'view_count', 'views', 'play', 'favorite_count', 'reply_count'],
26
+ time: ['time', 'created_at', 'publish_time', 'pub_time', 'date', 'ctime', 'mtime', 'pubdate', 'created'],
27
+ id: ['id', 'aid', 'bvid', 'mid', 'uid', 'oid', 'note_id', 'item_id'],
28
+ cover: ['cover', 'pic', 'image', 'thumbnail', 'poster', 'avatar'],
29
+ category: ['category', 'tag', 'type', 'tname', 'channel', 'section'],
30
+ };
package/dist/engine.js CHANGED
@@ -73,7 +73,6 @@ function loadFromManifest(manifestPath, clisDir) {
73
73
  columns: entry.columns,
74
74
  timeoutSeconds: entry.timeout,
75
75
  source: modulePath,
76
- // Mark as lazy — executeCommand will load the module before running
77
76
  _lazy: true,
78
77
  _modulePath: modulePath,
79
78
  };
@@ -158,8 +157,9 @@ function registerYamlCli(filePath, defaultSite) {
158
157
  */
159
158
  export async function executeCommand(cmd, page, kwargs, debug = false) {
160
159
  // Lazy-load TS module on first execution
161
- if (cmd._lazy && cmd._modulePath) {
162
- const modulePath = cmd._modulePath;
160
+ const internal = cmd;
161
+ if (internal._lazy && internal._modulePath) {
162
+ const modulePath = internal._modulePath;
163
163
  if (!_loadedModules.has(modulePath)) {
164
164
  try {
165
165
  await import(`file://${modulePath}`);
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Tests for engine.ts: CLI discovery and command execution.
3
+ */
4
+ export {};
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Tests for engine.ts: CLI discovery and command execution.
3
+ */
4
+ import { describe, it, expect } from 'vitest';
5
+ import { discoverClis, executeCommand } from './engine.js';
6
+ import { cli, Strategy } from './registry.js';
7
+ describe('discoverClis', () => {
8
+ it('handles non-existent directories gracefully', async () => {
9
+ // Should not throw for missing directories
10
+ await expect(discoverClis('/tmp/nonexistent-opencli-test-dir')).resolves.not.toThrow();
11
+ });
12
+ });
13
+ describe('executeCommand', () => {
14
+ it('executes a command with func', async () => {
15
+ const cmd = cli({
16
+ site: 'test-engine',
17
+ name: 'func-test',
18
+ description: 'test command with func',
19
+ browser: false,
20
+ strategy: Strategy.PUBLIC,
21
+ func: async (_page, kwargs) => {
22
+ return [{ title: kwargs.query ?? 'default' }];
23
+ },
24
+ });
25
+ const result = await executeCommand(cmd, null, { query: 'hello' });
26
+ expect(result).toEqual([{ title: 'hello' }]);
27
+ });
28
+ it('executes a command with pipeline', async () => {
29
+ const cmd = cli({
30
+ site: 'test-engine',
31
+ name: 'pipe-test',
32
+ description: 'test command with pipeline',
33
+ browser: false,
34
+ strategy: Strategy.PUBLIC,
35
+ pipeline: [
36
+ { evaluate: '() => [{ n: 1 }, { n: 2 }, { n: 3 }]' },
37
+ { limit: '2' },
38
+ ],
39
+ });
40
+ // Pipeline commands require page for evaluate step, so we'll test the error path
41
+ await expect(executeCommand(cmd, null, {})).rejects.toThrow();
42
+ });
43
+ it('throws for command with no func or pipeline', async () => {
44
+ const cmd = cli({
45
+ site: 'test-engine',
46
+ name: 'empty-test',
47
+ description: 'empty command',
48
+ browser: false,
49
+ });
50
+ await expect(executeCommand(cmd, null, {})).rejects.toThrow('has no func or pipeline');
51
+ });
52
+ it('passes debug flag to func', async () => {
53
+ let receivedDebug = false;
54
+ const cmd = cli({
55
+ site: 'test-engine',
56
+ name: 'debug-test',
57
+ description: 'debug test',
58
+ browser: false,
59
+ func: async (_page, _kwargs, debug) => {
60
+ receivedDebug = debug ?? false;
61
+ return [];
62
+ },
63
+ });
64
+ await executeCommand(cmd, null, {}, true);
65
+ expect(receivedDebug).toBe(true);
66
+ });
67
+ });
package/dist/explore.js CHANGED
@@ -8,6 +8,7 @@
8
8
  import * as fs from 'node:fs';
9
9
  import * as path from 'node:path';
10
10
  import { DEFAULT_BROWSER_EXPLORE_TIMEOUT, browserSession, runWithTimeout } from './runtime.js';
11
+ import { VOLATILE_PARAMS, SEARCH_PARAMS, PAGINATION_PARAMS, LIMIT_PARAMS, FIELD_ROLES } from './constants.js';
11
12
  // ── Site name detection ────────────────────────────────────────────────────
12
13
  const KNOWN_SITE_ALIASES = {
13
14
  'x.com': 'twitter', 'twitter.com': 'twitter',
@@ -39,21 +40,6 @@ export function detectSiteName(url) {
39
40
  export function slugify(value) {
40
41
  return value.trim().toLowerCase().replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-|-$/g, '') || 'site';
41
42
  }
42
- // ── Field & capability inference ───────────────────────────────────────────
43
- const FIELD_ROLES = {
44
- title: ['title', 'name', 'text', 'content', 'desc', 'description', 'headline', 'subject'],
45
- url: ['url', 'uri', 'link', 'href', 'permalink', 'jump_url', 'web_url', 'share_url'],
46
- author: ['author', 'username', 'user_name', 'nickname', 'nick', 'owner', 'creator', 'up_name', 'uname'],
47
- score: ['score', 'hot', 'heat', 'likes', 'like_count', 'view_count', 'views', 'play', 'favorite_count', 'reply_count'],
48
- time: ['time', 'created_at', 'publish_time', 'pub_time', 'date', 'ctime', 'mtime', 'pubdate', 'created'],
49
- id: ['id', 'aid', 'bvid', 'mid', 'uid', 'oid', 'note_id', 'item_id'],
50
- cover: ['cover', 'pic', 'image', 'thumbnail', 'poster', 'avatar'],
51
- category: ['category', 'tag', 'type', 'tname', 'channel', 'section'],
52
- };
53
- const SEARCH_PARAMS = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'search_query', 'w']);
54
- const PAGINATION_PARAMS = new Set(['page', 'pn', 'offset', 'cursor', 'next', 'page_num']);
55
- const LIMIT_PARAMS = new Set(['limit', 'count', 'size', 'per_page', 'page_size', 'ps', 'num']);
56
- const VOLATILE_PARAMS = new Set(['w_rid', 'wts', '_', 'callback', 'timestamp', 't', 'nonce', 'sign']);
57
43
  /**
58
44
  * Parse raw network output from Playwright MCP.
59
45
  * Handles text format: [GET] url => [200]