@jackwener/opencli 1.7.4 → 1.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +76 -51
  2. package/README.zh-CN.md +78 -62
  3. package/cli-manifest.json +4558 -2979
  4. package/clis/antigravity/serve.js +71 -25
  5. package/clis/baidu-scholar/search.js +87 -0
  6. package/clis/baidu-scholar/search.test.js +23 -0
  7. package/clis/bilibili/video.js +61 -0
  8. package/clis/bilibili/video.test.js +81 -0
  9. package/clis/deepseek/ask.js +94 -0
  10. package/clis/deepseek/ask.test.js +73 -0
  11. package/clis/deepseek/history.js +25 -0
  12. package/clis/deepseek/new.js +20 -0
  13. package/clis/deepseek/read.js +22 -0
  14. package/clis/deepseek/status.js +24 -0
  15. package/clis/deepseek/utils.js +291 -0
  16. package/clis/deepseek/utils.test.js +37 -0
  17. package/clis/eastmoney/_secid.js +78 -0
  18. package/clis/eastmoney/announcement.js +52 -0
  19. package/clis/eastmoney/convertible.js +73 -0
  20. package/clis/eastmoney/etf.js +65 -0
  21. package/clis/eastmoney/holders.js +78 -0
  22. package/clis/eastmoney/index-board.js +96 -0
  23. package/clis/eastmoney/kline.js +87 -0
  24. package/clis/eastmoney/kuaixun.js +54 -0
  25. package/clis/eastmoney/longhu.js +67 -0
  26. package/clis/eastmoney/money-flow.js +78 -0
  27. package/clis/eastmoney/northbound.js +57 -0
  28. package/clis/eastmoney/quote.js +107 -0
  29. package/clis/eastmoney/rank.js +94 -0
  30. package/clis/eastmoney/sectors.js +76 -0
  31. package/clis/google-scholar/search.js +58 -0
  32. package/clis/google-scholar/search.test.js +23 -0
  33. package/clis/gov-law/commands.test.js +39 -0
  34. package/clis/gov-law/recent.js +22 -0
  35. package/clis/gov-law/search.js +41 -0
  36. package/clis/gov-law/shared.js +51 -0
  37. package/clis/gov-policy/commands.test.js +27 -0
  38. package/clis/gov-policy/recent.js +47 -0
  39. package/clis/gov-policy/search.js +48 -0
  40. package/clis/jianyu/search.js +139 -3
  41. package/clis/jianyu/search.test.js +25 -0
  42. package/clis/jianyu/shared/procurement-detail.js +15 -0
  43. package/clis/jianyu/shared/procurement-detail.test.js +12 -0
  44. package/clis/nowcoder/companies.js +23 -0
  45. package/clis/nowcoder/creators.js +27 -0
  46. package/clis/nowcoder/detail.js +61 -0
  47. package/clis/nowcoder/experience.js +36 -0
  48. package/clis/nowcoder/hot.js +24 -0
  49. package/clis/nowcoder/jobs.js +21 -0
  50. package/clis/nowcoder/notifications.js +29 -0
  51. package/clis/nowcoder/papers.js +40 -0
  52. package/clis/nowcoder/practice.js +37 -0
  53. package/clis/nowcoder/recommend.js +30 -0
  54. package/clis/nowcoder/referral.js +39 -0
  55. package/clis/nowcoder/salary.js +40 -0
  56. package/clis/nowcoder/search.js +49 -0
  57. package/clis/nowcoder/suggest.js +33 -0
  58. package/clis/nowcoder/topics.js +27 -0
  59. package/clis/nowcoder/trending.js +25 -0
  60. package/clis/twitter/list-add.js +337 -0
  61. package/clis/twitter/list-add.test.js +15 -0
  62. package/clis/twitter/list-remove.js +297 -0
  63. package/clis/twitter/list-remove.test.js +14 -0
  64. package/clis/twitter/list-tweets.js +185 -0
  65. package/clis/twitter/list-tweets.test.js +108 -0
  66. package/clis/twitter/lists.js +134 -47
  67. package/clis/twitter/lists.test.js +105 -38
  68. package/clis/twitter/shared.js +7 -2
  69. package/clis/twitter/tweets.js +218 -0
  70. package/clis/twitter/tweets.test.js +125 -0
  71. package/clis/wanfang/search.js +66 -0
  72. package/clis/wanfang/search.test.js +23 -0
  73. package/clis/web/read.js +1 -1
  74. package/clis/weixin/download.js +3 -2
  75. package/clis/xiaohongshu/publish.js +149 -28
  76. package/clis/xiaohongshu/publish.test.js +319 -6
  77. package/clis/xiaoyuzhou/download.js +8 -4
  78. package/clis/xiaoyuzhou/download.test.js +23 -13
  79. package/clis/xiaoyuzhou/episode.js +9 -4
  80. package/clis/xiaoyuzhou/podcast-episodes.js +15 -11
  81. package/clis/xiaoyuzhou/podcast.js +9 -4
  82. package/clis/xiaoyuzhou/utils.js +0 -40
  83. package/clis/xiaoyuzhou/utils.test.js +15 -75
  84. package/clis/youtube/channel.js +35 -0
  85. package/clis/zsxq/dynamics.js +1 -1
  86. package/clis/zsxq/utils.js +6 -3
  87. package/clis/zsxq/utils.test.js +31 -0
  88. package/dist/src/browser/base-page.d.ts +14 -4
  89. package/dist/src/browser/base-page.js +35 -25
  90. package/dist/src/browser/bridge.d.ts +1 -0
  91. package/dist/src/browser/bridge.js +1 -1
  92. package/dist/src/browser/cdp.d.ts +1 -0
  93. package/dist/src/browser/cdp.js +13 -4
  94. package/dist/src/browser/compound.d.ts +59 -0
  95. package/dist/src/browser/compound.js +112 -0
  96. package/dist/src/browser/compound.test.js +175 -0
  97. package/dist/src/browser/daemon-client.d.ts +6 -4
  98. package/dist/src/browser/daemon-client.js +6 -1
  99. package/dist/src/browser/daemon-client.test.js +40 -1
  100. package/dist/src/browser/dom-snapshot.d.ts +7 -0
  101. package/dist/src/browser/dom-snapshot.js +83 -5
  102. package/dist/src/browser/dom-snapshot.test.js +65 -0
  103. package/dist/src/browser/extract.d.ts +69 -0
  104. package/dist/src/browser/extract.js +132 -0
  105. package/dist/src/browser/extract.test.js +129 -0
  106. package/dist/src/browser/find.d.ts +76 -0
  107. package/dist/src/browser/find.js +179 -0
  108. package/dist/src/browser/find.test.js +120 -0
  109. package/dist/src/browser/html-tree.d.ts +75 -0
  110. package/dist/src/browser/html-tree.js +112 -0
  111. package/dist/src/browser/html-tree.test.d.ts +1 -0
  112. package/dist/src/browser/html-tree.test.js +181 -0
  113. package/dist/src/browser/network-cache.d.ts +48 -0
  114. package/dist/src/browser/network-cache.js +66 -0
  115. package/dist/src/browser/network-cache.test.d.ts +1 -0
  116. package/dist/src/browser/network-cache.test.js +58 -0
  117. package/dist/src/browser/network-key.d.ts +22 -0
  118. package/dist/src/browser/network-key.js +66 -0
  119. package/dist/src/browser/network-key.test.d.ts +1 -0
  120. package/dist/src/browser/network-key.test.js +49 -0
  121. package/dist/src/browser/page.d.ts +14 -4
  122. package/dist/src/browser/page.js +48 -7
  123. package/dist/src/browser/page.test.js +97 -0
  124. package/dist/src/browser/shape-filter.d.ts +52 -0
  125. package/dist/src/browser/shape-filter.js +101 -0
  126. package/dist/src/browser/shape-filter.test.d.ts +1 -0
  127. package/dist/src/browser/shape-filter.test.js +101 -0
  128. package/dist/src/browser/shape.d.ts +23 -0
  129. package/dist/src/browser/shape.js +95 -0
  130. package/dist/src/browser/shape.test.d.ts +1 -0
  131. package/dist/src/browser/shape.test.js +82 -0
  132. package/dist/src/browser/target-errors.d.ts +14 -1
  133. package/dist/src/browser/target-errors.js +13 -0
  134. package/dist/src/browser/target-errors.test.js +39 -6
  135. package/dist/src/browser/target-resolver.d.ts +57 -10
  136. package/dist/src/browser/target-resolver.js +195 -75
  137. package/dist/src/browser/target-resolver.test.js +80 -5
  138. package/dist/src/cli.js +849 -267
  139. package/dist/src/cli.test.js +961 -90
  140. package/dist/src/commanderAdapter.d.ts +0 -1
  141. package/dist/src/commanderAdapter.js +2 -16
  142. package/dist/src/commanderAdapter.test.js +1 -1
  143. package/dist/src/completion-shared.js +2 -5
  144. package/dist/src/daemon.js +8 -0
  145. package/dist/src/download/article-download.d.ts +1 -0
  146. package/dist/src/download/article-download.js +3 -0
  147. package/dist/src/download/article-download.test.d.ts +1 -0
  148. package/dist/src/download/article-download.test.js +39 -0
  149. package/dist/src/execution.js +7 -2
  150. package/dist/src/execution.test.js +54 -0
  151. package/dist/src/main.js +16 -0
  152. package/dist/src/plugin.d.ts +1 -8
  153. package/dist/src/plugin.js +1 -27
  154. package/dist/src/plugin.test.js +1 -59
  155. package/dist/src/registry.d.ts +1 -0
  156. package/dist/src/registry.js +3 -2
  157. package/dist/src/registry.test.js +22 -0
  158. package/dist/src/types.d.ts +32 -8
  159. package/package.json +1 -1
  160. package/clis/twitter/lists-parser.js +0 -77
  161. package/clis/twitter/lists.d.ts +0 -5
  162. package/dist/src/cascade.d.ts +0 -46
  163. package/dist/src/cascade.js +0 -135
  164. package/dist/src/explore.d.ts +0 -99
  165. package/dist/src/explore.js +0 -402
  166. package/dist/src/generate-verified.d.ts +0 -105
  167. package/dist/src/generate-verified.js +0 -696
  168. package/dist/src/generate-verified.test.js +0 -925
  169. package/dist/src/generate.d.ts +0 -46
  170. package/dist/src/generate.js +0 -117
  171. package/dist/src/record.d.ts +0 -96
  172. package/dist/src/record.js +0 -657
  173. package/dist/src/record.test.js +0 -293
  174. package/dist/src/skill-generate.d.ts +0 -30
  175. package/dist/src/skill-generate.js +0 -75
  176. package/dist/src/skill-generate.test.js +0 -173
  177. package/dist/src/synthesize.d.ts +0 -97
  178. package/dist/src/synthesize.js +0 -208
  179. /package/dist/src/{generate-verified.test.d.ts → browser/compound.test.d.ts} +0 -0
  180. /package/dist/src/{record.test.d.ts → browser/extract.test.d.ts} +0 -0
  181. /package/dist/src/{skill-generate.test.d.ts → browser/find.test.d.ts} +0 -0
@@ -1,46 +0,0 @@
1
- /**
2
- * Strategy Cascade: automatic strategy downgrade chain.
3
- *
4
- * Probes an API endpoint starting from the simplest strategy (PUBLIC)
5
- * and automatically downgrades through the strategy tiers until one works:
6
- *
7
- * PUBLIC → COOKIE → HEADER → INTERCEPT → UI
8
- *
9
- * This eliminates the need for manual strategy selection — the system
10
- * automatically finds the minimum-privilege strategy that works.
11
- */
12
- import { Strategy } from './registry.js';
13
- import type { IPage } from './types.js';
14
- interface ProbeResult {
15
- strategy: Strategy;
16
- success: boolean;
17
- statusCode?: number;
18
- hasData?: boolean;
19
- error?: string;
20
- responsePreview?: string;
21
- }
22
- interface CascadeResult {
23
- bestStrategy: Strategy;
24
- probes: ProbeResult[];
25
- confidence: number;
26
- }
27
- /**
28
- * Probe an endpoint with a specific strategy.
29
- * Returns whether the probe succeeded and basic response info.
30
- */
31
- export declare function probeEndpoint(page: IPage, url: string, strategy: Strategy, _opts?: {
32
- timeout?: number;
33
- }): Promise<ProbeResult>;
34
- /**
35
- * Run the cascade: try each strategy in order until one works.
36
- * Returns the simplest working strategy.
37
- */
38
- export declare function cascadeProbe(page: IPage, url: string, opts?: {
39
- maxStrategy?: Strategy;
40
- timeout?: number;
41
- }): Promise<CascadeResult>;
42
- /**
43
- * Render cascade results for display.
44
- */
45
- export declare function renderCascadeResult(result: CascadeResult): string;
46
- export {};
@@ -1,135 +0,0 @@
1
- /**
2
- * Strategy Cascade: automatic strategy downgrade chain.
3
- *
4
- * Probes an API endpoint starting from the simplest strategy (PUBLIC)
5
- * and automatically downgrades through the strategy tiers until one works:
6
- *
7
- * PUBLIC → COOKIE → HEADER → INTERCEPT → UI
8
- *
9
- * This eliminates the need for manual strategy selection — the system
10
- * automatically finds the minimum-privilege strategy that works.
11
- */
12
- import { Strategy } from './registry.js';
13
- import { getErrorMessage } from './errors.js';
14
- /** Strategy cascade order (simplest → most complex) */
15
- const CASCADE_ORDER = [
16
- Strategy.PUBLIC,
17
- Strategy.COOKIE,
18
- Strategy.HEADER,
19
- Strategy.INTERCEPT,
20
- Strategy.UI,
21
- ];
22
- /**
23
- * Build the JavaScript source for a fetch probe.
24
- * Shared logic for PUBLIC, COOKIE, and HEADER strategies.
25
- */
26
- function buildFetchProbeJs(url, opts) {
27
- const credentialsLine = opts.credentials ? `credentials: 'include',` : '';
28
- const headerSetup = opts.extractCsrf
29
- ? `
30
- const cookies = document.cookie.split(';').map(c => c.trim());
31
- const csrf = cookies.find(c => c.startsWith('ct0=') || c.startsWith('csrf_token=') || c.startsWith('_csrf='))?.split('=').slice(1).join('=');
32
- const headers = {};
33
- if (csrf) { headers['X-Csrf-Token'] = csrf; headers['X-XSRF-Token'] = csrf; }
34
- `
35
- : 'const headers = {};';
36
- return `
37
- async () => {
38
- try {
39
- ${headerSetup}
40
- const resp = await fetch(${JSON.stringify(url)}, {
41
- ${credentialsLine}
42
- headers
43
- });
44
- const status = resp.status;
45
- if (!resp.ok) return { status, ok: false };
46
- const text = await resp.text();
47
- let hasData = false;
48
- try {
49
- const json = JSON.parse(text);
50
- hasData = !!json && (Array.isArray(json) ? json.length > 0 :
51
- typeof json === 'object' && Object.keys(json).length > 0);
52
- // Check for API-level error codes (common in Chinese sites)
53
- if (json.code !== undefined && json.code !== 0) hasData = false;
54
- } catch {}
55
- return { status, ok: true, hasData, preview: text.slice(0, 200) };
56
- } catch (e) { return { ok: false, error: e.message }; }
57
- }
58
- `;
59
- }
60
- /** Strategy → fetch probe options mapping for strategies that support probing. */
61
- const PROBE_OPTIONS = {
62
- [Strategy.PUBLIC]: {},
63
- [Strategy.COOKIE]: { credentials: true },
64
- [Strategy.HEADER]: { credentials: true, extractCsrf: true },
65
- };
66
- /**
67
- * Probe an endpoint with a specific strategy.
68
- * Returns whether the probe succeeded and basic response info.
69
- */
70
- export async function probeEndpoint(page, url, strategy, _opts = {}) {
71
- const result = { strategy, success: false };
72
- try {
73
- const opts = PROBE_OPTIONS[strategy];
74
- if (opts) {
75
- const resp = (await page.evaluate(buildFetchProbeJs(url, opts)));
76
- result.statusCode = resp?.status;
77
- result.success = !!(resp?.ok && resp?.hasData);
78
- result.hasData = resp?.hasData;
79
- result.responsePreview = resp?.preview;
80
- }
81
- else {
82
- // INTERCEPT / UI require site-specific implementation.
83
- result.error = `Strategy ${strategy} requires site-specific implementation`;
84
- }
85
- }
86
- catch (err) {
87
- result.success = false;
88
- result.error = getErrorMessage(err);
89
- }
90
- return result;
91
- }
92
- /**
93
- * Run the cascade: try each strategy in order until one works.
94
- * Returns the simplest working strategy.
95
- */
96
- export async function cascadeProbe(page, url, opts = {}) {
97
- const rawIdx = opts.maxStrategy
98
- ? CASCADE_ORDER.indexOf(opts.maxStrategy)
99
- : CASCADE_ORDER.indexOf(Strategy.HEADER); // Don't auto-try INTERCEPT/UI
100
- const maxIdx = rawIdx === -1 ? CASCADE_ORDER.indexOf(Strategy.HEADER) : rawIdx;
101
- const probes = [];
102
- for (let i = 0; i <= Math.min(maxIdx, CASCADE_ORDER.length - 1); i++) {
103
- const strategy = CASCADE_ORDER[i];
104
- const probe = await probeEndpoint(page, url, strategy, opts);
105
- probes.push(probe);
106
- if (probe.success) {
107
- return {
108
- bestStrategy: strategy,
109
- probes,
110
- confidence: 1.0 - (i * 0.1), // Higher confidence for simpler strategies
111
- };
112
- }
113
- }
114
- // None worked — default to COOKIE (most common for logged-in sites)
115
- return {
116
- bestStrategy: Strategy.COOKIE,
117
- probes,
118
- confidence: 0.3,
119
- };
120
- }
121
- /**
122
- * Render cascade results for display.
123
- */
124
- export function renderCascadeResult(result) {
125
- const lines = [
126
- `Strategy Cascade: ${result.bestStrategy} (${(result.confidence * 100).toFixed(0)}% confidence)`,
127
- ];
128
- for (const probe of result.probes) {
129
- const icon = probe.success ? '✅' : '❌';
130
- const status = probe.statusCode ? ` [${probe.statusCode}]` : '';
131
- const err = probe.error ? ` — ${probe.error}` : '';
132
- lines.push(` ${icon} ${probe.strategy}${status}${err}`);
133
- }
134
- return lines.join('\n');
135
- }
@@ -1,99 +0,0 @@
1
- /**
2
- * Deep Explore: intelligent API discovery with response analysis.
3
- *
4
- * Navigates to the target URL, auto-scrolls to trigger lazy loading,
5
- * captures network traffic, analyzes JSON responses, and automatically
6
- * infers CLI capabilities from discovered API endpoints.
7
- */
8
- import type { IBrowserFactory } from './runtime.js';
9
- export declare function detectSiteName(url: string): string;
10
- export declare function slugify(value: string): string;
11
- interface InferredCapability {
12
- name: string;
13
- description: string;
14
- strategy: string;
15
- endpoint: string;
16
- itemPath: string | null;
17
- recommendedColumns: string[];
18
- recommendedArgs: Array<{
19
- name: string;
20
- type: string;
21
- required: boolean;
22
- default?: unknown;
23
- }>;
24
- storeHint?: {
25
- store: string;
26
- action: string;
27
- };
28
- }
29
- export interface ExploreManifest {
30
- site: string;
31
- target_url: string;
32
- final_url: string;
33
- title: string;
34
- framework: Record<string, boolean>;
35
- stores: Array<{
36
- type: DiscoveredStore['type'];
37
- id: string;
38
- actions: string[];
39
- }>;
40
- top_strategy: string;
41
- explored_at?: string;
42
- }
43
- export interface ExploreAuthSummary {
44
- top_strategy: string;
45
- indicators: string[];
46
- framework: Record<string, boolean>;
47
- }
48
- export interface ExploreEndpointArtifact {
49
- pattern: string;
50
- method: string;
51
- url: string;
52
- status: number | null;
53
- contentType: string;
54
- queryParams: string[];
55
- itemPath: string | null;
56
- itemCount: number;
57
- detectedFields: Record<string, string>;
58
- authIndicators: string[];
59
- }
60
- export interface ExploreResult {
61
- site: string;
62
- target_url: string;
63
- final_url: string;
64
- title: string;
65
- framework: Record<string, boolean>;
66
- stores: DiscoveredStore[];
67
- top_strategy: string;
68
- endpoint_count: number;
69
- api_endpoint_count: number;
70
- capabilities: InferredCapability[];
71
- auth_indicators: string[];
72
- out_dir: string;
73
- }
74
- export interface ExploreBundle {
75
- manifest: ExploreManifest;
76
- endpoints: ExploreEndpointArtifact[];
77
- capabilities: InferredCapability[];
78
- auth: ExploreAuthSummary;
79
- }
80
- export interface DiscoveredStore {
81
- type: 'pinia' | 'vuex';
82
- id: string;
83
- actions: string[];
84
- stateKeys: string[];
85
- }
86
- export declare function exploreUrl(url: string, opts: {
87
- BrowserFactory: new () => IBrowserFactory;
88
- site?: string;
89
- goal?: string;
90
- authenticated?: boolean;
91
- outDir?: string;
92
- waitSeconds?: number;
93
- query?: string;
94
- clickLabels?: string[];
95
- auto?: boolean;
96
- workspace?: string;
97
- }): Promise<ExploreResult>;
98
- export declare function renderExploreSummary(result: ExploreResult): string;
99
- export {};
@@ -1,402 +0,0 @@
1
- /**
2
- * Deep Explore: intelligent API discovery with response analysis.
3
- *
4
- * Navigates to the target URL, auto-scrolls to trigger lazy loading,
5
- * captures network traffic, analyzes JSON responses, and automatically
6
- * infers CLI capabilities from discovered API endpoints.
7
- */
8
- import * as fs from 'node:fs';
9
- import * as path from 'node:path';
10
- import { DEFAULT_BROWSER_EXPLORE_TIMEOUT, browserSession, runWithTimeout } from './runtime.js';
11
- import { LIMIT_PARAMS } from './constants.js';
12
- import { detectFramework } from './scripts/framework.js';
13
- import { discoverStores } from './scripts/store.js';
14
- import { interactFuzz } from './scripts/interact.js';
15
- import { log } from './logger.js';
16
- import { urlToPattern, findArrayPath, flattenFields, detectFieldRoles, inferCapabilityName, inferStrategy, detectAuthFromHeaders, classifyQueryParams, isNoiseUrl, } from './analysis.js';
17
- // ── Site name detection ────────────────────────────────────────────────────
18
- const KNOWN_SITE_ALIASES = {
19
- 'x.com': 'twitter', 'twitter.com': 'twitter',
20
- 'news.ycombinator.com': 'hackernews',
21
- 'www.zhihu.com': 'zhihu', 'www.bilibili.com': 'bilibili',
22
- 'search.bilibili.com': 'bilibili',
23
- 'www.v2ex.com': 'v2ex', 'www.reddit.com': 'reddit',
24
- 'www.xiaohongshu.com': 'xiaohongshu', 'www.douban.com': 'douban',
25
- 'www.weibo.com': 'weibo', 'www.bbc.com': 'bbc',
26
- };
27
- export function detectSiteName(url) {
28
- try {
29
- const host = new URL(url).hostname.toLowerCase();
30
- if (host in KNOWN_SITE_ALIASES)
31
- return KNOWN_SITE_ALIASES[host];
32
- const parts = host.split('.').filter(p => p && p !== 'www');
33
- if (parts.length >= 2) {
34
- if (['uk', 'jp', 'cn', 'com'].includes(parts[parts.length - 1]) && parts.length >= 3) {
35
- return slugify(parts[parts.length - 3]);
36
- }
37
- return slugify(parts[parts.length - 2]);
38
- }
39
- return parts[0] ? slugify(parts[0]) : 'site';
40
- }
41
- catch {
42
- return 'site';
43
- }
44
- }
45
- export function slugify(value) {
46
- return value.trim().toLowerCase().replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-|-$/g, '') || 'site';
47
- }
48
- /**
49
- * Parse raw network output from browser page.
50
- * Handles text format: [GET] url => [200]
51
- */
52
- function parseNetworkRequests(raw) {
53
- if (typeof raw === 'string') {
54
- const entries = [];
55
- for (const line of raw.split('\n')) {
56
- // Format: [GET] URL => [200]
57
- const m = line.match(/\[?(GET|POST|PUT|DELETE|PATCH|OPTIONS)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?/i);
58
- if (m) {
59
- const [, method, url, status] = m;
60
- entries.push({
61
- method: method.toUpperCase(), url, status: status ? parseInt(status) : null,
62
- contentType: (url.includes('/api/') || url.includes('/x/') || url.endsWith('.json')) ? 'application/json' : '',
63
- });
64
- }
65
- }
66
- return entries;
67
- }
68
- if (Array.isArray(raw)) {
69
- return raw.filter(e => e && typeof e === 'object').map(e => {
70
- // Handle both legacy shape (status/contentType/responseBody) and
71
- // extension/CDP capture shape (responseStatus/responseContentType/responsePreview)
72
- let body = e.responseBody;
73
- if (body === undefined && e.responsePreview !== undefined) {
74
- const preview = e.responsePreview;
75
- if (typeof preview === 'string') {
76
- try {
77
- body = JSON.parse(preview);
78
- }
79
- catch {
80
- body = preview;
81
- }
82
- }
83
- }
84
- return {
85
- method: (e.method ?? 'GET').toUpperCase(),
86
- url: String(e.url ?? e.request?.url ?? e.requestUrl ?? ''),
87
- status: e.status ?? e.responseStatus ?? e.statusCode ?? null,
88
- contentType: e.contentType ?? e.responseContentType ?? e.response?.contentType ?? '',
89
- responseBody: body, requestHeaders: e.requestHeaders,
90
- };
91
- });
92
- }
93
- return [];
94
- }
95
- function analyzeResponseBody(body) {
96
- if (!body || typeof body !== 'object')
97
- return null;
98
- const result = findArrayPath(body);
99
- if (!result)
100
- return null;
101
- const sample = result.items[0];
102
- const sampleFields = sample && typeof sample === 'object' ? flattenFields(sample, '', 2) : [];
103
- const detectedFields = detectFieldRoles(sampleFields);
104
- return { itemPath: result.path || null, itemCount: result.items.length, detectedFields, sampleFields };
105
- }
106
- function isBooleanRecord(value) {
107
- return typeof value === 'object' && value !== null && !Array.isArray(value)
108
- && Object.values(value).every(v => typeof v === 'boolean');
109
- }
110
- /**
111
- * Deterministic sort key for endpoint ordering — transparent, observable signals only.
112
- * Used by generate/synthesize to pick a stable default candidate.
113
- * Not exposed externally; AI agents see the raw metadata and decide for themselves.
114
- */
115
- function endpointSortKey(ep) {
116
- let k = 0;
117
- // Prefer endpoints with array data (list APIs are more useful for automation)
118
- const items = ep.responseAnalysis?.itemCount ?? 0;
119
- if (items > 0)
120
- k += 100 + Math.min(items, 50);
121
- // Prefer endpoints with detected semantic fields
122
- k += Object.keys(ep.responseAnalysis?.detectedFields ?? {}).length * 10;
123
- // Prefer API-style paths
124
- if (ep.pattern.includes('/api/') || ep.pattern.includes('/x/'))
125
- k += 5;
126
- // Prefer endpoints with query params (more likely to be parameterized APIs)
127
- if (ep.hasSearchParam || ep.hasPaginationParam || ep.hasLimitParam)
128
- k += 5;
129
- return k;
130
- }
131
- /** Check whether an endpoint carries useful structured data (any JSON response, not noise). */
132
- function isUsefulEndpoint(ep) {
133
- if (isNoiseUrl(ep.url))
134
- return false;
135
- return ep.contentType.includes('json');
136
- }
137
- // ── Framework detection ────────────────────────────────────────────────────
138
- const FRAMEWORK_DETECT_JS = detectFramework.toString();
139
- // ── Store discovery ────────────────────────────────────────────────────────
140
- const STORE_DISCOVER_JS = discoverStores.toString();
141
- // ── Auto-Interaction (Fuzzing) ─────────────────────────────────────────────
142
- const INTERACT_FUZZ_JS = interactFuzz.toString();
143
- // ── Analysis helpers (extracted from exploreUrl) ───────────────────────────
144
- /** Filter and deduplicate network endpoints, keeping only useful structured-data APIs. */
145
- function analyzeEndpoints(networkEntries) {
146
- const seen = new Map();
147
- for (const entry of networkEntries) {
148
- if (!entry.url)
149
- continue;
150
- const ct = entry.contentType.toLowerCase();
151
- if (ct.includes('image/') || ct.includes('font/') || ct.includes('css') || ct.includes('javascript') || ct.includes('wasm'))
152
- continue;
153
- if (entry.status && entry.status >= 400)
154
- continue;
155
- const pattern = urlToPattern(entry.url);
156
- const key = `${entry.method}:${pattern}`;
157
- if (seen.has(key))
158
- continue;
159
- const { params: qp, hasSearch, hasPagination, hasLimit } = classifyQueryParams(entry.url);
160
- const ep = {
161
- pattern, method: entry.method, url: entry.url, status: entry.status, contentType: ct,
162
- queryParams: qp, hasSearchParam: hasSearch,
163
- hasPaginationParam: hasPagination,
164
- hasLimitParam: hasLimit || qp.some(p => LIMIT_PARAMS.has(p)),
165
- authIndicators: detectAuthFromHeaders(entry.requestHeaders),
166
- responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
167
- };
168
- seen.set(key, ep);
169
- }
170
- // Filter to useful endpoints; deterministic ordering by observable metadata signals
171
- const analyzed = [...seen.values()]
172
- .filter(isUsefulEndpoint)
173
- .sort((a, b) => endpointSortKey(b) - endpointSortKey(a));
174
- return { analyzed, totalCount: seen.size };
175
- }
176
- /** Infer CLI capabilities from analyzed endpoints. */
177
- function inferCapabilitiesFromEndpoints(endpoints, stores, opts) {
178
- const capabilities = [];
179
- const usedNames = new Set();
180
- for (const ep of endpoints.slice(0, 8)) {
181
- let capName = inferCapabilityName(ep.url, opts.goal);
182
- if (usedNames.has(capName)) {
183
- const suffix = ep.pattern.split('/').filter(s => s && !s.startsWith('{') && !s.includes('.')).pop();
184
- capName = suffix ? `${capName}_${suffix}` : `${capName}_${usedNames.size}`;
185
- }
186
- usedNames.add(capName);
187
- const cols = [];
188
- if (ep.responseAnalysis) {
189
- for (const role of ['title', 'url', 'author', 'score', 'time']) {
190
- if (ep.responseAnalysis.detectedFields[role])
191
- cols.push(role);
192
- }
193
- }
194
- const args = [];
195
- if (ep.hasSearchParam)
196
- args.push({ name: 'keyword', type: 'str', required: true });
197
- args.push({ name: 'limit', type: 'int', required: false, default: 20 });
198
- if (ep.hasPaginationParam)
199
- args.push({ name: 'page', type: 'int', required: false, default: 1 });
200
- const epStrategy = inferStrategy(ep.authIndicators);
201
- let storeHint;
202
- if ((epStrategy === 'intercept' || ep.authIndicators.includes('signature')) && stores.length > 0) {
203
- for (const s of stores) {
204
- const matchingAction = s.actions.find(a => capName.split('_').some(part => a.toLowerCase().includes(part)) ||
205
- a.toLowerCase().includes('fetch') || a.toLowerCase().includes('get'));
206
- if (matchingAction) {
207
- storeHint = { store: s.id, action: matchingAction };
208
- break;
209
- }
210
- }
211
- }
212
- capabilities.push({
213
- name: capName, description: `${opts.site ?? detectSiteName(opts.url)} ${capName}`,
214
- strategy: storeHint ? 'store-action' : epStrategy,
215
- endpoint: ep.pattern,
216
- itemPath: ep.responseAnalysis?.itemPath ?? null,
217
- recommendedColumns: cols.length ? cols : ['title', 'url'],
218
- recommendedArgs: args,
219
- ...(storeHint ? { storeHint } : {}),
220
- });
221
- }
222
- const allAuth = new Set(endpoints.flatMap(ep => ep.authIndicators));
223
- const topStrategy = allAuth.has('signature') ? 'intercept'
224
- : allAuth.has('bearer') || allAuth.has('csrf') ? 'header'
225
- : allAuth.size === 0 ? 'public' : 'cookie';
226
- return { capabilities, topStrategy, authIndicators: [...allAuth] };
227
- }
228
- /** Write explore artifacts (manifest, endpoints, capabilities, auth, stores) to disk. */
229
- async function writeExploreArtifacts(targetDir, result, analyzedEndpoints, stores) {
230
- await fs.promises.mkdir(targetDir, { recursive: true });
231
- const tasks = [
232
- fs.promises.writeFile(path.join(targetDir, 'manifest.json'), JSON.stringify({
233
- site: result.site, target_url: result.target_url, final_url: result.final_url, title: result.title,
234
- framework: result.framework, stores: stores.map(s => ({ type: s.type, id: s.id, actions: s.actions })),
235
- top_strategy: result.top_strategy, explored_at: new Date().toISOString(),
236
- }, null, 2)),
237
- fs.promises.writeFile(path.join(targetDir, 'endpoints.json'), JSON.stringify(analyzedEndpoints.map(ep => ({
238
- pattern: ep.pattern, method: ep.method, url: ep.url, status: ep.status,
239
- contentType: ep.contentType, queryParams: ep.queryParams,
240
- itemPath: ep.responseAnalysis?.itemPath ?? null, itemCount: ep.responseAnalysis?.itemCount ?? 0,
241
- detectedFields: ep.responseAnalysis?.detectedFields ?? {}, authIndicators: ep.authIndicators,
242
- })), null, 2)),
243
- fs.promises.writeFile(path.join(targetDir, 'capabilities.json'), JSON.stringify(result.capabilities, null, 2)),
244
- fs.promises.writeFile(path.join(targetDir, 'auth.json'), JSON.stringify({
245
- top_strategy: result.top_strategy, indicators: result.auth_indicators, framework: result.framework,
246
- }, null, 2)),
247
- ];
248
- if (stores.length > 0) {
249
- tasks.push(fs.promises.writeFile(path.join(targetDir, 'stores.json'), JSON.stringify(stores, null, 2)));
250
- }
251
- await Promise.all(tasks);
252
- }
253
- // ── Main explore function ──────────────────────────────────────────────────
254
/**
 * Explore a URL inside a browser session and write the discovered artifacts to disk.
 *
 * The whole exploration runs under `runWithTimeout`, with a timeout of at least
 * `DEFAULT_BROWSER_EXPLORE_TIMEOUT` that grows with `opts.waitSeconds`.
 *
 * @param {string} url - Page to navigate to.
 * @param {object} opts - Exploration options. Fields read here:
 *   `waitSeconds` (default 3.0), `BrowserFactory`, `workspace`, `auto`,
 *   `clickLabels`, `site`, `goal`, `outDir`.
 * @returns {Promise<object>} The assembled explore result plus `out_dir`, the
 *   directory the artifacts were written to.
 */
export async function exploreUrl(url, opts) {
    const waitSeconds = opts.waitSeconds ?? 3.0;
    const exploreTimeout = Math.max(DEFAULT_BROWSER_EXPLORE_TIMEOUT, 45.0 + waitSeconds * 8.0);
    return browserSession(opts.BrowserFactory, async (page) => {
        return runWithTimeout((async () => {
            // Step 1: Navigate. Network capture is optional on the page object and
            // a failure to start it must not abort the exploration.
            await page.startNetworkCapture?.().catch(() => { });
            await page.goto(url);
            await page.wait(waitSeconds);
            // Step 2: Auto-scroll to trigger lazy loading (best-effort)
            await page.autoScroll({ times: 3, delayMs: 1500 }).catch(() => { });
            // Step 2.5: Interactive fuzzing (only if requested)
            if (opts.auto) {
                try {
                    // First: targeted clicks by visible label (e.g. "CC", or localized
                    // UI labels such as "字幕" / "评论")
                    if (opts.clickLabels?.length) {
                        for (const label of opts.clickLabels) {
                            // JSON.stringify yields a safely quoted JS string literal
                            // for interpolation into the in-page script.
                            const safeLabel = JSON.stringify(label);
                            await page.evaluate(`
                                (() => {
                                    const el = [...document.querySelectorAll('button, [role="button"], [role="tab"], a, span')]
                                        .find(e => e.textContent && e.textContent.trim().includes(${safeLabel}));
                                    if (el) el.click();
                                })()
                            `);
                            await page.wait(1);
                        }
                    }
                    // Then: blind fuzzing on generic interactive elements. The return
                    // value of the script is not used here.
                    await page.evaluate(INTERACT_FUZZ_JS);
                    await page.wait(2); // wait for XHRs to settle
                }
                catch (e) {
                    log.verbose(`Interactive fuzzing skipped: ${e instanceof Error ? e.message : String(e)}`);
                }
            }
            // Step 3: Read page metadata
            const metadata = await readPageMetadata(page);
            // Step 4: Capture network traffic, preferring the capture API when present
            const rawNetwork = page.readNetworkCapture
                ? await page.readNetworkCapture()
                : await page.networkRequests(false);
            const networkEntries = parseNetworkRequests(rawNetwork);
            // Step 5: For successful JSON GET endpoints missing a body, re-fetch
            // in-browser through a temporary hidden iframe (at most 5 endpoints).
            const jsonEndpoints = networkEntries.filter(e => e.contentType.includes('json') && e.method === 'GET' && e.status === 200 && !e.responseBody);
            await Promise.allSettled(jsonEndpoints.slice(0, 5).map(async (ep) => {
                try {
                    const body = await page.evaluate(`async () => {
                        let iframe = null;
                        try {
                            iframe = document.createElement('iframe');
                            iframe.style.display = 'none';
                            document.body.appendChild(iframe);
                            const cleanFetch = iframe.contentWindow.fetch || window.fetch;
                            const r = await cleanFetch(${JSON.stringify(ep.url)}, { credentials: 'include' });
                            if (!r.ok) return null;
                            const d = await r.json();
                            return JSON.stringify(d).slice(0, 10000);
                        } catch {
                            return null;
                        } finally {
                            if (iframe && iframe.parentNode) iframe.parentNode.removeChild(iframe);
                        }
                    }`);
                    if (body && typeof body === 'string') {
                        // NOTE: the in-page script truncates the serialized JSON to
                        // 10000 characters, so a larger body fails to parse here and
                        // the endpoint is left without a response body.
                        try {
                            ep.responseBody = JSON.parse(body);
                        }
                        catch { }
                    }
                    else if (body && typeof body === 'object')
                        ep.responseBody = body;
                }
                catch { }
            }));
            // Step 6: Detect framework (best-effort; stays empty on failure)
            let framework = {};
            try {
                const fw = await page.evaluate(FRAMEWORK_DETECT_JS);
                if (isBooleanRecord(fw))
                    framework = fw;
            }
            catch { }
            // Step 6.5: Discover stores (Pinia / Vuex) only when one was detected
            let stores = [];
            if (framework.pinia || framework.vuex) {
                try {
                    const raw = await page.evaluate(STORE_DISCOVER_JS);
                    if (Array.isArray(raw))
                        stores = raw;
                }
                catch { }
            }
            // Step 7+8: Analyze endpoints and infer capabilities
            const { analyzed: analyzedEndpoints, totalCount } = analyzeEndpoints(networkEntries);
            const { capabilities, topStrategy, authIndicators } = inferCapabilitiesFromEndpoints(analyzedEndpoints, stores, { site: opts.site, goal: opts.goal, url });
            // Step 9: Assemble result and write artifacts
            const siteName = opts.site ?? detectSiteName(metadata.url || url);
            const targetDir = opts.outDir ?? path.join('.opencli', 'explore', siteName);
            const result = {
                site: siteName, target_url: url, final_url: metadata.url, title: metadata.title,
                framework, stores, top_strategy: topStrategy,
                endpoint_count: totalCount,
                api_endpoint_count: analyzedEndpoints.length,
                capabilities, auth_indicators: authIndicators,
            };
            await writeExploreArtifacts(targetDir, result, analyzedEndpoints, stores);
            return { ...result, out_dir: targetDir };
        })(), { timeout: exploreTimeout, label: `Explore ${url}` });
    }, { workspace: opts.workspace });
}
365
/**
 * Render a multi-line, human-readable summary of an explore result.
 *
 * Lists at most five capabilities and at most five stores; for each store at
 * most five action names are shown, followed by `...` when there are more.
 *
 * @param {object} result - Explore result. Fields read here: `site`,
 *   `target_url`, `title`, `top_strategy`, `endpoint_count`,
 *   `api_endpoint_count`, `capabilities`, `framework`, `stores`, `out_dir`.
 * @returns {string} The summary lines joined with newlines.
 */
export function renderExploreSummary(result) {
    const lines = [
        'opencli probe: OK', `Site: ${result.site}`, `URL: ${result.target_url}`,
        `Title: ${result.title || '(none)'}`, `Strategy: ${result.top_strategy}`,
        `Endpoints: ${result.endpoint_count} total, ${result.api_endpoint_count} API`,
        `Capabilities: ${result.capabilities?.length ?? 0}`,
    ];
    for (const cap of (result.capabilities ?? []).slice(0, 5)) {
        const storeInfo = cap.storeHint ? ` → ${cap.storeHint.store}.${cap.storeHint.action}()` : '';
        lines.push(` • ${cap.name} (${cap.strategy})${storeInfo}`);
    }
    const fw = result.framework ?? {};
    // Only frameworks whose detection flag is truthy are listed.
    const fwNames = Object.entries(fw).filter(([, v]) => v).map(([k]) => k);
    if (fwNames.length)
        lines.push(`Framework: ${fwNames.join(', ')}`);
    const stores = result.stores ?? [];
    if (stores.length) {
        lines.push(`Stores: ${stores.length}`);
        for (const s of stores.slice(0, 5)) {
            // A store entry without an `actions` array is rendered with an empty
            // action list instead of throwing a TypeError.
            const actions = Array.isArray(s.actions) ? s.actions : [];
            lines.push(` • ${s.type}/${s.id}: ${actions.slice(0, 5).join(', ')}${actions.length > 5 ? '...' : ''}`);
        }
    }
    lines.push(`Output: ${result.out_dir}`);
    return lines.join('\n');
}
390
/**
 * Best-effort read of the current page's URL and title.
 *
 * Evaluates a small script in the page and coerces the returned `url` and
 * `title` to strings. Any evaluation error, or a result that is not a
 * non-array object, yields empty strings for both fields.
 *
 * @param {object} page - Object exposing an async `evaluate(script)` method.
 * @returns {Promise<{url: string, title: string}>} The page URL and title.
 */
async function readPageMetadata(page) {
    const empty = { url: '', title: '' };
    try {
        const info = await page.evaluate(`() => ({ url: window.location.href, title: document.title || '' })`);
        const isRecord = info !== null && typeof info === 'object' && !Array.isArray(info);
        if (!isRecord) {
            return empty;
        }
        return { url: String(info.url ?? ''), title: String(info.title ?? '') };
    }
    catch {
        // Metadata is optional; fall back to empty values rather than failing.
        return empty;
    }
}