@jackwener/opencli 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +594 -0
- package/README.md +116 -38
- package/README.zh-CN.md +143 -0
- package/SKILL.md +154 -102
- package/dist/browser.d.ts +1 -0
- package/dist/browser.js +35 -1
- package/dist/cascade.d.ts +45 -0
- package/dist/cascade.js +180 -0
- package/dist/clis/bilibili/hot.yaml +38 -0
- package/dist/clis/github/trending.yaml +58 -0
- package/dist/clis/hackernews/top.yaml +36 -0
- package/dist/clis/index.d.ts +2 -1
- package/dist/clis/index.js +3 -1
- package/dist/clis/reddit/hot.yaml +46 -0
- package/dist/clis/twitter/trending.yaml +40 -0
- package/dist/clis/v2ex/hot.yaml +25 -0
- package/dist/clis/v2ex/latest.yaml +25 -0
- package/dist/clis/v2ex/topic.yaml +27 -0
- package/dist/clis/xiaohongshu/feed.yaml +32 -0
- package/dist/clis/xiaohongshu/notifications.yaml +38 -0
- package/dist/clis/xiaohongshu/search.d.ts +5 -0
- package/dist/clis/xiaohongshu/search.js +68 -0
- package/dist/clis/zhihu/hot.yaml +42 -0
- package/dist/clis/zhihu/question.js +39 -0
- package/dist/clis/zhihu/search.yaml +55 -0
- package/dist/explore.d.ts +23 -13
- package/dist/explore.js +293 -422
- package/dist/main.js +17 -0
- package/dist/pipeline.js +238 -2
- package/dist/synthesize.d.ts +11 -8
- package/dist/synthesize.js +142 -118
- package/package.json +4 -2
- package/src/browser.ts +33 -1
- package/src/cascade.ts +217 -0
- package/src/clis/index.ts +4 -1
- package/src/clis/reddit/hot.yaml +46 -0
- package/src/clis/v2ex/hot.yaml +5 -9
- package/src/clis/v2ex/latest.yaml +5 -8
- package/src/clis/v2ex/topic.yaml +27 -0
- package/src/clis/xiaohongshu/feed.yaml +32 -0
- package/src/clis/xiaohongshu/notifications.yaml +38 -0
- package/src/clis/xiaohongshu/search.ts +71 -0
- package/src/clis/zhihu/hot.yaml +22 -8
- package/src/clis/zhihu/question.ts +45 -0
- package/src/clis/zhihu/search.yaml +55 -0
- package/src/explore.ts +303 -465
- package/src/main.ts +14 -0
- package/src/pipeline.ts +239 -2
- package/src/synthesize.ts +142 -137
- package/dist/clis/zhihu/search.js +0 -58
- package/src/clis/zhihu/search.ts +0 -65
- /package/dist/clis/zhihu/{search.d.ts → question.d.ts} +0 -0
package/src/explore.ts
CHANGED
|
@@ -1,208 +1,128 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Deep Explore: intelligent API discovery with response analysis.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* analyzes
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* Flow:
|
|
9
|
-
* 1. Navigate to target URL
|
|
10
|
-
* 2. Auto-scroll to trigger lazy loading
|
|
11
|
-
* 3. Capture network requests (with body analysis)
|
|
12
|
-
* 4. For each JSON response: detect list fields, infer columns, analyze auth
|
|
13
|
-
* 5. Detect frontend framework (Vue/React/Pinia/Next.js)
|
|
14
|
-
* 6. Generate structured capabilities.json
|
|
4
|
+
* Navigates to the target URL, auto-scrolls to trigger lazy loading,
|
|
5
|
+
* captures network traffic, analyzes JSON responses, and automatically
|
|
6
|
+
* infers CLI capabilities from discovered API endpoints.
|
|
15
7
|
*/
|
|
16
8
|
|
|
17
9
|
import * as fs from 'node:fs';
|
|
18
10
|
import * as path from 'node:path';
|
|
19
|
-
import {
|
|
11
|
+
import { DEFAULT_BROWSER_EXPLORE_TIMEOUT, browserSession, runWithTimeout } from './runtime.js';
|
|
20
12
|
|
|
21
13
|
// ── Site name detection ────────────────────────────────────────────────────
|
|
22
14
|
|
|
23
|
-
const
|
|
15
|
+
const KNOWN_SITE_ALIASES: Record<string, string> = {
|
|
24
16
|
'x.com': 'twitter', 'twitter.com': 'twitter',
|
|
25
17
|
'news.ycombinator.com': 'hackernews',
|
|
26
18
|
'www.zhihu.com': 'zhihu', 'www.bilibili.com': 'bilibili',
|
|
19
|
+
'search.bilibili.com': 'bilibili',
|
|
27
20
|
'www.v2ex.com': 'v2ex', 'www.reddit.com': 'reddit',
|
|
28
21
|
'www.xiaohongshu.com': 'xiaohongshu', 'www.douban.com': 'douban',
|
|
29
|
-
'www.weibo.com': 'weibo', '
|
|
22
|
+
'www.weibo.com': 'weibo', 'www.bbc.com': 'bbc',
|
|
30
23
|
};
|
|
31
24
|
|
|
32
|
-
function detectSiteName(url: string): string {
|
|
25
|
+
export function detectSiteName(url: string): string {
|
|
33
26
|
try {
|
|
34
27
|
const host = new URL(url).hostname.toLowerCase();
|
|
35
|
-
if (host in
|
|
28
|
+
if (host in KNOWN_SITE_ALIASES) return KNOWN_SITE_ALIASES[host];
|
|
36
29
|
const parts = host.split('.').filter(p => p && p !== 'www');
|
|
37
30
|
if (parts.length >= 2) {
|
|
38
31
|
if (['uk', 'jp', 'cn', 'com'].includes(parts[parts.length - 1]) && parts.length >= 3) {
|
|
39
|
-
return parts[parts.length - 3]
|
|
32
|
+
return slugify(parts[parts.length - 3]);
|
|
40
33
|
}
|
|
41
|
-
return parts[parts.length - 2]
|
|
34
|
+
return slugify(parts[parts.length - 2]);
|
|
42
35
|
}
|
|
43
|
-
return parts[0]
|
|
36
|
+
return parts[0] ? slugify(parts[0]) : 'site';
|
|
44
37
|
} catch { return 'site'; }
|
|
45
38
|
}
|
|
46
39
|
|
|
40
|
+
export function slugify(value: string): string {
|
|
41
|
+
return value.trim().toLowerCase().replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-|-$/g, '') || 'site';
|
|
42
|
+
}
|
|
43
|
+
|
|
47
44
|
// ── Field & capability inference ───────────────────────────────────────────
|
|
48
45
|
|
|
49
|
-
/**
|
|
50
|
-
* Common field names grouped by semantic role.
|
|
51
|
-
* Used to auto-detect which response fields map to which columns.
|
|
52
|
-
*/
|
|
53
46
|
const FIELD_ROLES: Record<string, string[]> = {
|
|
54
|
-
title:
|
|
55
|
-
url:
|
|
56
|
-
author:
|
|
57
|
-
score:
|
|
58
|
-
time:
|
|
59
|
-
id:
|
|
60
|
-
cover:
|
|
61
|
-
category:
|
|
47
|
+
title: ['title', 'name', 'text', 'content', 'desc', 'description', 'headline', 'subject'],
|
|
48
|
+
url: ['url', 'uri', 'link', 'href', 'permalink', 'jump_url', 'web_url', 'share_url'],
|
|
49
|
+
author: ['author', 'username', 'user_name', 'nickname', 'nick', 'owner', 'creator', 'up_name', 'uname'],
|
|
50
|
+
score: ['score', 'hot', 'heat', 'likes', 'like_count', 'view_count', 'views', 'play', 'favorite_count', 'reply_count'],
|
|
51
|
+
time: ['time', 'created_at', 'publish_time', 'pub_time', 'date', 'ctime', 'mtime', 'pubdate', 'created'],
|
|
52
|
+
id: ['id', 'aid', 'bvid', 'mid', 'uid', 'oid', 'note_id', 'item_id'],
|
|
53
|
+
cover: ['cover', 'pic', 'image', 'thumbnail', 'poster', 'avatar'],
|
|
54
|
+
category: ['category', 'tag', 'type', 'tname', 'channel', 'section'],
|
|
62
55
|
};
|
|
63
56
|
|
|
64
|
-
/** Param names that indicate searchable APIs */
|
|
65
57
|
const SEARCH_PARAMS = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'search_query', 'w']);
|
|
66
|
-
/** Param names that indicate pagination */
|
|
67
58
|
const PAGINATION_PARAMS = new Set(['page', 'pn', 'offset', 'cursor', 'next', 'page_num']);
|
|
68
|
-
/** Param names that indicate limit control */
|
|
69
59
|
const LIMIT_PARAMS = new Set(['limit', 'count', 'size', 'per_page', 'page_size', 'ps', 'num']);
|
|
70
|
-
/** Content types to ignore */
|
|
71
|
-
const IGNORED_CONTENT_TYPES = new Set(['image/', 'font/', 'text/css', 'text/javascript', 'application/javascript', 'application/wasm']);
|
|
72
|
-
/** Volatile query params to strip from patterns */
|
|
73
60
|
const VOLATILE_PARAMS = new Set(['w_rid', 'wts', '_', 'callback', 'timestamp', 't', 'nonce', 'sign']);
|
|
74
61
|
|
|
75
62
|
// ── Network analysis ───────────────────────────────────────────────────────
|
|
76
63
|
|
|
77
64
|
interface NetworkEntry {
|
|
78
|
-
method: string;
|
|
79
|
-
|
|
80
|
-
status: number | null;
|
|
81
|
-
contentType: string;
|
|
82
|
-
responseBody?: any;
|
|
83
|
-
requestHeaders?: Record<string, string>;
|
|
84
|
-
queryParams?: Record<string, string>;
|
|
65
|
+
method: string; url: string; status: number | null;
|
|
66
|
+
contentType: string; responseBody?: any; requestHeaders?: Record<string, string>;
|
|
85
67
|
}
|
|
86
68
|
|
|
87
69
|
interface AnalyzedEndpoint {
|
|
88
|
-
pattern: string;
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
status: number | null;
|
|
92
|
-
contentType: string;
|
|
93
|
-
queryParams: string[];
|
|
94
|
-
hasSearchParam: boolean;
|
|
95
|
-
hasPaginationParam: boolean;
|
|
96
|
-
hasLimitParam: boolean;
|
|
70
|
+
pattern: string; method: string; url: string; status: number | null;
|
|
71
|
+
contentType: string; queryParams: string[]; score: number;
|
|
72
|
+
hasSearchParam: boolean; hasPaginationParam: boolean; hasLimitParam: boolean;
|
|
97
73
|
authIndicators: string[];
|
|
98
|
-
responseAnalysis:
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
interface ResponseAnalysis {
|
|
102
|
-
itemPath: string | null;
|
|
103
|
-
itemCount: number;
|
|
104
|
-
detectedFields: Record<string, string>; // role → actual field name
|
|
105
|
-
sampleFieldNames: string[];
|
|
74
|
+
responseAnalysis: { itemPath: string | null; itemCount: number; detectedFields: Record<string, string>; sampleFields: string[] } | null;
|
|
106
75
|
}
|
|
107
76
|
|
|
108
77
|
interface InferredCapability {
|
|
109
|
-
name: string;
|
|
110
|
-
|
|
111
|
-
strategy: string;
|
|
112
|
-
confidence: number;
|
|
113
|
-
endpoint: string;
|
|
114
|
-
itemPath: string | null;
|
|
78
|
+
name: string; description: string; strategy: string; confidence: number;
|
|
79
|
+
endpoint: string; itemPath: string | null;
|
|
115
80
|
recommendedColumns: string[];
|
|
116
81
|
recommendedArgs: Array<{ name: string; type: string; required: boolean; default?: any }>;
|
|
117
82
|
}
|
|
118
83
|
|
|
119
84
|
/**
|
|
120
|
-
* Parse raw network output from Playwright MCP
|
|
121
|
-
* Handles
|
|
85
|
+
* Parse raw network output from Playwright MCP.
|
|
86
|
+
* Handles text format: [GET] url => [200]
|
|
122
87
|
*/
|
|
123
|
-
function
|
|
88
|
+
function parseNetworkRequests(raw: any): NetworkEntry[] {
|
|
124
89
|
if (typeof raw === 'string') {
|
|
125
|
-
// Playwright MCP returns network as text lines like:
|
|
126
|
-
// "[GET] https://api.example.com/xxx => [200] "
|
|
127
|
-
// May also have markdown headers like "### Result"
|
|
128
90
|
const entries: NetworkEntry[] = [];
|
|
129
|
-
const
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
const [, method, url, status] = bracketMatch;
|
|
91
|
+
for (const line of raw.split('\n')) {
|
|
92
|
+
// Format: [GET] URL => [200]
|
|
93
|
+
const m = line.match(/\[?(GET|POST|PUT|DELETE|PATCH|OPTIONS)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?/i);
|
|
94
|
+
if (m) {
|
|
95
|
+
const [, method, url, status] = m;
|
|
135
96
|
entries.push({
|
|
136
|
-
method: method.toUpperCase(),
|
|
137
|
-
url,
|
|
138
|
-
status: status ? parseInt(status) : null,
|
|
139
|
-
contentType: url.endsWith('.json') ? 'application/json' :
|
|
140
|
-
(url.includes('/api/') || url.includes('/x/')) ? 'application/json' : '',
|
|
141
|
-
});
|
|
142
|
-
continue;
|
|
143
|
-
}
|
|
144
|
-
// Legacy format: GET url → 200 (application/json)
|
|
145
|
-
const legacyMatch = line.match(/^(GET|POST|PUT|DELETE|PATCH|OPTIONS)\s+(\S+)\s*→?\s*(\d+)?\s*(?:\(([^)]*)\))?/i);
|
|
146
|
-
if (legacyMatch) {
|
|
147
|
-
const [, method, url, status, ct] = legacyMatch;
|
|
148
|
-
entries.push({
|
|
149
|
-
method: method.toUpperCase(),
|
|
150
|
-
url,
|
|
151
|
-
status: status ? parseInt(status) : null,
|
|
152
|
-
contentType: ct ?? '',
|
|
97
|
+
method: method.toUpperCase(), url, status: status ? parseInt(status) : null,
|
|
98
|
+
contentType: (url.includes('/api/') || url.includes('/x/') || url.endsWith('.json')) ? 'application/json' : '',
|
|
153
99
|
});
|
|
154
100
|
}
|
|
155
101
|
}
|
|
156
102
|
return entries;
|
|
157
103
|
}
|
|
158
104
|
if (Array.isArray(raw)) {
|
|
159
|
-
return raw.
|
|
105
|
+
return raw.filter(e => e && typeof e === 'object').map(e => ({
|
|
160
106
|
method: (e.method ?? 'GET').toUpperCase(),
|
|
161
|
-
url: e.url ?? e.request?.url ?? '',
|
|
107
|
+
url: String(e.url ?? e.request?.url ?? e.requestUrl ?? ''),
|
|
162
108
|
status: e.status ?? e.statusCode ?? null,
|
|
163
|
-
contentType: e.contentType ?? e.
|
|
164
|
-
responseBody: e.responseBody
|
|
165
|
-
requestHeaders: e.requestHeaders ?? e.headers,
|
|
109
|
+
contentType: e.contentType ?? e.response?.contentType ?? '',
|
|
110
|
+
responseBody: e.responseBody, requestHeaders: e.requestHeaders,
|
|
166
111
|
}));
|
|
167
112
|
}
|
|
168
113
|
return [];
|
|
169
114
|
}
|
|
170
115
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
/**
|
|
174
|
-
* Normalize a URL into a pattern by replacing IDs with placeholders.
|
|
175
|
-
*/
|
|
176
116
|
function urlToPattern(url: string): string {
|
|
177
117
|
try {
|
|
178
|
-
const
|
|
179
|
-
const pathNorm =
|
|
180
|
-
.replace(/\/\d+/g, '/{id}')
|
|
181
|
-
.replace(/\/[0-9a-fA-F]{8,}/g, '/{hex}')
|
|
182
|
-
.replace(/\/BV[a-zA-Z0-9]{10}/g, '/{bvid}');
|
|
118
|
+
const p = new URL(url);
|
|
119
|
+
const pathNorm = p.pathname.replace(/\/\d+/g, '/{id}').replace(/\/[0-9a-fA-F]{8,}/g, '/{hex}').replace(/\/BV[a-zA-Z0-9]{10}/g, '/{bvid}');
|
|
183
120
|
const params: string[] = [];
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
});
|
|
187
|
-
const qs = params.length ? '?' + params.sort().map(k => `${k}={}`).join('&') : '';
|
|
188
|
-
return `${parsed.host}${pathNorm}${qs}`;
|
|
121
|
+
p.searchParams.forEach((_v, k) => { if (!VOLATILE_PARAMS.has(k)) params.push(k); });
|
|
122
|
+
return `${p.host}${pathNorm}${params.length ? '?' + params.sort().map(k => `${k}={}`).join('&') : ''}`;
|
|
189
123
|
} catch { return url; }
|
|
190
124
|
}
|
|
191
125
|
|
|
192
|
-
/**
|
|
193
|
-
* Extract query params from a URL.
|
|
194
|
-
*/
|
|
195
|
-
function extractQueryParams(url: string): Record<string, string> {
|
|
196
|
-
try {
|
|
197
|
-
const params: Record<string, string> = {};
|
|
198
|
-
new URL(url).searchParams.forEach((v, k) => { params[k] = v; });
|
|
199
|
-
return params;
|
|
200
|
-
} catch { return {}; }
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
/**
|
|
204
|
-
* Detect auth indicators from request headers.
|
|
205
|
-
*/
|
|
206
126
|
function detectAuthIndicators(headers?: Record<string, string>): string[] {
|
|
207
127
|
if (!headers) return [];
|
|
208
128
|
const indicators: string[] = [];
|
|
@@ -210,439 +130,357 @@ function detectAuthIndicators(headers?: Record<string, string>): string[] {
|
|
|
210
130
|
if (keys.some(k => k === 'authorization')) indicators.push('bearer');
|
|
211
131
|
if (keys.some(k => k.startsWith('x-csrf') || k.startsWith('x-xsrf'))) indicators.push('csrf');
|
|
212
132
|
if (keys.some(k => k.startsWith('x-s') || k === 'x-t' || k === 'x-s-common')) indicators.push('signature');
|
|
213
|
-
if (keys.some(k => k === 'x-client-transaction-id')) indicators.push('transaction');
|
|
214
133
|
return indicators;
|
|
215
134
|
}
|
|
216
135
|
|
|
217
|
-
|
|
218
|
-
* Analyze a JSON response to find list data and field mappings.
|
|
219
|
-
*/
|
|
220
|
-
function analyzeResponseBody(body: any): ResponseAnalysis | null {
|
|
136
|
+
function analyzeResponseBody(body: any): AnalyzedEndpoint['responseAnalysis'] {
|
|
221
137
|
if (!body || typeof body !== 'object') return null;
|
|
222
|
-
|
|
223
|
-
// Try to find the main list in the response
|
|
224
138
|
const candidates: Array<{ path: string; items: any[] }> = [];
|
|
225
139
|
|
|
226
|
-
function findArrays(obj: any,
|
|
140
|
+
function findArrays(obj: any, path: string, depth: number) {
|
|
227
141
|
if (depth > 4) return;
|
|
228
|
-
if (Array.isArray(obj) && obj.length >= 2) {
|
|
229
|
-
|
|
230
|
-
if (obj.some(item => item && typeof item === 'object' && !Array.isArray(item))) {
|
|
231
|
-
candidates.push({ path: currentPath, items: obj });
|
|
232
|
-
}
|
|
142
|
+
if (Array.isArray(obj) && obj.length >= 2 && obj.some(item => item && typeof item === 'object' && !Array.isArray(item))) {
|
|
143
|
+
candidates.push({ path, items: obj });
|
|
233
144
|
}
|
|
234
145
|
if (obj && typeof obj === 'object' && !Array.isArray(obj)) {
|
|
235
|
-
for (const [key, val] of Object.entries(obj)) {
|
|
236
|
-
const nextPath = currentPath ? `${currentPath}.${key}` : key;
|
|
237
|
-
findArrays(val, nextPath, depth + 1);
|
|
238
|
-
}
|
|
146
|
+
for (const [key, val] of Object.entries(obj)) findArrays(val, path ? `${path}.${key}` : key, depth + 1);
|
|
239
147
|
}
|
|
240
148
|
}
|
|
241
|
-
|
|
242
149
|
findArrays(body, '', 0);
|
|
243
150
|
if (!candidates.length) return null;
|
|
244
151
|
|
|
245
|
-
// Pick the largest array as the main list
|
|
246
152
|
candidates.sort((a, b) => b.items.length - a.items.length);
|
|
247
153
|
const best = candidates[0];
|
|
154
|
+
const sample = best.items[0];
|
|
155
|
+
const sampleFields = sample && typeof sample === 'object' ? flattenFields(sample, '', 2) : [];
|
|
248
156
|
|
|
249
|
-
// Analyze field names in the first item
|
|
250
|
-
const sampleItem = best.items[0];
|
|
251
|
-
const sampleFieldNames = sampleItem && typeof sampleItem === 'object'
|
|
252
|
-
? flattenFieldNames(sampleItem, '', 2)
|
|
253
|
-
: [];
|
|
254
|
-
|
|
255
|
-
// Match fields to semantic roles
|
|
256
157
|
const detectedFields: Record<string, string> = {};
|
|
257
158
|
for (const [role, aliases] of Object.entries(FIELD_ROLES)) {
|
|
258
|
-
for (const
|
|
259
|
-
|
|
260
|
-
if (aliases.includes(basename)) {
|
|
261
|
-
detectedFields[role] = fieldName;
|
|
262
|
-
break;
|
|
263
|
-
}
|
|
159
|
+
for (const f of sampleFields) {
|
|
160
|
+
if (aliases.includes(f.split('.').pop()?.toLowerCase() ?? '')) { detectedFields[role] = f; break; }
|
|
264
161
|
}
|
|
265
162
|
}
|
|
266
163
|
|
|
267
|
-
return {
|
|
268
|
-
itemPath: best.path || null,
|
|
269
|
-
itemCount: best.items.length,
|
|
270
|
-
detectedFields,
|
|
271
|
-
sampleFieldNames,
|
|
272
|
-
};
|
|
164
|
+
return { itemPath: best.path || null, itemCount: best.items.length, detectedFields, sampleFields };
|
|
273
165
|
}
|
|
274
166
|
|
|
275
|
-
|
|
276
|
-
* Flatten nested object field names for analysis.
|
|
277
|
-
*/
|
|
278
|
-
function flattenFieldNames(obj: any, prefix: string, maxDepth: number): string[] {
|
|
167
|
+
function flattenFields(obj: any, prefix: string, maxDepth: number): string[] {
|
|
279
168
|
if (maxDepth <= 0 || !obj || typeof obj !== 'object') return [];
|
|
280
169
|
const names: string[] = [];
|
|
281
170
|
for (const key of Object.keys(obj)) {
|
|
282
|
-
const
|
|
283
|
-
names.push(
|
|
284
|
-
if (obj[key] && typeof obj[key] === 'object' && !Array.isArray(obj[key]))
|
|
285
|
-
names.push(...flattenFieldNames(obj[key], fullKey, maxDepth - 1));
|
|
286
|
-
}
|
|
171
|
+
const full = prefix ? `${prefix}.${key}` : key;
|
|
172
|
+
names.push(full);
|
|
173
|
+
if (obj[key] && typeof obj[key] === 'object' && !Array.isArray(obj[key])) names.push(...flattenFields(obj[key], full, maxDepth - 1));
|
|
287
174
|
}
|
|
288
175
|
return names;
|
|
289
176
|
}
|
|
290
177
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
const ct = entry.contentType.toLowerCase();
|
|
302
|
-
if (IGNORED_CONTENT_TYPES.has(ct.split(';')[0]?.trim() ?? '') ||
|
|
303
|
-
ct.includes('image/') || ct.includes('font/') || ct.includes('css') ||
|
|
304
|
-
ct.includes('javascript') || ct.includes('wasm')) continue;
|
|
305
|
-
|
|
306
|
-
// Skip non-JSON and failed responses
|
|
307
|
-
if (entry.status && entry.status >= 400) continue;
|
|
308
|
-
|
|
309
|
-
const pattern = urlToPattern(entry.url);
|
|
310
|
-
const queryParams = extractQueryParams(entry.url);
|
|
311
|
-
const paramNames = Object.keys(queryParams).filter(k => !VOLATILE_PARAMS.has(k));
|
|
312
|
-
|
|
313
|
-
const key = `${entry.method}:${pattern}`;
|
|
314
|
-
if (seen.has(key)) continue;
|
|
315
|
-
|
|
316
|
-
const endpoint: AnalyzedEndpoint = {
|
|
317
|
-
pattern,
|
|
318
|
-
method: entry.method,
|
|
319
|
-
url: entry.url,
|
|
320
|
-
status: entry.status,
|
|
321
|
-
contentType: ct,
|
|
322
|
-
queryParams: paramNames,
|
|
323
|
-
hasSearchParam: paramNames.some(p => SEARCH_PARAMS.has(p)),
|
|
324
|
-
hasPaginationParam: paramNames.some(p => PAGINATION_PARAMS.has(p)),
|
|
325
|
-
hasLimitParam: paramNames.some(p => LIMIT_PARAMS.has(p)),
|
|
326
|
-
authIndicators: detectAuthIndicators(entry.requestHeaders),
|
|
327
|
-
responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
|
|
328
|
-
};
|
|
329
|
-
|
|
330
|
-
seen.set(key, endpoint);
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
return [...seen.values()];
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
/**
|
|
337
|
-
* Infer what strategy to use based on endpoint analysis.
|
|
338
|
-
*/
|
|
339
|
-
function inferStrategy(endpoint: AnalyzedEndpoint): string {
|
|
340
|
-
if (endpoint.authIndicators.includes('signature')) return 'intercept';
|
|
341
|
-
if (endpoint.authIndicators.includes('transaction')) return 'header';
|
|
342
|
-
if (endpoint.authIndicators.includes('bearer') || endpoint.authIndicators.includes('csrf')) return 'header';
|
|
343
|
-
// Check if the URL is a public API (no auth indicators)
|
|
344
|
-
if (endpoint.authIndicators.length === 0) {
|
|
345
|
-
// If it's the same domain, likely cookie auth
|
|
346
|
-
return 'cookie';
|
|
347
|
-
}
|
|
348
|
-
return 'cookie';
|
|
178
|
+
function scoreEndpoint(ep: { contentType: string; responseAnalysis: any; pattern: string; status: number | null; hasSearchParam: boolean; hasPaginationParam: boolean; hasLimitParam: boolean }): number {
|
|
179
|
+
let s = 0;
|
|
180
|
+
if (ep.contentType.includes('json')) s += 10;
|
|
181
|
+
if (ep.responseAnalysis) { s += 5; s += Math.min(ep.responseAnalysis.itemCount, 10); s += Object.keys(ep.responseAnalysis.detectedFields).length * 2; }
|
|
182
|
+
if (ep.pattern.includes('/api/') || ep.pattern.includes('/x/')) s += 3;
|
|
183
|
+
if (ep.hasSearchParam) s += 3;
|
|
184
|
+
if (ep.hasPaginationParam) s += 2;
|
|
185
|
+
if (ep.hasLimitParam) s += 2;
|
|
186
|
+
if (ep.status === 200) s += 2;
|
|
187
|
+
return s;
|
|
349
188
|
}
|
|
350
189
|
|
|
351
|
-
|
|
352
|
-
* Infer the capability name from an endpoint pattern.
|
|
353
|
-
*/
|
|
354
|
-
function inferCapabilityName(endpoint: AnalyzedEndpoint, goal?: string): string {
|
|
190
|
+
function inferCapabilityName(url: string, goal?: string): string {
|
|
355
191
|
if (goal) return goal;
|
|
356
|
-
|
|
357
|
-
const u = endpoint.url.toLowerCase();
|
|
358
|
-
const p = endpoint.pattern.toLowerCase();
|
|
359
|
-
|
|
360
|
-
// Match common patterns
|
|
361
|
-
if (endpoint.hasSearchParam) return 'search';
|
|
192
|
+
const u = url.toLowerCase();
|
|
362
193
|
if (u.includes('hot') || u.includes('popular') || u.includes('ranking') || u.includes('trending')) return 'hot';
|
|
194
|
+
if (u.includes('search')) return 'search';
|
|
363
195
|
if (u.includes('feed') || u.includes('timeline') || u.includes('dynamic')) return 'feed';
|
|
364
196
|
if (u.includes('comment') || u.includes('reply')) return 'comments';
|
|
365
197
|
if (u.includes('history')) return 'history';
|
|
366
|
-
if (u.includes('profile') || u.includes('userinfo') || u.includes('/me')
|
|
367
|
-
if (u.includes('video') || u.includes('article') || u.includes('detail') || u.includes('view')) return 'detail';
|
|
198
|
+
if (u.includes('profile') || u.includes('userinfo') || u.includes('/me')) return 'me';
|
|
368
199
|
if (u.includes('favorite') || u.includes('collect') || u.includes('bookmark')) return 'favorite';
|
|
369
|
-
if (u.includes('notification') || u.includes('notice')) return 'notifications';
|
|
370
|
-
|
|
371
|
-
// Fallback: try to extract from path
|
|
372
200
|
try {
|
|
373
|
-
const
|
|
374
|
-
|
|
375
|
-
if (segments.length) return segments[segments.length - 1].replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
|
201
|
+
const segs = new URL(url).pathname.split('/').filter(s => s && !s.match(/^\d+$/) && !s.match(/^[0-9a-f]{8,}$/i));
|
|
202
|
+
if (segs.length) return segs[segs.length - 1].replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
|
376
203
|
} catch {}
|
|
377
|
-
|
|
378
204
|
return 'data';
|
|
379
205
|
}
|
|
380
206
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
if (!analysis) return ['title', 'url'];
|
|
386
|
-
const cols: string[] = [];
|
|
387
|
-
// Prioritize: title → url → author → score → time
|
|
388
|
-
const priority = ['title', 'url', 'author', 'score', 'time'];
|
|
389
|
-
for (const role of priority) {
|
|
390
|
-
if (analysis.detectedFields[role]) cols.push(role);
|
|
391
|
-
}
|
|
392
|
-
return cols.length ? cols : ['title', 'url'];
|
|
207
|
+
function inferStrategy(authIndicators: string[]): string {
|
|
208
|
+
if (authIndicators.includes('signature')) return 'intercept';
|
|
209
|
+
if (authIndicators.includes('bearer') || authIndicators.includes('csrf')) return 'header';
|
|
210
|
+
return 'cookie';
|
|
393
211
|
}
|
|
394
212
|
|
|
395
|
-
|
|
396
|
-
* Build recommended args from endpoint query params.
|
|
397
|
-
*/
|
|
398
|
-
function buildRecommendedArgs(endpoint: AnalyzedEndpoint): InferredCapability['recommendedArgs'] {
|
|
399
|
-
const args: InferredCapability['recommendedArgs'] = [];
|
|
213
|
+
// ── Framework detection ────────────────────────────────────────────────────
|
|
400
214
|
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
215
|
+
const FRAMEWORK_DETECT_JS = `
|
|
216
|
+
() => {
|
|
217
|
+
const r = {};
|
|
218
|
+
try {
|
|
219
|
+
const app = document.querySelector('#app');
|
|
220
|
+
r.vue3 = !!(app && app.__vue_app__);
|
|
221
|
+
r.vue2 = !!(app && app.__vue__);
|
|
222
|
+
r.react = !!window.__REACT_DEVTOOLS_GLOBAL_HOOK__ || !!document.querySelector('[data-reactroot]');
|
|
223
|
+
r.nextjs = !!window.__NEXT_DATA__;
|
|
224
|
+
r.nuxt = !!window.__NUXT__;
|
|
225
|
+
if (r.vue3 && app.__vue_app__) { const gp = app.__vue_app__.config?.globalProperties; r.pinia = !!(gp && gp.$pinia); r.vuex = !!(gp && gp.$store); }
|
|
226
|
+
} catch {}
|
|
227
|
+
return r;
|
|
404
228
|
}
|
|
229
|
+
`;
|
|
405
230
|
|
|
406
|
-
|
|
407
|
-
args.push({ name: 'limit', type: 'int', required: false, default: 20 });
|
|
231
|
+
// ── Store discovery ────────────────────────────────────────────────────────
|
|
408
232
|
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
233
|
+
const STORE_DISCOVER_JS = `
|
|
234
|
+
() => {
|
|
235
|
+
const stores = [];
|
|
236
|
+
try {
|
|
237
|
+
const app = document.querySelector('#app');
|
|
238
|
+
if (!app?.__vue_app__) return stores;
|
|
239
|
+
const gp = app.__vue_app__.config?.globalProperties;
|
|
412
240
|
|
|
413
|
-
|
|
414
|
-
|
|
241
|
+
// Pinia stores
|
|
242
|
+
const pinia = gp?.$pinia;
|
|
243
|
+
if (pinia?._s) {
|
|
244
|
+
pinia._s.forEach((store, id) => {
|
|
245
|
+
const actions = [];
|
|
246
|
+
const stateKeys = [];
|
|
247
|
+
for (const k in store) {
|
|
248
|
+
try {
|
|
249
|
+
if (k.startsWith('$') || k.startsWith('_')) continue;
|
|
250
|
+
if (typeof store[k] === 'function') actions.push(k);
|
|
251
|
+
else stateKeys.push(k);
|
|
252
|
+
} catch {}
|
|
253
|
+
}
|
|
254
|
+
stores.push({ type: 'pinia', id, actions: actions.slice(0, 20), stateKeys: stateKeys.slice(0, 15) });
|
|
255
|
+
});
|
|
256
|
+
}
|
|
415
257
|
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
score += Object.keys(ep.responseAnalysis.detectedFields).length * 2;
|
|
258
|
+
// Vuex store modules
|
|
259
|
+
const vuex = gp?.$store;
|
|
260
|
+
if (vuex?._modules?.root?._children) {
|
|
261
|
+
const children = vuex._modules.root._children;
|
|
262
|
+
for (const [modName, mod] of Object.entries(children)) {
|
|
263
|
+
const actions = Object.keys(mod._rawModule?.actions ?? {}).slice(0, 20);
|
|
264
|
+
const stateKeys = Object.keys(mod.state ?? {}).slice(0, 15);
|
|
265
|
+
stores.push({ type: 'vuex', id: modName, actions, stateKeys });
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
} catch {}
|
|
269
|
+
return stores;
|
|
429
270
|
}
|
|
430
|
-
// API-like path patterns
|
|
431
|
-
if (ep.pattern.includes('/api/') || ep.pattern.includes('/x/')) score += 3;
|
|
432
|
-
// Has search/pagination params
|
|
433
|
-
if (ep.hasSearchParam) score += 3;
|
|
434
|
-
if (ep.hasPaginationParam) score += 2;
|
|
435
|
-
if (ep.hasLimitParam) score += 2;
|
|
436
|
-
// 200 OK
|
|
437
|
-
if (ep.status === 200) score += 2;
|
|
438
|
-
return score;
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
// ── Framework detection ────────────────────────────────────────────────────
|
|
442
|
-
|
|
443
|
-
const FRAMEWORK_DETECT_JS = `
|
|
444
|
-
(() => {
|
|
445
|
-
const result = {};
|
|
446
|
-
try {
|
|
447
|
-
const app = document.querySelector('#app');
|
|
448
|
-
result.vue3 = !!(app && app.__vue_app__);
|
|
449
|
-
result.vue2 = !!(app && app.__vue__);
|
|
450
|
-
result.react = !!window.__REACT_DEVTOOLS_GLOBAL_HOOK__ || !!document.querySelector('[data-reactroot]');
|
|
451
|
-
result.nextjs = !!window.__NEXT_DATA__;
|
|
452
|
-
result.nuxt = !!window.__NUXT__;
|
|
453
|
-
if (result.vue3 && app.__vue_app__) {
|
|
454
|
-
const gp = app.__vue_app__.config?.globalProperties;
|
|
455
|
-
result.pinia = !!(gp && gp.$pinia);
|
|
456
|
-
result.vuex = !!(gp && gp.$store);
|
|
457
|
-
}
|
|
458
|
-
} catch {}
|
|
459
|
-
return JSON.stringify(result);
|
|
460
|
-
})()
|
|
461
271
|
`;
|
|
462
272
|
|
|
463
|
-
|
|
273
|
+
export interface DiscoveredStore {
|
|
274
|
+
type: 'pinia' | 'vuex';
|
|
275
|
+
id: string;
|
|
276
|
+
actions: string[];
|
|
277
|
+
stateKeys: string[];
|
|
278
|
+
}
|
|
464
279
|
|
|
465
|
-
|
|
466
|
-
const site = opts.site ?? detectSiteName(url);
|
|
467
|
-
const outDir = opts.outDir ?? path.join('.opencli', 'explore', site);
|
|
468
|
-
fs.mkdirSync(outDir, { recursive: true });
|
|
280
|
+
// ── Main explore function ──────────────────────────────────────────────────
|
|
469
281
|
|
|
470
|
-
|
|
282
|
+
export async function exploreUrl(
|
|
283
|
+
url: string,
|
|
284
|
+
opts: {
|
|
285
|
+
BrowserFactory: new () => any;
|
|
286
|
+
site?: string; goal?: string; authenticated?: boolean;
|
|
287
|
+
outDir?: string; waitSeconds?: number; query?: string;
|
|
288
|
+
clickLabels?: string[]; auto?: boolean;
|
|
289
|
+
},
|
|
290
|
+
): Promise<Record<string, any>> {
|
|
291
|
+
const waitSeconds = opts.waitSeconds ?? 3.0;
|
|
292
|
+
const exploreTimeout = Math.max(DEFAULT_BROWSER_EXPLORE_TIMEOUT, 45.0 + waitSeconds * 8.0);
|
|
293
|
+
|
|
294
|
+
return browserSession(opts.BrowserFactory, async (page) => {
|
|
471
295
|
return runWithTimeout((async () => {
|
|
472
296
|
// Step 1: Navigate
|
|
473
297
|
await page.goto(url);
|
|
474
|
-
await page.wait(
|
|
298
|
+
await page.wait(waitSeconds);
|
|
475
299
|
|
|
476
|
-
// Step 2: Auto-scroll to trigger lazy loading
|
|
477
|
-
for (let i = 0; i < 3; i++) {
|
|
478
|
-
await page.scroll('down');
|
|
479
|
-
await page.wait(1);
|
|
480
|
-
}
|
|
300
|
+
// Step 2: Auto-scroll to trigger lazy loading (use keyboard since page.scroll may not exist)
|
|
301
|
+
for (let i = 0; i < 3; i++) { try { await page.pressKey('End'); } catch {} await page.wait(1); }
|
|
481
302
|
|
|
482
|
-
// Step 3:
|
|
483
|
-
const
|
|
484
|
-
const networkEntries = parseNetworkOutput(rawNetwork);
|
|
303
|
+
// Step 3: Read page metadata
|
|
304
|
+
const metadata = await readPageMetadata(page);
|
|
485
305
|
|
|
486
|
-
// Step 4:
|
|
487
|
-
const
|
|
488
|
-
|
|
489
|
-
);
|
|
306
|
+
// Step 4: Capture network traffic
|
|
307
|
+
const rawNetwork = await page.networkRequests(false);
|
|
308
|
+
const networkEntries = parseNetworkRequests(rawNetwork);
|
|
490
309
|
|
|
310
|
+
// Step 5: For JSON endpoints, re-fetch response body in-browser
|
|
311
|
+
const jsonEndpoints = networkEntries.filter(e => e.contentType.includes('json') && e.method === 'GET' && e.status === 200);
|
|
491
312
|
for (const ep of jsonEndpoints.slice(0, 10)) {
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
async () => {
|
|
498
|
-
try {
|
|
499
|
-
const resp = await fetch(${JSON.stringify(ep.url)}, { credentials: 'include' });
|
|
500
|
-
if (!resp.ok) return null;
|
|
501
|
-
const data = await resp.json();
|
|
502
|
-
return JSON.stringify(data).slice(0, 10000);
|
|
503
|
-
} catch { return null; }
|
|
504
|
-
}
|
|
505
|
-
`);
|
|
506
|
-
if (bodyResult && typeof bodyResult === 'string') {
|
|
507
|
-
try { ep.responseBody = JSON.parse(bodyResult); } catch {}
|
|
508
|
-
} else if (bodyResult && typeof bodyResult === 'object') {
|
|
509
|
-
ep.responseBody = bodyResult;
|
|
510
|
-
}
|
|
511
|
-
} catch {}
|
|
512
|
-
}
|
|
313
|
+
try {
|
|
314
|
+
const body = await page.evaluate(`async () => { try { const r = await fetch(${JSON.stringify(ep.url)}, {credentials:'include'}); if (!r.ok) return null; const d = await r.json(); return JSON.stringify(d).slice(0,10000); } catch { return null; } }`);
|
|
315
|
+
if (body && typeof body === 'string') { try { ep.responseBody = JSON.parse(body); } catch {} }
|
|
316
|
+
else if (body && typeof body === 'object') ep.responseBody = body;
|
|
317
|
+
} catch {}
|
|
513
318
|
}
|
|
514
319
|
|
|
515
|
-
// Step
|
|
320
|
+
// Step 6: Detect framework
|
|
516
321
|
let framework: Record<string, boolean> = {};
|
|
517
|
-
try {
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
() => JSON.stringify({ url: window.location.href, title: document.title || '' })
|
|
528
|
-
`);
|
|
529
|
-
if (typeof meta === 'string') {
|
|
530
|
-
const parsed = JSON.parse(meta);
|
|
531
|
-
title = parsed.title; finalUrl = parsed.url;
|
|
532
|
-
} else if (typeof meta === 'object') {
|
|
533
|
-
title = meta.title; finalUrl = meta.url;
|
|
534
|
-
}
|
|
535
|
-
} catch {}
|
|
322
|
+
try { const fw = await page.evaluate(FRAMEWORK_DETECT_JS); if (fw && typeof fw === 'object') framework = fw; } catch {}
|
|
323
|
+
|
|
324
|
+
// Step 6.5: Discover stores (Pinia / Vuex)
|
|
325
|
+
let stores: DiscoveredStore[] = [];
|
|
326
|
+
if (framework.pinia || framework.vuex) {
|
|
327
|
+
try {
|
|
328
|
+
const raw = await page.evaluate(STORE_DISCOVER_JS);
|
|
329
|
+
if (Array.isArray(raw)) stores = raw;
|
|
330
|
+
} catch {}
|
|
331
|
+
}
|
|
536
332
|
|
|
537
333
|
// Step 7: Analyze endpoints
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
334
|
+
const seen = new Map<string, AnalyzedEndpoint>();
|
|
335
|
+
for (const entry of networkEntries) {
|
|
336
|
+
if (!entry.url) continue;
|
|
337
|
+
const ct = entry.contentType.toLowerCase();
|
|
338
|
+
if (ct.includes('image/') || ct.includes('font/') || ct.includes('css') || ct.includes('javascript') || ct.includes('wasm')) continue;
|
|
339
|
+
if (entry.status && entry.status >= 400) continue;
|
|
340
|
+
|
|
341
|
+
const pattern = urlToPattern(entry.url);
|
|
342
|
+
const key = `${entry.method}:${pattern}`;
|
|
343
|
+
if (seen.has(key)) continue;
|
|
344
|
+
|
|
345
|
+
const qp: string[] = [];
|
|
346
|
+
try { new URL(entry.url).searchParams.forEach((_v, k) => { if (!VOLATILE_PARAMS.has(k)) qp.push(k); }); } catch {}
|
|
347
|
+
|
|
348
|
+
const ep: AnalyzedEndpoint = {
|
|
349
|
+
pattern, method: entry.method, url: entry.url, status: entry.status, contentType: ct,
|
|
350
|
+
queryParams: qp, hasSearchParam: qp.some(p => SEARCH_PARAMS.has(p)),
|
|
351
|
+
hasPaginationParam: qp.some(p => PAGINATION_PARAMS.has(p)),
|
|
352
|
+
hasLimitParam: qp.some(p => LIMIT_PARAMS.has(p)),
|
|
353
|
+
authIndicators: detectAuthIndicators(entry.requestHeaders),
|
|
354
|
+
responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
|
|
355
|
+
score: 0,
|
|
356
|
+
};
|
|
357
|
+
ep.score = scoreEndpoint(ep);
|
|
358
|
+
seen.set(key, ep);
|
|
359
|
+
}
|
|
541
360
|
|
|
542
|
-
|
|
543
|
-
const scoredEndpoints = analyzedEndpoints
|
|
544
|
-
.map(ep => ({ ...ep, score: scoreEndpoint(ep) }))
|
|
545
|
-
.filter(ep => ep.score >= 5)
|
|
546
|
-
.sort((a, b) => b.score - a.score);
|
|
361
|
+
const analyzedEndpoints = [...seen.values()].filter(ep => ep.score >= 5).sort((a, b) => b.score - a.score);
|
|
547
362
|
|
|
548
|
-
// Step
|
|
363
|
+
// Step 8: Infer capabilities
|
|
549
364
|
const capabilities: InferredCapability[] = [];
|
|
550
365
|
const usedNames = new Set<string>();
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
let capName = inferCapabilityName(ep, opts.goal);
|
|
554
|
-
// Deduplicate names
|
|
366
|
+
for (const ep of analyzedEndpoints.slice(0, 8)) {
|
|
367
|
+
let capName = inferCapabilityName(ep.url, opts.goal);
|
|
555
368
|
if (usedNames.has(capName)) {
|
|
556
369
|
const suffix = ep.pattern.split('/').filter(s => s && !s.startsWith('{') && !s.includes('.')).pop();
|
|
557
370
|
capName = suffix ? `${capName}_${suffix}` : `${capName}_${usedNames.size}`;
|
|
558
371
|
}
|
|
559
372
|
usedNames.add(capName);
|
|
560
373
|
|
|
374
|
+
const cols: string[] = [];
|
|
375
|
+
if (ep.responseAnalysis) {
|
|
376
|
+
for (const role of ['title', 'url', 'author', 'score', 'time']) {
|
|
377
|
+
if (ep.responseAnalysis.detectedFields[role]) cols.push(role);
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
const args: InferredCapability['recommendedArgs'] = [];
|
|
382
|
+
if (ep.hasSearchParam) args.push({ name: 'keyword', type: 'str', required: true });
|
|
383
|
+
args.push({ name: 'limit', type: 'int', required: false, default: 20 });
|
|
384
|
+
if (ep.hasPaginationParam) args.push({ name: 'page', type: 'int', required: false, default: 1 });
|
|
385
|
+
|
|
386
|
+
// Link store actions to capabilities when store-action strategy is recommended
|
|
387
|
+
const epStrategy = inferStrategy(ep.authIndicators);
|
|
388
|
+
let storeHint: { store: string; action: string } | undefined;
|
|
389
|
+
if ((epStrategy === 'intercept' || ep.authIndicators.includes('signature')) && stores.length > 0) {
|
|
390
|
+
// Try to find a store/action that matches this endpoint's purpose
|
|
391
|
+
for (const s of stores) {
|
|
392
|
+
const matchingAction = s.actions.find(a =>
|
|
393
|
+
capName.split('_').some(part => a.toLowerCase().includes(part)) ||
|
|
394
|
+
a.toLowerCase().includes('fetch') || a.toLowerCase().includes('get')
|
|
395
|
+
);
|
|
396
|
+
if (matchingAction) {
|
|
397
|
+
storeHint = { store: s.id, action: matchingAction };
|
|
398
|
+
break;
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
|
|
561
403
|
capabilities.push({
|
|
562
|
-
name: capName,
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
confidence: Math.min(ep.score / 20, 1.0),
|
|
566
|
-
endpoint: ep.pattern,
|
|
404
|
+
name: capName, description: `${opts.site ?? detectSiteName(url)} ${capName}`,
|
|
405
|
+
strategy: storeHint ? 'store-action' : epStrategy,
|
|
406
|
+
confidence: Math.min(ep.score / 20, 1.0), endpoint: ep.pattern,
|
|
567
407
|
itemPath: ep.responseAnalysis?.itemPath ?? null,
|
|
568
|
-
recommendedColumns:
|
|
569
|
-
recommendedArgs:
|
|
408
|
+
recommendedColumns: cols.length ? cols : ['title', 'url'],
|
|
409
|
+
recommendedArgs: args,
|
|
410
|
+
...(storeHint ? { storeHint } : {}),
|
|
570
411
|
});
|
|
571
412
|
}
|
|
572
413
|
|
|
573
|
-
// Step
|
|
574
|
-
const
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
target_url: url,
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
endpoint_count: analyzedEndpoints.length,
|
|
588
|
-
api_endpoint_count: scoredEndpoints.length,
|
|
589
|
-
capabilities,
|
|
590
|
-
endpoints: scoredEndpoints.map(ep => ({
|
|
591
|
-
pattern: ep.pattern,
|
|
592
|
-
method: ep.method,
|
|
593
|
-
url: ep.url,
|
|
594
|
-
status: ep.status,
|
|
595
|
-
contentType: ep.contentType,
|
|
596
|
-
score: ep.score,
|
|
597
|
-
queryParams: ep.queryParams,
|
|
598
|
-
itemPath: ep.responseAnalysis?.itemPath ?? null,
|
|
599
|
-
itemCount: ep.responseAnalysis?.itemCount ?? 0,
|
|
600
|
-
detectedFields: ep.responseAnalysis?.detectedFields ?? {},
|
|
601
|
-
authIndicators: ep.authIndicators,
|
|
602
|
-
})),
|
|
603
|
-
auth_indicators: [...allAuthIndicators],
|
|
414
|
+
// Step 9: Determine overall auth strategy
|
|
415
|
+
const allAuth = new Set(analyzedEndpoints.flatMap(ep => ep.authIndicators));
|
|
416
|
+
const topStrategy = allAuth.has('signature') ? 'intercept' : allAuth.has('bearer') || allAuth.has('csrf') ? 'header' : allAuth.size === 0 ? 'public' : 'cookie';
|
|
417
|
+
|
|
418
|
+
const siteName = opts.site ?? detectSiteName(metadata.url || url);
|
|
419
|
+
const targetDir = opts.outDir ?? path.join('.opencli', 'explore', siteName);
|
|
420
|
+
fs.mkdirSync(targetDir, { recursive: true });
|
|
421
|
+
|
|
422
|
+
const result = {
|
|
423
|
+
site: siteName, target_url: url, final_url: metadata.url, title: metadata.title,
|
|
424
|
+
framework, stores, top_strategy: topStrategy,
|
|
425
|
+
endpoint_count: analyzedEndpoints.length + [...seen.values()].filter(ep => ep.score < 5).length,
|
|
426
|
+
api_endpoint_count: analyzedEndpoints.length,
|
|
427
|
+
capabilities, auth_indicators: [...allAuth],
|
|
604
428
|
};
|
|
605
|
-
})(), { timeout: DEFAULT_BROWSER_EXPLORE_TIMEOUT, label: 'explore' });
|
|
606
|
-
});
|
|
607
429
|
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
430
|
+
// Write artifacts
|
|
431
|
+
fs.writeFileSync(path.join(targetDir, 'manifest.json'), JSON.stringify({
|
|
432
|
+
site: siteName, target_url: url, final_url: metadata.url, title: metadata.title,
|
|
433
|
+
framework, stores: stores.map(s => ({ type: s.type, id: s.id, actions: s.actions })),
|
|
434
|
+
top_strategy: topStrategy, explored_at: new Date().toISOString(),
|
|
435
|
+
}, null, 2));
|
|
436
|
+
fs.writeFileSync(path.join(targetDir, 'endpoints.json'), JSON.stringify(analyzedEndpoints.map(ep => ({
|
|
437
|
+
pattern: ep.pattern, method: ep.method, url: ep.url, status: ep.status,
|
|
438
|
+
contentType: ep.contentType, score: ep.score, queryParams: ep.queryParams,
|
|
439
|
+
itemPath: ep.responseAnalysis?.itemPath ?? null, itemCount: ep.responseAnalysis?.itemCount ?? 0,
|
|
440
|
+
detectedFields: ep.responseAnalysis?.detectedFields ?? {}, authIndicators: ep.authIndicators,
|
|
441
|
+
})), null, 2));
|
|
442
|
+
fs.writeFileSync(path.join(targetDir, 'capabilities.json'), JSON.stringify(capabilities, null, 2));
|
|
443
|
+
fs.writeFileSync(path.join(targetDir, 'auth.json'), JSON.stringify({
|
|
444
|
+
top_strategy: topStrategy, indicators: [...allAuth], framework,
|
|
445
|
+
}, null, 2));
|
|
446
|
+
if (stores.length > 0) {
|
|
447
|
+
fs.writeFileSync(path.join(targetDir, 'stores.json'), JSON.stringify(stores, null, 2));
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
return { ...result, out_dir: targetDir };
|
|
451
|
+
})(), { timeout: exploreTimeout, label: `Explore ${url}` });
|
|
452
|
+
});
|
|
628
453
|
}
|
|
629
454
|
|
|
630
|
-
export function renderExploreSummary(result: any): string {
|
|
455
|
+
export function renderExploreSummary(result: Record<string, any>): string {
|
|
631
456
|
const lines = [
|
|
632
|
-
'opencli
|
|
633
|
-
`
|
|
634
|
-
`URL: ${result.target_url}`,
|
|
635
|
-
`Title: ${result.title || '(none)'}`,
|
|
636
|
-
`Strategy: ${result.top_strategy}`,
|
|
457
|
+
'opencli probe: OK', `Site: ${result.site}`, `URL: ${result.target_url}`,
|
|
458
|
+
`Title: ${result.title || '(none)'}`, `Strategy: ${result.top_strategy}`,
|
|
637
459
|
`Endpoints: ${result.endpoint_count} total, ${result.api_endpoint_count} API`,
|
|
638
460
|
`Capabilities: ${result.capabilities?.length ?? 0}`,
|
|
639
461
|
];
|
|
640
462
|
for (const cap of (result.capabilities ?? []).slice(0, 5)) {
|
|
641
|
-
|
|
463
|
+
const storeInfo = cap.storeHint ? ` → ${cap.storeHint.store}.${cap.storeHint.action}()` : '';
|
|
464
|
+
lines.push(` • ${cap.name} (${cap.strategy}, ${(cap.confidence * 100).toFixed(0)}%)${storeInfo}`);
|
|
642
465
|
}
|
|
643
466
|
const fw = result.framework ?? {};
|
|
644
467
|
const fwNames = Object.entries(fw).filter(([, v]) => v).map(([k]) => k);
|
|
645
468
|
if (fwNames.length) lines.push(`Framework: ${fwNames.join(', ')}`);
|
|
469
|
+
const stores: DiscoveredStore[] = result.stores ?? [];
|
|
470
|
+
if (stores.length) {
|
|
471
|
+
lines.push(`Stores: ${stores.length}`);
|
|
472
|
+
for (const s of stores.slice(0, 5)) {
|
|
473
|
+
lines.push(` • ${s.type}/${s.id}: ${s.actions.slice(0, 5).join(', ')}${s.actions.length > 5 ? '...' : ''}`);
|
|
474
|
+
}
|
|
475
|
+
}
|
|
646
476
|
lines.push(`Output: ${result.out_dir}`);
|
|
647
477
|
return lines.join('\n');
|
|
648
478
|
}
|
|
479
|
+
|
|
480
|
+
async function readPageMetadata(page: any): Promise<{ url: string; title: string }> {
|
|
481
|
+
try {
|
|
482
|
+
const result = await page.evaluate(`() => ({ url: window.location.href, title: document.title || '' })`);
|
|
483
|
+
if (result && typeof result === 'object') return { url: String(result.url ?? ''), title: String(result.title ?? '') };
|
|
484
|
+
} catch {}
|
|
485
|
+
return { url: '', title: '' };
|
|
486
|
+
}
|