@jackwener/opencli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +26 -0
- package/.github/workflows/release.yml +40 -0
- package/README.md +67 -0
- package/SKILL.md +230 -0
- package/dist/bilibili.d.ts +13 -0
- package/dist/bilibili.js +93 -0
- package/dist/browser.d.ts +48 -0
- package/dist/browser.js +261 -0
- package/dist/clis/bilibili/favorite.d.ts +1 -0
- package/dist/clis/bilibili/favorite.js +39 -0
- package/dist/clis/bilibili/feed.d.ts +1 -0
- package/dist/clis/bilibili/feed.js +64 -0
- package/dist/clis/bilibili/history.d.ts +1 -0
- package/dist/clis/bilibili/history.js +44 -0
- package/dist/clis/bilibili/me.d.ts +1 -0
- package/dist/clis/bilibili/me.js +13 -0
- package/dist/clis/bilibili/search.d.ts +1 -0
- package/dist/clis/bilibili/search.js +24 -0
- package/dist/clis/bilibili/user-videos.d.ts +1 -0
- package/dist/clis/bilibili/user-videos.js +38 -0
- package/dist/clis/github/search.d.ts +1 -0
- package/dist/clis/github/search.js +20 -0
- package/dist/clis/index.d.ts +13 -0
- package/dist/clis/index.js +16 -0
- package/dist/clis/zhihu/search.d.ts +1 -0
- package/dist/clis/zhihu/search.js +58 -0
- package/dist/engine.d.ts +6 -0
- package/dist/engine.js +77 -0
- package/dist/explore.d.ts +17 -0
- package/dist/explore.js +603 -0
- package/dist/generate.d.ts +11 -0
- package/dist/generate.js +134 -0
- package/dist/main.d.ts +5 -0
- package/dist/main.js +117 -0
- package/dist/output.d.ts +11 -0
- package/dist/output.js +98 -0
- package/dist/pipeline.d.ts +9 -0
- package/dist/pipeline.js +315 -0
- package/dist/promote.d.ts +1 -0
- package/dist/promote.js +3 -0
- package/dist/register.d.ts +2 -0
- package/dist/register.js +2 -0
- package/dist/registry.d.ts +50 -0
- package/dist/registry.js +42 -0
- package/dist/runtime.d.ts +12 -0
- package/dist/runtime.js +27 -0
- package/dist/scaffold.d.ts +2 -0
- package/dist/scaffold.js +2 -0
- package/dist/smoke.d.ts +2 -0
- package/dist/smoke.js +2 -0
- package/dist/snapshotFormatter.d.ts +9 -0
- package/dist/snapshotFormatter.js +41 -0
- package/dist/synthesize.d.ts +10 -0
- package/dist/synthesize.js +191 -0
- package/dist/validate.d.ts +2 -0
- package/dist/validate.js +73 -0
- package/dist/verify.d.ts +2 -0
- package/dist/verify.js +9 -0
- package/package.json +47 -0
- package/src/bilibili.ts +111 -0
- package/src/browser.ts +260 -0
- package/src/clis/bilibili/favorite.ts +42 -0
- package/src/clis/bilibili/feed.ts +71 -0
- package/src/clis/bilibili/history.ts +48 -0
- package/src/clis/bilibili/hot.yaml +38 -0
- package/src/clis/bilibili/me.ts +14 -0
- package/src/clis/bilibili/search.ts +25 -0
- package/src/clis/bilibili/user-videos.ts +42 -0
- package/src/clis/github/search.ts +21 -0
- package/src/clis/github/trending.yaml +58 -0
- package/src/clis/hackernews/top.yaml +36 -0
- package/src/clis/index.ts +19 -0
- package/src/clis/twitter/trending.yaml +40 -0
- package/src/clis/v2ex/hot.yaml +29 -0
- package/src/clis/v2ex/latest.yaml +28 -0
- package/src/clis/zhihu/hot.yaml +28 -0
- package/src/clis/zhihu/search.ts +65 -0
- package/src/engine.ts +86 -0
- package/src/explore.ts +648 -0
- package/src/generate.ts +145 -0
- package/src/main.ts +103 -0
- package/src/output.ts +96 -0
- package/src/pipeline.ts +295 -0
- package/src/promote.ts +3 -0
- package/src/register.ts +2 -0
- package/src/registry.ts +87 -0
- package/src/runtime.ts +36 -0
- package/src/scaffold.ts +2 -0
- package/src/smoke.ts +2 -0
- package/src/snapshotFormatter.ts +51 -0
- package/src/synthesize.ts +210 -0
- package/src/validate.ts +55 -0
- package/src/verify.ts +9 -0
- package/tsconfig.json +17 -0
package/dist/explore.js
ADDED
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deep Explore: intelligent API discovery with response analysis.
|
|
3
|
+
*
|
|
4
|
+
* Unlike simple page snapshots, Deep Explore intercepts network traffic,
|
|
5
|
+
* analyzes response schemas, and automatically infers capabilities that
|
|
6
|
+
* can be turned into CLI commands.
|
|
7
|
+
*
|
|
8
|
+
* Flow:
|
|
9
|
+
* 1. Navigate to target URL
|
|
10
|
+
* 2. Auto-scroll to trigger lazy loading
|
|
11
|
+
* 3. Capture network requests (with body analysis)
|
|
12
|
+
* 4. For each JSON response: detect list fields, infer columns, analyze auth
|
|
13
|
+
* 5. Detect frontend framework (Vue/React/Pinia/Next.js)
|
|
14
|
+
* 6. Generate structured capabilities.json
|
|
15
|
+
*/
|
|
16
|
+
import * as fs from 'node:fs';
|
|
17
|
+
import * as path from 'node:path';
|
|
18
|
+
import { browserSession, DEFAULT_BROWSER_EXPLORE_TIMEOUT, runWithTimeout } from './runtime.js';
|
|
19
|
+
// ── Site name detection ────────────────────────────────────────────────────
|
|
20
|
+
/** Hostnames whose site name cannot be derived from the registrable domain alone. */
const KNOWN_ALIASES = {
    'x.com': 'twitter', 'twitter.com': 'twitter',
    'news.ycombinator.com': 'hackernews',
    'www.zhihu.com': 'zhihu', 'www.bilibili.com': 'bilibili',
    'www.v2ex.com': 'v2ex', 'www.reddit.com': 'reddit',
    'www.xiaohongshu.com': 'xiaohongshu', 'www.douban.com': 'douban',
    'www.weibo.com': 'weibo', 'search.bilibili.com': 'bilibili',
};
/**
 * Second-level labels that, under a two-letter country TLD, form a compound
 * public suffix (co.uk, com.cn, ne.jp, ...). Heuristic list, not exhaustive.
 */
const COMPOUND_SUFFIX_LABELS = new Set(['co', 'com', 'org', 'net', 'gov', 'edu', 'ac', 'ne', 'or']);
/**
 * Derive a short, lowercase site slug from a URL.
 *
 * Resolution order:
 *   1. exact hostname match in KNOWN_ALIASES;
 *   2. the label left of the public suffix ("api.github.com" -> "github",
 *      "www.example.co.uk" -> "example");
 *   3. the single remaining label ("localhost").
 *
 * @param url Absolute URL string.
 * @returns Slug containing only [a-z0-9]; 'site' when the URL cannot be parsed
 *          or no usable label remains.
 */
function detectSiteName(url) {
    let host;
    try {
        host = new URL(url).hostname.toLowerCase();
    }
    catch {
        return 'site';
    }
    // Own-property check: `in` would also match inherited keys such as "constructor".
    if (Object.prototype.hasOwnProperty.call(KNOWN_ALIASES, host))
        return KNOWN_ALIASES[host];
    const labels = host.split('.').filter(label => label && label !== 'www');
    if (labels.length === 0)
        return 'site';
    let chosen = labels[0];
    if (labels.length >= 2) {
        const tld = labels[labels.length - 1];
        const secondLevel = labels[labels.length - 2];
        // Skip an extra label only for compound suffixes like "co.uk"; a plain
        // "sub.example.com" must resolve to "example", not "sub".
        const hasCompoundSuffix = labels.length >= 3 && tld.length === 2 && COMPOUND_SUFFIX_LABELS.has(secondLevel);
        chosen = hasCompoundSuffix ? labels[labels.length - 3] : secondLevel;
    }
    return chosen.replace(/[^a-z0-9]/g, '') || 'site';
}
|
|
46
|
+
// ── Field & capability inference ───────────────────────────────────────────
|
|
47
|
+
/**
 * Semantic column roles mapped to the response field names that commonly
 * carry them. Response analysis compares the lowercased last path segment of
 * each sampled field against these alias lists.
 */
const FIELD_ROLES = {
    title: [
        'title', 'name', 'text', 'content',
        'desc', 'description', 'headline', 'subject',
    ],
    url: [
        'url', 'uri', 'link', 'href', 'permalink',
        'jump_url', 'web_url', 'short_link', 'share_url',
    ],
    author: [
        'author', 'username', 'user_name', 'nickname', 'nick',
        'owner', 'creator', 'up_name', 'uname',
    ],
    score: [
        'score', 'hot', 'heat', 'likes', 'like_count', 'view_count',
        'views', 'stat', 'play', 'favorite_count', 'reply_count',
    ],
    time: [
        'time', 'created_at', 'publish_time', 'pub_time', 'date',
        'ctime', 'mtime', 'pubdate', 'created',
    ],
    id: ['id', 'aid', 'bvid', 'mid', 'uid', 'oid', 'note_id', 'item_id'],
    cover: ['cover', 'pic', 'image', 'thumbnail', 'poster', 'avatar'],
    category: ['category', 'tag', 'type', 'tname', 'channel', 'section'],
};
|
|
61
|
+
/** Query-parameter names that mark an endpoint as searchable. */
const SEARCH_PARAMS = new Set([
    'q', 'query', 'keyword', 'search',
    'wd', 'kw', 'search_query', 'w',
]);
/** Query-parameter names that mark an endpoint as paginated. */
const PAGINATION_PARAMS = new Set([
    'page', 'pn', 'offset',
    'cursor', 'next', 'page_num',
]);
/** Query-parameter names that control how many items are returned. */
const LIMIT_PARAMS = new Set([
    'limit', 'count', 'size', 'per_page',
    'page_size', 'ps', 'num',
]);
/** Content types (or type prefixes) of static resources that are never API endpoints. */
const IGNORED_CONTENT_TYPES = new Set([
    'image/',
    'font/',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/wasm',
]);
/** Query parameters that change on every request and are left out of URL patterns. */
const VOLATILE_PARAMS = new Set([
    'w_rid', 'wts', '_', 'callback',
    'timestamp', 't', 'nonce', 'sign',
]);
|
|
71
|
+
/**
 * Parse raw network output into a uniform list of entries.
 *
 * Accepted inputs:
 *   - a text dump, one request per line, in either of these shapes
 *       "[GET] https://host/path => [200] extra"
 *       "GET https://host/path → 200 (application/json)"   (legacy)
 *     Lines that match neither shape (e.g. "### Result") are ignored;
 *   - an array of request objects, whose fields are normalised from several
 *     alternative property names (url/request.url, status/statusCode, ...).
 *
 * For text lines a content type written in parentheses after the status is
 * used as-is. Without one, arrow-form lines are guessed to be JSON when the
 * URL ends in ".json" or contains "/api/" or "/x/".
 *
 * @param raw Text dump, array of request objects, or anything else.
 * @returns Array of entries `{ method, url, status, contentType }` (array
 *          input additionally carries `responseBody` and `requestHeaders`);
 *          an empty array for unsupported input.
 */
function parseNetworkOutput(raw) {
    if (typeof raw === 'string') {
        // Arrow form. The brackets are optional, so this also matches the legacy
        // "METHOD url → status (type)" shape; the trailing group keeps its content type.
        const arrowLine = /^\[?(GET|POST|PUT|DELETE|PATCH|OPTIONS)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?\s*(?:\(([^)]*)\))?/i;
        // Legacy form without an arrow: "METHOD url [status] [(type)]".
        const plainLine = /^(GET|POST|PUT|DELETE|PATCH|OPTIONS)\s+(\S+)\s*→?\s*(\d+)?\s*(?:\(([^)]*)\))?/i;
        const looksLikeJsonUrl = (url) => url.endsWith('.json') || url.includes('/api/') || url.includes('/x/');
        const entries = [];
        for (const rawLine of raw.split('\n')) {
            // Trim first so indented lines and CRLF line endings still match.
            const line = rawLine.trim();
            if (!line)
                continue;
            const arrow = line.match(arrowLine);
            if (arrow) {
                const [, method, url, status, declaredType] = arrow;
                entries.push({
                    method: method.toUpperCase(),
                    url,
                    status: parseInt(status, 10),
                    contentType: declaredType || (looksLikeJsonUrl(url) ? 'application/json' : ''),
                });
                continue;
            }
            const plain = line.match(plainLine);
            if (plain) {
                const [, method, url, status, declaredType] = plain;
                entries.push({
                    method: method.toUpperCase(),
                    url,
                    status: status ? parseInt(status, 10) : null,
                    contentType: declaredType ?? '',
                });
            }
        }
        return entries;
    }
    if (Array.isArray(raw)) {
        return raw.map((e) => ({
            method: (e.method ?? 'GET').toUpperCase(),
            url: e.url ?? e.request?.url ?? '',
            status: e.status ?? e.statusCode ?? null,
            contentType: e.contentType ?? e.mimeType ?? '',
            responseBody: e.responseBody ?? e.body,
            requestHeaders: e.requestHeaders ?? e.headers,
        }));
    }
    return [];
}
|
|
122
|
+
/**
 * Normalise a URL into a host + path + query-key pattern so that requests
 * differing only in identifiers or volatile parameters share one key.
 *
 * Path rewriting, applied in this order:
 *   - "/<8+ hex chars containing at least one a-f letter>" -> "/{hex}"
 *   - "/<digits>"                                            -> "/{id}"
 *   - "/BV<10 alphanumerics>"                                -> "/{bvid}"
 * The hex rule runs first; otherwise the digit rule would eat the leading
 * digits of ids such as "/1a2b3c4d5e" and leave "/{id}a2b3c4d5e".
 *
 * Query string: parameter names (minus VOLATILE_PARAMS) are sorted and emitted
 * as "name={}" pairs; values are dropped.
 *
 * @param url Absolute URL string.
 * @returns The pattern, or `url` unchanged when it cannot be parsed.
 */
function urlToPattern(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return url;
    }
    const pathNorm = parsed.pathname
        // The lookahead requires a hex letter, so purely numeric ids still become {id}.
        .replace(/\/(?=\d*[a-fA-F])[0-9a-fA-F]{8,}/g, '/{hex}')
        .replace(/\/\d+/g, '/{id}')
        .replace(/\/BV[a-zA-Z0-9]{10}/g, '/{bvid}');
    const stableKeys = [];
    parsed.searchParams.forEach((_value, key) => {
        if (!VOLATILE_PARAMS.has(key))
            stableKeys.push(key);
    });
    const qs = stableKeys.length
        ? '?' + stableKeys.sort().map(key => `${key}={}`).join('&')
        : '';
    return `${parsed.host}${pathNorm}${qs}`;
}
|
|
144
|
+
/**
 * Collect the query-string parameters of a URL into a plain object.
 * When a name occurs more than once, the last value wins.
 *
 * @param url Absolute URL string.
 * @returns Object mapping parameter name to value; `{}` if the URL is invalid.
 */
function extractQueryParams(url) {
    let query;
    try {
        query = new URL(url).searchParams;
    }
    catch {
        return {};
    }
    const collected = {};
    query.forEach((value, name) => {
        collected[name] = value;
    });
    return collected;
}
|
|
157
|
+
/**
 * Classify request headers into coarse auth indicators.
 *
 * Header names are compared case-insensitively. Each indicator appears at
 * most once, in the fixed order bearer, csrf, signature, transaction.
 *
 * @param headers Object keyed by header name; may be null/undefined.
 * @returns Array of indicator labels (empty when no headers are given).
 */
function detectAuthIndicators(headers) {
    if (!headers)
        return [];
    const names = Object.keys(headers).map(name => name.toLowerCase());
    const rules = [
        { label: 'bearer', matches: (name) => name === 'authorization' },
        { label: 'csrf', matches: (name) => name.startsWith('x-csrf') || name.startsWith('x-xsrf') },
        // The "x-s" prefix also covers "x-s-common", so no separate check is needed.
        { label: 'signature', matches: (name) => name.startsWith('x-s') || name === 'x-t' },
        { label: 'transaction', matches: (name) => name === 'x-client-transaction-id' },
    ];
    const found = [];
    for (const rule of rules) {
        if (names.some(name => rule.matches(name)))
            found.push(rule.label);
    }
    return found;
}
|
|
175
|
+
/**
 * Locate the main list inside a parsed JSON response and map its item fields
 * to semantic roles.
 *
 * The body is walked through object properties (not through array elements)
 * down to depth 4. Every array with at least two entries, one of which is a
 * plain object, is a candidate; the longest candidate wins (first found on ties).
 *
 * Field names are sampled from the first plain-object item of that list
 * (nested objects flattened to "a.b", two levels). For each role in
 * FIELD_ROLES the chosen field is the shallowest match, then the earliest
 * alias in the role's list, then the earliest in key order. This keeps a
 * nested "owner.name" from shadowing a top-level "title".
 *
 * @param body Parsed JSON value.
 * @returns `{ itemPath, itemCount, detectedFields, sampleFieldNames }`, with
 *          `itemPath` null when the body itself is the list; `null` when the
 *          body is not an object or contains no candidate list.
 */
function analyzeResponseBody(body) {
    if (!body || typeof body !== 'object')
        return null;
    const isRecord = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    const candidates = [];
    const collectLists = (node, nodePath, depth) => {
        if (depth > 4)
            return;
        if (Array.isArray(node)) {
            if (node.length >= 2 && node.some(isRecord))
                candidates.push({ path: nodePath, items: node });
            return;
        }
        if (isRecord(node)) {
            for (const [key, child] of Object.entries(node)) {
                collectLists(child, nodePath ? `${nodePath}.${key}` : key, depth + 1);
            }
        }
    };
    collectLists(body, '', 0);
    if (!candidates.length)
        return null;
    // Stable sort: among equally long lists the first one discovered is kept.
    candidates.sort((a, b) => b.items.length - a.items.length);
    const best = candidates[0];
    // Sample the first object item; items[0] may be null or a primitive in mixed arrays.
    const sampleItem = best.items.find(isRecord);
    const sampleFieldNames = flattenFieldNames(sampleItem, '', 2);
    const detectedFields = {};
    for (const [role, aliases] of Object.entries(FIELD_ROLES)) {
        let bestField = null;
        let bestRank = Infinity;
        for (const fieldName of sampleFieldNames) {
            const segments = fieldName.split('.');
            const aliasIndex = aliases.indexOf(segments[segments.length - 1].toLowerCase());
            if (aliasIndex < 0)
                continue;
            // Depth dominates; alias priority breaks ties within one depth.
            const rank = segments.length * aliases.length + aliasIndex;
            if (rank < bestRank) {
                bestRank = rank;
                bestField = fieldName;
            }
        }
        if (bestField !== null)
            detectedFields[role] = bestField;
    }
    return {
        itemPath: best.path || null,
        itemCount: best.items.length,
        detectedFields,
        sampleFieldNames,
    };
}
|
|
228
|
+
/**
 * List the keys of an object in depth-first order, with nested plain-object
 * keys written as dotted paths ("owner.name").
 *
 * @param obj      Value to inspect; non-objects yield an empty list.
 * @param prefix   Dotted path prepended to every key ('' at the top level).
 * @param maxDepth Number of object levels to descend; 0 or less yields [].
 * @returns Key paths; each parent key precedes its children. Arrays are
 *          listed as keys but not descended into.
 */
function flattenFieldNames(obj, prefix, maxDepth) {
    if (maxDepth <= 0 || !obj || typeof obj !== 'object')
        return [];
    return Object.keys(obj).flatMap((key) => {
        const qualified = prefix ? `${prefix}.${key}` : key;
        const child = obj[key];
        const isPlainObject = child && typeof child === 'object' && !Array.isArray(child);
        return isPlainObject
            ? [qualified, ...flattenFieldNames(child, qualified, maxDepth - 1)]
            : [qualified];
    });
}
|
|
244
|
+
/**
 * Turn parsed network entries into one analysed endpoint per
 * "METHOD:pattern" key (the first entry seen for a key wins).
 *
 * Entries are dropped when they have no URL, when their content type marks a
 * static resource (image, font, CSS, JavaScript, wasm), or when their status
 * is 400 or higher. Entries without a status are kept.
 *
 * @param entries  Entries as produced by parseNetworkOutput.
 * @param siteHost Hostname of the explored site (not used by this function).
 * @returns Analysed endpoints in first-seen order.
 */
function analyzeEndpoints(entries, siteHost) {
    const staticMarkers = ['image/', 'font/', 'css', 'javascript', 'wasm'];
    const byKey = new Map();
    for (const entry of entries) {
        if (!entry.url)
            continue;
        const ct = entry.contentType.toLowerCase();
        const mimeType = ct.split(';')[0]?.trim() ?? '';
        const isStaticResource = IGNORED_CONTENT_TYPES.has(mimeType) ||
            staticMarkers.some(marker => ct.includes(marker));
        if (isStaticResource)
            continue;
        // Failed responses are skipped; a missing status is not treated as a failure.
        if (entry.status && entry.status >= 400)
            continue;
        const pattern = urlToPattern(entry.url);
        const key = `${entry.method}:${pattern}`;
        if (byKey.has(key))
            continue;
        const paramNames = Object.keys(extractQueryParams(entry.url))
            .filter(name => !VOLATILE_PARAMS.has(name));
        const hasAny = (knownNames) => paramNames.some(name => knownNames.has(name));
        byKey.set(key, {
            pattern,
            method: entry.method,
            url: entry.url,
            status: entry.status,
            contentType: ct,
            queryParams: paramNames,
            hasSearchParam: hasAny(SEARCH_PARAMS),
            hasPaginationParam: hasAny(PAGINATION_PARAMS),
            hasLimitParam: hasAny(LIMIT_PARAMS),
            authIndicators: detectAuthIndicators(entry.requestHeaders),
            responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
        });
    }
    return Array.from(byKey.values());
}
|
|
284
|
+
/**
 * Choose the access strategy for one endpoint from its auth indicators.
 *
 *   signature                   -> 'intercept'
 *   transaction / bearer / csrf -> 'header'
 *   anything else (or none)     -> 'cookie'
 *
 * @param endpoint Analysed endpoint with an `authIndicators` array.
 * @returns 'intercept', 'header' or 'cookie'.
 */
function inferStrategy(endpoint) {
    const indicators = endpoint.authIndicators;
    if (indicators.includes('signature'))
        return 'intercept';
    const needsHeader = ['transaction', 'bearer', 'csrf'].some(kind => indicators.includes(kind));
    return needsHeader ? 'header' : 'cookie';
}
|
|
301
|
+
/**
 * Pick a capability (command) name for an endpoint.
 *
 * Order of precedence:
 *   1. an explicit `goal`;
 *   2. 'search' when the endpoint has a search parameter;
 *   3. keyword matches against the lowercased URL (hot, feed, comments,
 *      history, me, detail, favorite, notifications), first match wins;
 *   4. the last path segment that is not a numeric or hex id, with
 *      non-alphanumerics replaced by '_';
 *   5. 'data'.
 *
 * "/me" only counts when it is a whole path segment, so "/media" or
 * "/messages" are not classified as the current-user endpoint.
 *
 * @param endpoint Analysed endpoint (`url`, `hasSearchParam`).
 * @param goal     Optional name that overrides all inference.
 * @returns The capability name.
 */
function inferCapabilityName(endpoint, goal) {
    if (goal)
        return goal;
    const u = endpoint.url.toLowerCase();
    const mentions = (...keywords) => keywords.some(keyword => u.includes(keyword));
    if (endpoint.hasSearchParam)
        return 'search';
    if (mentions('hot', 'popular', 'ranking', 'trending'))
        return 'hot';
    if (mentions('feed', 'timeline', 'dynamic'))
        return 'feed';
    if (mentions('comment', 'reply'))
        return 'comments';
    if (mentions('history'))
        return 'history';
    // "/me" must end at a segment boundary: "/", "?", "#" or the end of the URL.
    if (mentions('profile', 'userinfo', 'myinfo') || /\/me(?:[/?#]|$)/.test(u))
        return 'me';
    if (mentions('video', 'article', 'detail', 'view'))
        return 'detail';
    if (mentions('favorite', 'collect', 'bookmark'))
        return 'favorite';
    if (mentions('notification', 'notice'))
        return 'notifications';
    // Fallback: derive the name from the path.
    try {
        const pathname = new URL(endpoint.url).pathname;
        const segments = pathname.split('/').filter(s => s && !/^\d+$/.test(s) && !/^[0-9a-f]{8,}$/i.test(s));
        if (segments.length)
            return segments[segments.length - 1].replace(/[^a-z0-9]/gi, '_').toLowerCase();
    }
    catch {
        // Unparseable URL: fall through to the generic name.
    }
    return 'data';
}
|
|
338
|
+
/**
 * Choose the output columns for a capability.
 *
 * Roles are taken in the fixed order title, url, author, score, time and kept
 * only when the response analysis detected a field for them.
 *
 * @param analysis Result of analyzeResponseBody, or null/undefined.
 * @returns Column names; ['title', 'url'] when there is no analysis or none
 *          of the preferred roles was detected.
 */
function buildRecommendedColumns(analysis) {
    const fallback = ['title', 'url'];
    if (!analysis)
        return fallback;
    const detected = ['title', 'url', 'author', 'score', 'time']
        .filter(role => analysis.detectedFields[role]);
    return detected.length > 0 ? detected : fallback;
}
|
|
353
|
+
/**
 * Build the CLI argument list recommended for an endpoint.
 *
 *   - `keyword` (str, required)        when the endpoint has a search parameter;
 *   - `limit`   (int, default 20)      always;
 *   - `page`    (int, default 1)       when the endpoint has a pagination parameter.
 *
 * Only the `hasSearchParam` and `hasPaginationParam` flags are read; the
 * search argument is always exposed as `keyword`, whatever the query
 * parameter is called.
 *
 * @param endpoint Analysed endpoint.
 * @returns Argument descriptors in the order keyword, limit, page.
 */
function buildRecommendedArgs(endpoint) {
    const args = [];
    if (endpoint.hasSearchParam) {
        args.push({ name: 'keyword', type: 'str', required: true });
    }
    // Always add limit
    args.push({ name: 'limit', type: 'int', required: false, default: 20 });
    if (endpoint.hasPaginationParam) {
        args.push({ name: 'page', type: 'int', required: false, default: 1 });
    }
    return args;
}
|
|
369
|
+
/**
 * Rate how promising an endpoint is as a CLI data source (higher is better).
 *
 * Points:
 *   +10  content type mentions "json"
 *   +5   a response analysis exists, plus min(itemCount, 10) and
 *        2 per detected semantic field
 *   +3   pattern contains "/api/" or "/x/"
 *   +3   search parameter, +2 pagination parameter, +2 limit parameter
 *   +2   status is exactly 200
 *
 * @param ep Analysed endpoint.
 * @returns The summed score.
 */
function scoreEndpoint(ep) {
    const analysis = ep.responseAnalysis;
    const analysisPoints = analysis
        ? 5 + Math.min(analysis.itemCount, 10) + 2 * Object.keys(analysis.detectedFields).length
        : 0;
    const looksLikeApiPath = ep.pattern.includes('/api/') || ep.pattern.includes('/x/');
    const contributions = [
        ep.contentType.includes('json') ? 10 : 0,
        analysisPoints,
        looksLikeApiPath ? 3 : 0,
        ep.hasSearchParam ? 3 : 0,
        ep.hasPaginationParam ? 2 : 0,
        ep.hasLimitParam ? 2 : 0,
        ep.status === 200 ? 2 : 0,
    ];
    return contributions.reduce((total, points) => total + points, 0);
}
|
|
399
|
+
// ── Framework detection ────────────────────────────────────────────────────
|
|
400
|
+
const FRAMEWORK_DETECT_JS = `
|
|
401
|
+
(() => {
|
|
402
|
+
const result = {};
|
|
403
|
+
try {
|
|
404
|
+
const app = document.querySelector('#app');
|
|
405
|
+
result.vue3 = !!(app && app.__vue_app__);
|
|
406
|
+
result.vue2 = !!(app && app.__vue__);
|
|
407
|
+
result.react = !!window.__REACT_DEVTOOLS_GLOBAL_HOOK__ || !!document.querySelector('[data-reactroot]');
|
|
408
|
+
result.nextjs = !!window.__NEXT_DATA__;
|
|
409
|
+
result.nuxt = !!window.__NUXT__;
|
|
410
|
+
if (result.vue3 && app.__vue_app__) {
|
|
411
|
+
const gp = app.__vue_app__.config?.globalProperties;
|
|
412
|
+
result.pinia = !!(gp && gp.$pinia);
|
|
413
|
+
result.vuex = !!(gp && gp.$store);
|
|
414
|
+
}
|
|
415
|
+
} catch {}
|
|
416
|
+
return JSON.stringify(result);
|
|
417
|
+
})()
|
|
418
|
+
`;
|
|
419
|
+
// ── Main explore function ──────────────────────────────────────────────────
|
|
420
|
+
/**
 * Build the source of an in-page async function that re-fetches `url` with
 * credentials and returns a compact JSON "skeleton" of the response.
 *
 * The skeleton keeps exactly what response analysis reads: object keys, array
 * lengths, and the full shape of the first plain-object item of each array.
 * Other array items collapse to `{}` (objects) or `null`, and strings are cut
 * to 32 characters. Unlike cutting the serialised text at a fixed length,
 * this always yields parseable JSON. The function resolves to null on any
 * failure or when even the skeleton exceeds the size budget.
 */
function buildBodyProbeScript(url) {
    const MAX_BODY_PROBE_CHARS = 10000;
    return `
async () => {
  try {
    const resp = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
    if (!resp.ok) return null;
    const data = await resp.json();
    const isRecord = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
    const shrink = (value, depth) => {
      if (typeof value === 'string') return value.slice(0, 32);
      if (value === null || typeof value !== 'object') return value;
      if (depth > 8) return null;
      if (Array.isArray(value)) {
        let sampled = false;
        return value.map((item) => {
          if (!isRecord(item)) return null;
          if (sampled) return {};
          sampled = true;
          return shrink(item, depth + 1);
        });
      }
      const out = {};
      for (const key of Object.keys(value)) out[key] = shrink(value[key], depth + 1);
      return out;
    };
    const text = JSON.stringify(shrink(data, 0));
    return text.length <= ${MAX_BODY_PROBE_CHARS} ? text : null;
  } catch { return null; }
}
`;
}
/**
 * Return `baseName`, or a variant of it that is not yet in `usedNames`.
 * The variant appends the last literal path segment of `pattern` (query
 * template removed), then a numeric counter if that is still taken.
 */
function uniqueCapabilityName(baseName, pattern, usedNames) {
    if (!usedNames.has(baseName))
        return baseName;
    const pathOnly = pattern.split('?')[0];
    const suffix = pathOnly.split('/').filter(s => s && !s.startsWith('{') && !s.includes('.')).pop();
    const stem = suffix ? `${baseName}_${suffix}` : `${baseName}_${usedNames.size}`;
    let candidate = stem;
    for (let n = 2; usedNames.has(candidate); n++)
        candidate = `${stem}_${n}`;
    return candidate;
}
/**
 * Explore a URL in a browser session and infer CLI capabilities from the
 * network traffic it produces.
 *
 * Steps: navigate and wait, scroll three times, capture network requests,
 * re-fetch up to 10 successful JSON GET endpoints in-page for body analysis,
 * detect the frontend framework, read page metadata, analyse/score endpoints,
 * derive up to 8 capabilities, and pick a site-level auth strategy.
 *
 * Side effects: creates `outDir` and writes manifest.json, endpoints.json,
 * capabilities.json and auth.json into it.
 *
 * @param url  Page to explore.
 * @param opts Optional settings read here: `site`, `outDir`, `BrowserFactory`,
 *             `waitSeconds` (default 3) and `goal` (forces the capability name).
 * @returns The exploration result plus `out_dir`.
 *          NOTE(review): rejection behaviour on timeout depends on
 *          runWithTimeout/browserSession, which are defined elsewhere.
 */
export async function exploreUrl(url, opts = {}) {
    const site = opts.site ?? detectSiteName(url);
    const outDir = opts.outDir ?? path.join('.opencli', 'explore', site);
    fs.mkdirSync(outDir, { recursive: true });
    const result = await browserSession(opts.BrowserFactory, async (page) => {
        return runWithTimeout((async () => {
            // Step 1: Navigate
            await page.goto(url);
            await page.wait(opts.waitSeconds ?? 3);
            // Step 2: Auto-scroll to trigger lazy loading
            for (let i = 0; i < 3; i++) {
                await page.scroll('down');
                await page.wait(1);
            }
            // Step 3: Capture network traffic
            const rawNetwork = await page.networkRequests(false);
            const networkEntries = parseNetworkOutput(rawNetwork);
            // Step 4: For JSON endpoints, fetch a response skeleton in-browser
            const jsonEndpoints = networkEntries.filter(e => e.contentType.includes('json') && e.method === 'GET' && e.status === 200);
            for (const ep of jsonEndpoints.slice(0, 10)) {
                try {
                    const bodyResult = await page.evaluate(buildBodyProbeScript(ep.url));
                    if (bodyResult && typeof bodyResult === 'string') {
                        try {
                            ep.responseBody = JSON.parse(bodyResult);
                        }
                        catch {
                            // Best effort: an unparseable body just means no analysis.
                        }
                    }
                    else if (bodyResult && typeof bodyResult === 'object') {
                        ep.responseBody = bodyResult;
                    }
                }
                catch {
                    // Best effort: a failed probe must not abort the exploration.
                }
            }
            // Step 5: Detect frontend framework
            let framework = {};
            try {
                const fwResult = await page.evaluate(FRAMEWORK_DETECT_JS);
                const parsedFw = typeof fwResult === 'string' ? JSON.parse(fwResult) : fwResult;
                // typeof null is 'object'; keep the empty default in that case.
                if (parsedFw && typeof parsedFw === 'object')
                    framework = parsedFw;
            }
            catch {
                // Framework detection is optional.
            }
            // Step 6: Get page metadata
            let title = '', finalUrl = '';
            try {
                const meta = await page.evaluate(`
        () => JSON.stringify({ url: window.location.href, title: document.title || '' })
      `);
                const parsedMeta = typeof meta === 'string' ? JSON.parse(meta) : meta;
                if (parsedMeta && typeof parsedMeta === 'object') {
                    title = parsedMeta.title;
                    finalUrl = parsedMeta.url;
                }
            }
            catch {
                // Metadata is optional.
            }
            // Step 7: Analyze endpoints
            let siteHost = '';
            try {
                siteHost = new URL(url).hostname;
            }
            catch {
                // Keep the empty host for unparseable URLs.
            }
            const analyzedEndpoints = analyzeEndpoints(networkEntries, siteHost);
            // Step 8: Score and rank endpoints
            const scoredEndpoints = analyzedEndpoints
                .map(ep => ({ ...ep, score: scoreEndpoint(ep) }))
                .filter(ep => ep.score >= 5)
                .sort((a, b) => b.score - a.score);
            // Step 9: Infer capabilities from top endpoints
            const capabilities = [];
            const usedNames = new Set();
            for (const ep of scoredEndpoints.slice(0, 8)) {
                // Names must be unique, including after a suffix has been appended.
                const capName = uniqueCapabilityName(inferCapabilityName(ep, opts.goal), ep.pattern, usedNames);
                usedNames.add(capName);
                capabilities.push({
                    name: capName,
                    description: `${site} ${capName}`,
                    strategy: inferStrategy(ep),
                    confidence: Math.min(ep.score / 20, 1.0),
                    endpoint: ep.pattern,
                    itemPath: ep.responseAnalysis?.itemPath ?? null,
                    recommendedColumns: buildRecommendedColumns(ep.responseAnalysis),
                    recommendedArgs: buildRecommendedArgs(ep),
                });
            }
            // Step 10: Determine auth strategy (same mapping as inferStrategy,
            // which also treats a CSRF header as requiring 'header').
            const allAuthIndicators = new Set(analyzedEndpoints.flatMap(ep => ep.authIndicators));
            let topStrategy = 'cookie';
            if (allAuthIndicators.has('signature'))
                topStrategy = 'intercept';
            else if (allAuthIndicators.has('transaction') || allAuthIndicators.has('bearer') || allAuthIndicators.has('csrf'))
                topStrategy = 'header';
            else if (allAuthIndicators.size === 0 && scoredEndpoints.some(ep => ep.contentType.includes('json')))
                topStrategy = 'public';
            return {
                site,
                target_url: url,
                final_url: finalUrl,
                title,
                framework,
                top_strategy: topStrategy,
                endpoint_count: analyzedEndpoints.length,
                api_endpoint_count: scoredEndpoints.length,
                capabilities,
                endpoints: scoredEndpoints.map(ep => ({
                    pattern: ep.pattern,
                    method: ep.method,
                    url: ep.url,
                    status: ep.status,
                    contentType: ep.contentType,
                    score: ep.score,
                    queryParams: ep.queryParams,
                    itemPath: ep.responseAnalysis?.itemPath ?? null,
                    itemCount: ep.responseAnalysis?.itemCount ?? 0,
                    detectedFields: ep.responseAnalysis?.detectedFields ?? {},
                    authIndicators: ep.authIndicators,
                })),
                auth_indicators: [...allAuthIndicators],
            };
        })(), { timeout: DEFAULT_BROWSER_EXPLORE_TIMEOUT, label: 'explore' });
    });
    // Write artifacts
    const manifest = {
        site: result.site,
        target_url: result.target_url,
        final_url: result.final_url,
        title: result.title,
        framework: result.framework,
        top_strategy: result.top_strategy,
        explored_at: new Date().toISOString(),
    };
    fs.writeFileSync(path.join(outDir, 'manifest.json'), JSON.stringify(manifest, null, 2));
    fs.writeFileSync(path.join(outDir, 'endpoints.json'), JSON.stringify(result.endpoints ?? [], null, 2));
    fs.writeFileSync(path.join(outDir, 'capabilities.json'), JSON.stringify(result.capabilities ?? [], null, 2));
    fs.writeFileSync(path.join(outDir, 'auth.json'), JSON.stringify({
        top_strategy: result.top_strategy,
        indicators: result.auth_indicators ?? [],
        framework: result.framework ?? {},
    }, null, 2));
    return { ...result, out_dir: outDir };
}
|
|
584
|
+
/**
 * Format an exploration result as a multi-line, human-readable summary.
 *
 * Layout: a fixed header (status, site, URL, title, strategy, endpoint and
 * capability counts), one bullet per capability (first five only), an
 * optional line naming the detected frameworks, and the output directory.
 *
 * @param result Value returned by exploreUrl.
 * @returns The summary, lines joined with '\n'.
 */
export function renderExploreSummary(result) {
    const headerLines = [
        'opencli explore: OK',
        `Site: ${result.site}`,
        `URL: ${result.target_url}`,
        `Title: ${result.title || '(none)'}`,
        `Strategy: ${result.top_strategy}`,
        `Endpoints: ${result.endpoint_count} total, ${result.api_endpoint_count} API`,
        `Capabilities: ${result.capabilities?.length ?? 0}`,
    ];
    const capabilityLines = (result.capabilities ?? [])
        .slice(0, 5)
        .map(cap => ` • ${cap.name} (${cap.strategy}, confidence: ${(cap.confidence * 100).toFixed(0)}%)`);
    const activeFrameworks = Object.entries(result.framework ?? {})
        .filter(([, detected]) => detected)
        .map(([name]) => name);
    const frameworkLines = activeFrameworks.length
        ? [`Framework: ${activeFrameworks.join(', ')}`]
        : [];
    return [
        ...headerLines,
        ...capabilityLines,
        ...frameworkLines,
        `Output: ${result.out_dir}`,
    ].join('\n');
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generate: one-shot CLI creation from URL.
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates the full pipeline:
|
|
5
|
+
* explore (Deep Explore) → synthesize (YAML generation) → register → verify
|
|
6
|
+
*
|
|
7
|
+
* Includes Strategy Cascade: if the initial strategy fails,
|
|
8
|
+
* automatically downgrades and retries.
|
|
9
|
+
*/
|
|
10
|
+
/**
 * Entry point of the generate pipeline described in this file's header comment.
 *
 * NOTE(review): `opts` and the resolved value are both typed `any` in this
 * declaration, so their shape is not visible here — confirm the accepted
 * options and result fields against the implementation before relying on them.
 */
export declare function generateCliFromUrl(opts: any): Promise<any>;
|
|
11
|
+
/**
 * Render a value `r` as a summary string.
 *
 * NOTE(review): presumably `r` is the value resolved by generateCliFromUrl;
 * it is typed `any` here, so verify the expected shape against the implementation.
 */
export declare function renderGenerateSummary(r: any): string;
|