@jackwener/opencli 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +594 -0
- package/README.md +124 -39
- package/README.zh-CN.md +151 -0
- package/SKILL.md +178 -102
- package/dist/bilibili.d.ts +6 -5
- package/dist/browser.d.ts +3 -1
- package/dist/browser.js +44 -2
- package/dist/cascade.d.ts +46 -0
- package/dist/cascade.js +180 -0
- package/dist/clis/bbc/news.js +42 -0
- package/dist/clis/bilibili/hot.yaml +38 -0
- package/dist/clis/boss/search.js +47 -0
- package/dist/clis/ctrip/search.d.ts +1 -0
- package/dist/clis/ctrip/search.js +62 -0
- package/dist/clis/hackernews/top.yaml +36 -0
- package/dist/clis/index.d.ts +10 -1
- package/dist/clis/index.js +19 -1
- package/dist/clis/reddit/hot.yaml +46 -0
- package/dist/clis/reuters/search.d.ts +1 -0
- package/dist/clis/reuters/search.js +52 -0
- package/dist/clis/smzdm/search.d.ts +1 -0
- package/dist/clis/smzdm/search.js +66 -0
- package/dist/clis/twitter/trending.yaml +40 -0
- package/dist/clis/v2ex/hot.yaml +25 -0
- package/dist/clis/v2ex/latest.yaml +25 -0
- package/dist/clis/v2ex/topic.yaml +27 -0
- package/dist/clis/weibo/hot.d.ts +1 -0
- package/dist/clis/weibo/hot.js +41 -0
- package/dist/clis/xiaohongshu/feed.yaml +32 -0
- package/dist/clis/xiaohongshu/notifications.yaml +38 -0
- package/dist/clis/xiaohongshu/search.d.ts +5 -0
- package/dist/clis/xiaohongshu/search.js +68 -0
- package/dist/clis/yahoo-finance/quote.d.ts +1 -0
- package/dist/clis/yahoo-finance/quote.js +74 -0
- package/dist/clis/youtube/search.d.ts +1 -0
- package/dist/clis/youtube/search.js +60 -0
- package/dist/clis/zhihu/hot.yaml +42 -0
- package/dist/clis/zhihu/question.d.ts +1 -0
- package/dist/clis/zhihu/question.js +39 -0
- package/dist/clis/zhihu/search.yaml +55 -0
- package/dist/engine.d.ts +2 -1
- package/dist/explore.d.ts +23 -13
- package/dist/explore.js +293 -422
- package/dist/generate.js +2 -1
- package/dist/main.js +21 -2
- package/dist/pipeline/executor.d.ts +9 -0
- package/dist/pipeline/executor.js +88 -0
- package/dist/pipeline/index.d.ts +5 -0
- package/dist/pipeline/index.js +5 -0
- package/dist/pipeline/steps/browser.d.ts +12 -0
- package/dist/pipeline/steps/browser.js +68 -0
- package/dist/pipeline/steps/fetch.d.ts +5 -0
- package/dist/pipeline/steps/fetch.js +50 -0
- package/dist/pipeline/steps/intercept.d.ts +5 -0
- package/dist/pipeline/steps/intercept.js +75 -0
- package/dist/pipeline/steps/tap.d.ts +12 -0
- package/dist/pipeline/steps/tap.js +130 -0
- package/dist/pipeline/steps/transform.d.ts +8 -0
- package/dist/pipeline/steps/transform.js +53 -0
- package/dist/pipeline/template.d.ts +16 -0
- package/dist/pipeline/template.js +115 -0
- package/dist/pipeline/template.test.d.ts +4 -0
- package/dist/pipeline/template.test.js +102 -0
- package/dist/pipeline/transform.test.d.ts +4 -0
- package/dist/pipeline/transform.test.js +90 -0
- package/dist/pipeline.d.ts +5 -7
- package/dist/pipeline.js +5 -313
- package/dist/registry.d.ts +3 -2
- package/dist/runtime.d.ts +2 -1
- package/dist/synthesize.d.ts +11 -8
- package/dist/synthesize.js +142 -118
- package/dist/types.d.ts +27 -0
- package/dist/types.js +7 -0
- package/package.json +9 -4
- package/src/bilibili.ts +9 -7
- package/src/browser.ts +41 -3
- package/src/cascade.ts +218 -0
- package/src/clis/bbc/news.ts +42 -0
- package/src/clis/boss/search.ts +47 -0
- package/src/clis/ctrip/search.ts +62 -0
- package/src/clis/index.ts +28 -1
- package/src/clis/reddit/hot.yaml +46 -0
- package/src/clis/reuters/search.ts +52 -0
- package/src/clis/smzdm/search.ts +66 -0
- package/src/clis/v2ex/hot.yaml +5 -9
- package/src/clis/v2ex/latest.yaml +5 -8
- package/src/clis/v2ex/topic.yaml +27 -0
- package/src/clis/weibo/hot.ts +41 -0
- package/src/clis/xiaohongshu/feed.yaml +32 -0
- package/src/clis/xiaohongshu/notifications.yaml +38 -0
- package/src/clis/xiaohongshu/search.ts +71 -0
- package/src/clis/yahoo-finance/quote.ts +74 -0
- package/src/clis/youtube/search.ts +60 -0
- package/src/clis/zhihu/hot.yaml +22 -8
- package/src/clis/zhihu/question.ts +45 -0
- package/src/clis/zhihu/search.yaml +55 -0
- package/src/engine.ts +2 -1
- package/src/explore.ts +303 -465
- package/src/generate.ts +3 -1
- package/src/main.ts +18 -2
- package/src/pipeline/executor.ts +98 -0
- package/src/pipeline/index.ts +6 -0
- package/src/pipeline/steps/browser.ts +67 -0
- package/src/pipeline/steps/fetch.ts +60 -0
- package/src/pipeline/steps/intercept.ts +78 -0
- package/src/pipeline/steps/tap.ts +137 -0
- package/src/pipeline/steps/transform.ts +50 -0
- package/src/pipeline/template.test.ts +107 -0
- package/src/pipeline/template.ts +101 -0
- package/src/pipeline/transform.test.ts +107 -0
- package/src/pipeline.ts +5 -292
- package/src/registry.ts +4 -2
- package/src/runtime.ts +3 -1
- package/src/synthesize.ts +142 -137
- package/src/types.ts +23 -0
- package/vitest.config.ts +7 -0
- package/dist/clis/github/search.js +0 -20
- package/dist/clis/zhihu/search.js +0 -58
- package/dist/promote.d.ts +0 -1
- package/dist/promote.js +0 -3
- package/dist/register.d.ts +0 -2
- package/dist/register.js +0 -2
- package/dist/scaffold.d.ts +0 -2
- package/dist/scaffold.js +0 -2
- package/dist/smoke.d.ts +0 -2
- package/dist/smoke.js +0 -2
- package/src/clis/github/search.ts +0 -21
- package/src/clis/github/trending.yaml +0 -58
- package/src/clis/zhihu/search.ts +0 -65
- package/src/promote.ts +0 -3
- package/src/register.ts +0 -2
- package/src/scaffold.ts +0 -2
- package/src/smoke.ts +0 -2
- /package/dist/clis/{github/search.d.ts → bbc/news.d.ts} +0 -0
- /package/dist/clis/{zhihu → boss}/search.d.ts +0 -0
package/dist/explore.js
CHANGED
|
@@ -1,162 +1,103 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Deep Explore: intelligent API discovery with response analysis.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* analyzes
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* Flow:
|
|
9
|
-
* 1. Navigate to target URL
|
|
10
|
-
* 2. Auto-scroll to trigger lazy loading
|
|
11
|
-
* 3. Capture network requests (with body analysis)
|
|
12
|
-
* 4. For each JSON response: detect list fields, infer columns, analyze auth
|
|
13
|
-
* 5. Detect frontend framework (Vue/React/Pinia/Next.js)
|
|
14
|
-
* 6. Generate structured capabilities.json
|
|
4
|
+
* Navigates to the target URL, auto-scrolls to trigger lazy loading,
|
|
5
|
+
* captures network traffic, analyzes JSON responses, and automatically
|
|
6
|
+
* infers CLI capabilities from discovered API endpoints.
|
|
15
7
|
*/
|
|
16
8
|
import * as fs from 'node:fs';
|
|
17
9
|
import * as path from 'node:path';
|
|
18
|
-
import {
|
|
10
|
+
import { DEFAULT_BROWSER_EXPLORE_TIMEOUT, browserSession, runWithTimeout } from './runtime.js';
|
|
19
11
|
// ── Site name detection ────────────────────────────────────────────────────
|
|
20
|
-
const
|
|
12
|
+
const KNOWN_SITE_ALIASES = {
|
|
21
13
|
'x.com': 'twitter', 'twitter.com': 'twitter',
|
|
22
14
|
'news.ycombinator.com': 'hackernews',
|
|
23
15
|
'www.zhihu.com': 'zhihu', 'www.bilibili.com': 'bilibili',
|
|
16
|
+
'search.bilibili.com': 'bilibili',
|
|
24
17
|
'www.v2ex.com': 'v2ex', 'www.reddit.com': 'reddit',
|
|
25
18
|
'www.xiaohongshu.com': 'xiaohongshu', 'www.douban.com': 'douban',
|
|
26
|
-
'www.weibo.com': 'weibo', '
|
|
19
|
+
'www.weibo.com': 'weibo', 'www.bbc.com': 'bbc',
|
|
27
20
|
};
|
|
28
|
-
function detectSiteName(url) {
|
|
21
|
+
export function detectSiteName(url) {
|
|
29
22
|
try {
|
|
30
23
|
const host = new URL(url).hostname.toLowerCase();
|
|
31
|
-
if (host in
|
|
32
|
-
return
|
|
24
|
+
if (host in KNOWN_SITE_ALIASES)
|
|
25
|
+
return KNOWN_SITE_ALIASES[host];
|
|
33
26
|
const parts = host.split('.').filter(p => p && p !== 'www');
|
|
34
27
|
if (parts.length >= 2) {
|
|
35
28
|
if (['uk', 'jp', 'cn', 'com'].includes(parts[parts.length - 1]) && parts.length >= 3) {
|
|
36
|
-
return parts[parts.length - 3]
|
|
29
|
+
return slugify(parts[parts.length - 3]);
|
|
37
30
|
}
|
|
38
|
-
return parts[parts.length - 2]
|
|
31
|
+
return slugify(parts[parts.length - 2]);
|
|
39
32
|
}
|
|
40
|
-
return parts[0]
|
|
33
|
+
return parts[0] ? slugify(parts[0]) : 'site';
|
|
41
34
|
}
|
|
42
35
|
catch {
|
|
43
36
|
return 'site';
|
|
44
37
|
}
|
|
45
38
|
}
|
|
39
|
+
export function slugify(value) {
|
|
40
|
+
return value.trim().toLowerCase().replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-|-$/g, '') || 'site';
|
|
41
|
+
}
|
|
46
42
|
// ── Field & capability inference ───────────────────────────────────────────
|
|
47
|
-
/**
|
|
48
|
-
* Common field names grouped by semantic role.
|
|
49
|
-
* Used to auto-detect which response fields map to which columns.
|
|
50
|
-
*/
|
|
51
43
|
const FIELD_ROLES = {
|
|
52
44
|
title: ['title', 'name', 'text', 'content', 'desc', 'description', 'headline', 'subject'],
|
|
53
|
-
url: ['url', 'uri', 'link', 'href', 'permalink', 'jump_url', 'web_url', '
|
|
45
|
+
url: ['url', 'uri', 'link', 'href', 'permalink', 'jump_url', 'web_url', 'share_url'],
|
|
54
46
|
author: ['author', 'username', 'user_name', 'nickname', 'nick', 'owner', 'creator', 'up_name', 'uname'],
|
|
55
|
-
score: ['score', 'hot', 'heat', 'likes', 'like_count', 'view_count', 'views', '
|
|
47
|
+
score: ['score', 'hot', 'heat', 'likes', 'like_count', 'view_count', 'views', 'play', 'favorite_count', 'reply_count'],
|
|
56
48
|
time: ['time', 'created_at', 'publish_time', 'pub_time', 'date', 'ctime', 'mtime', 'pubdate', 'created'],
|
|
57
49
|
id: ['id', 'aid', 'bvid', 'mid', 'uid', 'oid', 'note_id', 'item_id'],
|
|
58
50
|
cover: ['cover', 'pic', 'image', 'thumbnail', 'poster', 'avatar'],
|
|
59
51
|
category: ['category', 'tag', 'type', 'tname', 'channel', 'section'],
|
|
60
52
|
};
|
|
61
|
-
/** Param names that indicate searchable APIs */
|
|
62
53
|
const SEARCH_PARAMS = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'search_query', 'w']);
|
|
63
|
-
/** Param names that indicate pagination */
|
|
64
54
|
const PAGINATION_PARAMS = new Set(['page', 'pn', 'offset', 'cursor', 'next', 'page_num']);
|
|
65
|
-
/** Param names that indicate limit control */
|
|
66
55
|
const LIMIT_PARAMS = new Set(['limit', 'count', 'size', 'per_page', 'page_size', 'ps', 'num']);
|
|
67
|
-
/** Content types to ignore */
|
|
68
|
-
const IGNORED_CONTENT_TYPES = new Set(['image/', 'font/', 'text/css', 'text/javascript', 'application/javascript', 'application/wasm']);
|
|
69
|
-
/** Volatile query params to strip from patterns */
|
|
70
56
|
const VOLATILE_PARAMS = new Set(['w_rid', 'wts', '_', 'callback', 'timestamp', 't', 'nonce', 'sign']);
|
|
71
57
|
/**
|
|
72
|
-
* Parse raw network output from Playwright MCP
|
|
73
|
-
* Handles
|
|
58
|
+
* Parse raw network output from Playwright MCP.
|
|
59
|
+
* Handles text format: [GET] url => [200]
|
|
74
60
|
*/
|
|
75
|
-
function
|
|
61
|
+
function parseNetworkRequests(raw) {
|
|
76
62
|
if (typeof raw === 'string') {
|
|
77
|
-
// Playwright MCP returns network as text lines like:
|
|
78
|
-
// "[GET] https://api.example.com/xxx => [200] "
|
|
79
|
-
// May also have markdown headers like "### Result"
|
|
80
63
|
const entries = [];
|
|
81
|
-
const
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
const [, method, url, status] = bracketMatch;
|
|
87
|
-
entries.push({
|
|
88
|
-
method: method.toUpperCase(),
|
|
89
|
-
url,
|
|
90
|
-
status: status ? parseInt(status) : null,
|
|
91
|
-
contentType: url.endsWith('.json') ? 'application/json' :
|
|
92
|
-
(url.includes('/api/') || url.includes('/x/')) ? 'application/json' : '',
|
|
93
|
-
});
|
|
94
|
-
continue;
|
|
95
|
-
}
|
|
96
|
-
// Legacy format: GET url → 200 (application/json)
|
|
97
|
-
const legacyMatch = line.match(/^(GET|POST|PUT|DELETE|PATCH|OPTIONS)\s+(\S+)\s*→?\s*(\d+)?\s*(?:\(([^)]*)\))?/i);
|
|
98
|
-
if (legacyMatch) {
|
|
99
|
-
const [, method, url, status, ct] = legacyMatch;
|
|
64
|
+
for (const line of raw.split('\n')) {
|
|
65
|
+
// Format: [GET] URL => [200]
|
|
66
|
+
const m = line.match(/\[?(GET|POST|PUT|DELETE|PATCH|OPTIONS)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?/i);
|
|
67
|
+
if (m) {
|
|
68
|
+
const [, method, url, status] = m;
|
|
100
69
|
entries.push({
|
|
101
|
-
method: method.toUpperCase(),
|
|
102
|
-
url,
|
|
103
|
-
status: status ? parseInt(status) : null,
|
|
104
|
-
contentType: ct ?? '',
|
|
70
|
+
method: method.toUpperCase(), url, status: status ? parseInt(status) : null,
|
|
71
|
+
contentType: (url.includes('/api/') || url.includes('/x/') || url.endsWith('.json')) ? 'application/json' : '',
|
|
105
72
|
});
|
|
106
73
|
}
|
|
107
74
|
}
|
|
108
75
|
return entries;
|
|
109
76
|
}
|
|
110
77
|
if (Array.isArray(raw)) {
|
|
111
|
-
return raw.map(
|
|
78
|
+
return raw.filter(e => e && typeof e === 'object').map(e => ({
|
|
112
79
|
method: (e.method ?? 'GET').toUpperCase(),
|
|
113
|
-
url: e.url ?? e.request?.url ?? '',
|
|
80
|
+
url: String(e.url ?? e.request?.url ?? e.requestUrl ?? ''),
|
|
114
81
|
status: e.status ?? e.statusCode ?? null,
|
|
115
|
-
contentType: e.contentType ?? e.
|
|
116
|
-
responseBody: e.responseBody
|
|
117
|
-
requestHeaders: e.requestHeaders ?? e.headers,
|
|
82
|
+
contentType: e.contentType ?? e.response?.contentType ?? '',
|
|
83
|
+
responseBody: e.responseBody, requestHeaders: e.requestHeaders,
|
|
118
84
|
}));
|
|
119
85
|
}
|
|
120
86
|
return [];
|
|
121
87
|
}
|
|
122
|
-
/**
|
|
123
|
-
* Normalize a URL into a pattern by replacing IDs with placeholders.
|
|
124
|
-
*/
|
|
125
88
|
function urlToPattern(url) {
|
|
126
89
|
try {
|
|
127
|
-
const
|
|
128
|
-
const pathNorm =
|
|
129
|
-
.replace(/\/\d+/g, '/{id}')
|
|
130
|
-
.replace(/\/[0-9a-fA-F]{8,}/g, '/{hex}')
|
|
131
|
-
.replace(/\/BV[a-zA-Z0-9]{10}/g, '/{bvid}');
|
|
90
|
+
const p = new URL(url);
|
|
91
|
+
const pathNorm = p.pathname.replace(/\/\d+/g, '/{id}').replace(/\/[0-9a-fA-F]{8,}/g, '/{hex}').replace(/\/BV[a-zA-Z0-9]{10}/g, '/{bvid}');
|
|
132
92
|
const params = [];
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
});
|
|
137
|
-
const qs = params.length ? '?' + params.sort().map(k => `${k}={}`).join('&') : '';
|
|
138
|
-
return `${parsed.host}${pathNorm}${qs}`;
|
|
93
|
+
p.searchParams.forEach((_v, k) => { if (!VOLATILE_PARAMS.has(k))
|
|
94
|
+
params.push(k); });
|
|
95
|
+
return `${p.host}${pathNorm}${params.length ? '?' + params.sort().map(k => `${k}={}`).join('&') : ''}`;
|
|
139
96
|
}
|
|
140
97
|
catch {
|
|
141
98
|
return url;
|
|
142
99
|
}
|
|
143
100
|
}
|
|
144
|
-
/**
|
|
145
|
-
* Extract query params from a URL.
|
|
146
|
-
*/
|
|
147
|
-
function extractQueryParams(url) {
|
|
148
|
-
try {
|
|
149
|
-
const params = {};
|
|
150
|
-
new URL(url).searchParams.forEach((v, k) => { params[k] = v; });
|
|
151
|
-
return params;
|
|
152
|
-
}
|
|
153
|
-
catch {
|
|
154
|
-
return {};
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
/**
|
|
158
|
-
* Detect auth indicators from request headers.
|
|
159
|
-
*/
|
|
160
101
|
function detectAuthIndicators(headers) {
|
|
161
102
|
if (!headers)
|
|
162
103
|
return [];
|
|
@@ -168,436 +109,366 @@ function detectAuthIndicators(headers) {
|
|
|
168
109
|
indicators.push('csrf');
|
|
169
110
|
if (keys.some(k => k.startsWith('x-s') || k === 'x-t' || k === 'x-s-common'))
|
|
170
111
|
indicators.push('signature');
|
|
171
|
-
if (keys.some(k => k === 'x-client-transaction-id'))
|
|
172
|
-
indicators.push('transaction');
|
|
173
112
|
return indicators;
|
|
174
113
|
}
|
|
175
|
-
/**
|
|
176
|
-
* Analyze a JSON response to find list data and field mappings.
|
|
177
|
-
*/
|
|
178
114
|
function analyzeResponseBody(body) {
|
|
179
115
|
if (!body || typeof body !== 'object')
|
|
180
116
|
return null;
|
|
181
|
-
// Try to find the main list in the response
|
|
182
117
|
const candidates = [];
|
|
183
|
-
function findArrays(obj,
|
|
118
|
+
function findArrays(obj, path, depth) {
|
|
184
119
|
if (depth > 4)
|
|
185
120
|
return;
|
|
186
|
-
if (Array.isArray(obj) && obj.length >= 2) {
|
|
187
|
-
|
|
188
|
-
if (obj.some(item => item && typeof item === 'object' && !Array.isArray(item))) {
|
|
189
|
-
candidates.push({ path: currentPath, items: obj });
|
|
190
|
-
}
|
|
121
|
+
if (Array.isArray(obj) && obj.length >= 2 && obj.some(item => item && typeof item === 'object' && !Array.isArray(item))) {
|
|
122
|
+
candidates.push({ path, items: obj });
|
|
191
123
|
}
|
|
192
124
|
if (obj && typeof obj === 'object' && !Array.isArray(obj)) {
|
|
193
|
-
for (const [key, val] of Object.entries(obj))
|
|
194
|
-
|
|
195
|
-
findArrays(val, nextPath, depth + 1);
|
|
196
|
-
}
|
|
125
|
+
for (const [key, val] of Object.entries(obj))
|
|
126
|
+
findArrays(val, path ? `${path}.${key}` : key, depth + 1);
|
|
197
127
|
}
|
|
198
128
|
}
|
|
199
129
|
findArrays(body, '', 0);
|
|
200
130
|
if (!candidates.length)
|
|
201
131
|
return null;
|
|
202
|
-
// Pick the largest array as the main list
|
|
203
132
|
candidates.sort((a, b) => b.items.length - a.items.length);
|
|
204
133
|
const best = candidates[0];
|
|
205
|
-
|
|
206
|
-
const
|
|
207
|
-
const sampleFieldNames = sampleItem && typeof sampleItem === 'object'
|
|
208
|
-
? flattenFieldNames(sampleItem, '', 2)
|
|
209
|
-
: [];
|
|
210
|
-
// Match fields to semantic roles
|
|
134
|
+
const sample = best.items[0];
|
|
135
|
+
const sampleFields = sample && typeof sample === 'object' ? flattenFields(sample, '', 2) : [];
|
|
211
136
|
const detectedFields = {};
|
|
212
137
|
for (const [role, aliases] of Object.entries(FIELD_ROLES)) {
|
|
213
|
-
for (const
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
detectedFields[role] = fieldName;
|
|
138
|
+
for (const f of sampleFields) {
|
|
139
|
+
if (aliases.includes(f.split('.').pop()?.toLowerCase() ?? '')) {
|
|
140
|
+
detectedFields[role] = f;
|
|
217
141
|
break;
|
|
218
142
|
}
|
|
219
143
|
}
|
|
220
144
|
}
|
|
221
|
-
return {
|
|
222
|
-
itemPath: best.path || null,
|
|
223
|
-
itemCount: best.items.length,
|
|
224
|
-
detectedFields,
|
|
225
|
-
sampleFieldNames,
|
|
226
|
-
};
|
|
145
|
+
return { itemPath: best.path || null, itemCount: best.items.length, detectedFields, sampleFields };
|
|
227
146
|
}
|
|
228
|
-
|
|
229
|
-
* Flatten nested object field names for analysis.
|
|
230
|
-
*/
|
|
231
|
-
function flattenFieldNames(obj, prefix, maxDepth) {
|
|
147
|
+
function flattenFields(obj, prefix, maxDepth) {
|
|
232
148
|
if (maxDepth <= 0 || !obj || typeof obj !== 'object')
|
|
233
149
|
return [];
|
|
234
150
|
const names = [];
|
|
235
151
|
for (const key of Object.keys(obj)) {
|
|
236
|
-
const
|
|
237
|
-
names.push(
|
|
238
|
-
if (obj[key] && typeof obj[key] === 'object' && !Array.isArray(obj[key]))
|
|
239
|
-
names.push(...
|
|
240
|
-
}
|
|
152
|
+
const full = prefix ? `${prefix}.${key}` : key;
|
|
153
|
+
names.push(full);
|
|
154
|
+
if (obj[key] && typeof obj[key] === 'object' && !Array.isArray(obj[key]))
|
|
155
|
+
names.push(...flattenFields(obj[key], full, maxDepth - 1));
|
|
241
156
|
}
|
|
242
157
|
return names;
|
|
243
158
|
}
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
// Skip static resources
|
|
253
|
-
const ct = entry.contentType.toLowerCase();
|
|
254
|
-
if (IGNORED_CONTENT_TYPES.has(ct.split(';')[0]?.trim() ?? '') ||
|
|
255
|
-
ct.includes('image/') || ct.includes('font/') || ct.includes('css') ||
|
|
256
|
-
ct.includes('javascript') || ct.includes('wasm'))
|
|
257
|
-
continue;
|
|
258
|
-
// Skip non-JSON and failed responses
|
|
259
|
-
if (entry.status && entry.status >= 400)
|
|
260
|
-
continue;
|
|
261
|
-
const pattern = urlToPattern(entry.url);
|
|
262
|
-
const queryParams = extractQueryParams(entry.url);
|
|
263
|
-
const paramNames = Object.keys(queryParams).filter(k => !VOLATILE_PARAMS.has(k));
|
|
264
|
-
const key = `${entry.method}:${pattern}`;
|
|
265
|
-
if (seen.has(key))
|
|
266
|
-
continue;
|
|
267
|
-
const endpoint = {
|
|
268
|
-
pattern,
|
|
269
|
-
method: entry.method,
|
|
270
|
-
url: entry.url,
|
|
271
|
-
status: entry.status,
|
|
272
|
-
contentType: ct,
|
|
273
|
-
queryParams: paramNames,
|
|
274
|
-
hasSearchParam: paramNames.some(p => SEARCH_PARAMS.has(p)),
|
|
275
|
-
hasPaginationParam: paramNames.some(p => PAGINATION_PARAMS.has(p)),
|
|
276
|
-
hasLimitParam: paramNames.some(p => LIMIT_PARAMS.has(p)),
|
|
277
|
-
authIndicators: detectAuthIndicators(entry.requestHeaders),
|
|
278
|
-
responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
|
|
279
|
-
};
|
|
280
|
-
seen.set(key, endpoint);
|
|
281
|
-
}
|
|
282
|
-
return [...seen.values()];
|
|
283
|
-
}
|
|
284
|
-
/**
|
|
285
|
-
* Infer what strategy to use based on endpoint analysis.
|
|
286
|
-
*/
|
|
287
|
-
function inferStrategy(endpoint) {
|
|
288
|
-
if (endpoint.authIndicators.includes('signature'))
|
|
289
|
-
return 'intercept';
|
|
290
|
-
if (endpoint.authIndicators.includes('transaction'))
|
|
291
|
-
return 'header';
|
|
292
|
-
if (endpoint.authIndicators.includes('bearer') || endpoint.authIndicators.includes('csrf'))
|
|
293
|
-
return 'header';
|
|
294
|
-
// Check if the URL is a public API (no auth indicators)
|
|
295
|
-
if (endpoint.authIndicators.length === 0) {
|
|
296
|
-
// If it's the same domain, likely cookie auth
|
|
297
|
-
return 'cookie';
|
|
159
|
+
function scoreEndpoint(ep) {
|
|
160
|
+
let s = 0;
|
|
161
|
+
if (ep.contentType.includes('json'))
|
|
162
|
+
s += 10;
|
|
163
|
+
if (ep.responseAnalysis) {
|
|
164
|
+
s += 5;
|
|
165
|
+
s += Math.min(ep.responseAnalysis.itemCount, 10);
|
|
166
|
+
s += Object.keys(ep.responseAnalysis.detectedFields).length * 2;
|
|
298
167
|
}
|
|
299
|
-
|
|
168
|
+
if (ep.pattern.includes('/api/') || ep.pattern.includes('/x/'))
|
|
169
|
+
s += 3;
|
|
170
|
+
if (ep.hasSearchParam)
|
|
171
|
+
s += 3;
|
|
172
|
+
if (ep.hasPaginationParam)
|
|
173
|
+
s += 2;
|
|
174
|
+
if (ep.hasLimitParam)
|
|
175
|
+
s += 2;
|
|
176
|
+
if (ep.status === 200)
|
|
177
|
+
s += 2;
|
|
178
|
+
return s;
|
|
300
179
|
}
|
|
301
|
-
|
|
302
|
-
* Infer the capability name from an endpoint pattern.
|
|
303
|
-
*/
|
|
304
|
-
function inferCapabilityName(endpoint, goal) {
|
|
180
|
+
function inferCapabilityName(url, goal) {
|
|
305
181
|
if (goal)
|
|
306
182
|
return goal;
|
|
307
|
-
const u =
|
|
308
|
-
const p = endpoint.pattern.toLowerCase();
|
|
309
|
-
// Match common patterns
|
|
310
|
-
if (endpoint.hasSearchParam)
|
|
311
|
-
return 'search';
|
|
183
|
+
const u = url.toLowerCase();
|
|
312
184
|
if (u.includes('hot') || u.includes('popular') || u.includes('ranking') || u.includes('trending'))
|
|
313
185
|
return 'hot';
|
|
186
|
+
if (u.includes('search'))
|
|
187
|
+
return 'search';
|
|
314
188
|
if (u.includes('feed') || u.includes('timeline') || u.includes('dynamic'))
|
|
315
189
|
return 'feed';
|
|
316
190
|
if (u.includes('comment') || u.includes('reply'))
|
|
317
191
|
return 'comments';
|
|
318
192
|
if (u.includes('history'))
|
|
319
193
|
return 'history';
|
|
320
|
-
if (u.includes('profile') || u.includes('userinfo') || u.includes('/me')
|
|
194
|
+
if (u.includes('profile') || u.includes('userinfo') || u.includes('/me'))
|
|
321
195
|
return 'me';
|
|
322
|
-
if (u.includes('video') || u.includes('article') || u.includes('detail') || u.includes('view'))
|
|
323
|
-
return 'detail';
|
|
324
196
|
if (u.includes('favorite') || u.includes('collect') || u.includes('bookmark'))
|
|
325
197
|
return 'favorite';
|
|
326
|
-
if (u.includes('notification') || u.includes('notice'))
|
|
327
|
-
return 'notifications';
|
|
328
|
-
// Fallback: try to extract from path
|
|
329
198
|
try {
|
|
330
|
-
const
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
return segments[segments.length - 1].replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
|
199
|
+
const segs = new URL(url).pathname.split('/').filter(s => s && !s.match(/^\d+$/) && !s.match(/^[0-9a-f]{8,}$/i));
|
|
200
|
+
if (segs.length)
|
|
201
|
+
return segs[segs.length - 1].replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
|
334
202
|
}
|
|
335
203
|
catch { }
|
|
336
204
|
return 'data';
|
|
337
205
|
}
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
const cols = [];
|
|
345
|
-
// Prioritize: title → url → author → score → time
|
|
346
|
-
const priority = ['title', 'url', 'author', 'score', 'time'];
|
|
347
|
-
for (const role of priority) {
|
|
348
|
-
if (analysis.detectedFields[role])
|
|
349
|
-
cols.push(role);
|
|
350
|
-
}
|
|
351
|
-
return cols.length ? cols : ['title', 'url'];
|
|
352
|
-
}
|
|
353
|
-
/**
|
|
354
|
-
* Build recommended args from endpoint query params.
|
|
355
|
-
*/
|
|
356
|
-
function buildRecommendedArgs(endpoint) {
|
|
357
|
-
const args = [];
|
|
358
|
-
if (endpoint.hasSearchParam) {
|
|
359
|
-
const paramName = endpoint.queryParams.find(p => SEARCH_PARAMS.has(p)) ?? 'keyword';
|
|
360
|
-
args.push({ name: 'keyword', type: 'str', required: true });
|
|
361
|
-
}
|
|
362
|
-
// Always add limit
|
|
363
|
-
args.push({ name: 'limit', type: 'int', required: false, default: 20 });
|
|
364
|
-
if (endpoint.hasPaginationParam) {
|
|
365
|
-
args.push({ name: 'page', type: 'int', required: false, default: 1 });
|
|
366
|
-
}
|
|
367
|
-
return args;
|
|
368
|
-
}
|
|
369
|
-
/**
|
|
370
|
-
* Score an endpoint's interest level for capability generation.
|
|
371
|
-
* Higher score = more likely to be a useful API endpoint.
|
|
372
|
-
*/
|
|
373
|
-
function scoreEndpoint(ep) {
|
|
374
|
-
let score = 0;
|
|
375
|
-
// JSON content type is strongly preferred
|
|
376
|
-
if (ep.contentType.includes('json'))
|
|
377
|
-
score += 10;
|
|
378
|
-
// Has response analysis with items
|
|
379
|
-
if (ep.responseAnalysis) {
|
|
380
|
-
score += 5;
|
|
381
|
-
score += Math.min(ep.responseAnalysis.itemCount, 10);
|
|
382
|
-
score += Object.keys(ep.responseAnalysis.detectedFields).length * 2;
|
|
383
|
-
}
|
|
384
|
-
// API-like path patterns
|
|
385
|
-
if (ep.pattern.includes('/api/') || ep.pattern.includes('/x/'))
|
|
386
|
-
score += 3;
|
|
387
|
-
// Has search/pagination params
|
|
388
|
-
if (ep.hasSearchParam)
|
|
389
|
-
score += 3;
|
|
390
|
-
if (ep.hasPaginationParam)
|
|
391
|
-
score += 2;
|
|
392
|
-
if (ep.hasLimitParam)
|
|
393
|
-
score += 2;
|
|
394
|
-
// 200 OK
|
|
395
|
-
if (ep.status === 200)
|
|
396
|
-
score += 2;
|
|
397
|
-
return score;
|
|
206
|
+
function inferStrategy(authIndicators) {
|
|
207
|
+
if (authIndicators.includes('signature'))
|
|
208
|
+
return 'intercept';
|
|
209
|
+
if (authIndicators.includes('bearer') || authIndicators.includes('csrf'))
|
|
210
|
+
return 'header';
|
|
211
|
+
return 'cookie';
|
|
398
212
|
}
|
|
399
213
|
// ── Framework detection ────────────────────────────────────────────────────
|
|
400
214
|
const FRAMEWORK_DETECT_JS = `
|
|
401
|
-
(
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
215
|
+
() => {
|
|
216
|
+
const r = {};
|
|
217
|
+
try {
|
|
218
|
+
const app = document.querySelector('#app');
|
|
219
|
+
r.vue3 = !!(app && app.__vue_app__);
|
|
220
|
+
r.vue2 = !!(app && app.__vue__);
|
|
221
|
+
r.react = !!window.__REACT_DEVTOOLS_GLOBAL_HOOK__ || !!document.querySelector('[data-reactroot]');
|
|
222
|
+
r.nextjs = !!window.__NEXT_DATA__;
|
|
223
|
+
r.nuxt = !!window.__NUXT__;
|
|
224
|
+
if (r.vue3 && app.__vue_app__) { const gp = app.__vue_app__.config?.globalProperties; r.pinia = !!(gp && gp.$pinia); r.vuex = !!(gp && gp.$store); }
|
|
225
|
+
} catch {}
|
|
226
|
+
return r;
|
|
227
|
+
}
|
|
228
|
+
`;
|
|
229
|
+
// ── Store discovery ────────────────────────────────────────────────────────
|
|
230
|
+
const STORE_DISCOVER_JS = `
|
|
231
|
+
() => {
|
|
232
|
+
const stores = [];
|
|
233
|
+
try {
|
|
234
|
+
const app = document.querySelector('#app');
|
|
235
|
+
if (!app?.__vue_app__) return stores;
|
|
411
236
|
const gp = app.__vue_app__.config?.globalProperties;
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
237
|
+
|
|
238
|
+
// Pinia stores
|
|
239
|
+
const pinia = gp?.$pinia;
|
|
240
|
+
if (pinia?._s) {
|
|
241
|
+
pinia._s.forEach((store, id) => {
|
|
242
|
+
const actions = [];
|
|
243
|
+
const stateKeys = [];
|
|
244
|
+
for (const k in store) {
|
|
245
|
+
try {
|
|
246
|
+
if (k.startsWith('$') || k.startsWith('_')) continue;
|
|
247
|
+
if (typeof store[k] === 'function') actions.push(k);
|
|
248
|
+
else stateKeys.push(k);
|
|
249
|
+
} catch {}
|
|
250
|
+
}
|
|
251
|
+
stores.push({ type: 'pinia', id, actions: actions.slice(0, 20), stateKeys: stateKeys.slice(0, 15) });
|
|
252
|
+
});
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
// Vuex store modules
|
|
256
|
+
const vuex = gp?.$store;
|
|
257
|
+
if (vuex?._modules?.root?._children) {
|
|
258
|
+
const children = vuex._modules.root._children;
|
|
259
|
+
for (const [modName, mod] of Object.entries(children)) {
|
|
260
|
+
const actions = Object.keys(mod._rawModule?.actions ?? {}).slice(0, 20);
|
|
261
|
+
const stateKeys = Object.keys(mod.state ?? {}).slice(0, 15);
|
|
262
|
+
stores.push({ type: 'vuex', id: modName, actions, stateKeys });
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
} catch {}
|
|
266
|
+
return stores;
|
|
267
|
+
}
|
|
418
268
|
`;
|
|
419
269
|
// ── Main explore function ──────────────────────────────────────────────────
|
|
420
|
-
export async function exploreUrl(url, opts
|
|
421
|
-
const
|
|
422
|
-
const
|
|
423
|
-
|
|
424
|
-
const result = await browserSession(opts.BrowserFactory, async (page) => {
|
|
270
|
+
export async function exploreUrl(url, opts) {
|
|
271
|
+
const waitSeconds = opts.waitSeconds ?? 3.0;
|
|
272
|
+
const exploreTimeout = Math.max(DEFAULT_BROWSER_EXPLORE_TIMEOUT, 45.0 + waitSeconds * 8.0);
|
|
273
|
+
return browserSession(opts.BrowserFactory, async (page) => {
|
|
425
274
|
return runWithTimeout((async () => {
|
|
426
275
|
// Step 1: Navigate
|
|
427
276
|
await page.goto(url);
|
|
428
|
-
await page.wait(
|
|
429
|
-
// Step 2: Auto-scroll to trigger lazy loading
|
|
277
|
+
await page.wait(waitSeconds);
|
|
278
|
+
// Step 2: Auto-scroll to trigger lazy loading (use keyboard since page.scroll may not exist)
|
|
430
279
|
for (let i = 0; i < 3; i++) {
|
|
431
|
-
|
|
280
|
+
try {
|
|
281
|
+
await page.pressKey('End');
|
|
282
|
+
}
|
|
283
|
+
catch { }
|
|
432
284
|
await page.wait(1);
|
|
433
285
|
}
|
|
434
|
-
// Step 3:
|
|
286
|
+
// Step 3: Read page metadata
|
|
287
|
+
const metadata = await readPageMetadata(page);
|
|
288
|
+
// Step 4: Capture network traffic
|
|
435
289
|
const rawNetwork = await page.networkRequests(false);
|
|
436
|
-
const networkEntries =
|
|
437
|
-
// Step
|
|
290
|
+
const networkEntries = parseNetworkRequests(rawNetwork);
|
|
291
|
+
// Step 5: For JSON endpoints, re-fetch response body in-browser
|
|
438
292
|
const jsonEndpoints = networkEntries.filter(e => e.contentType.includes('json') && e.method === 'GET' && e.status === 200);
|
|
439
293
|
for (const ep of jsonEndpoints.slice(0, 10)) {
|
|
440
|
-
// Only fetch body for promising-looking API endpoints
|
|
441
|
-
if (ep.url.includes('/api/') || ep.url.includes('/x/') || ep.url.includes('/web/') ||
|
|
442
|
-
ep.contentType.includes('json')) {
|
|
443
|
-
try {
|
|
444
|
-
const bodyResult = await page.evaluate(`
|
|
445
|
-
async () => {
|
|
446
294
|
try {
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
} catch { return null; }
|
|
452
|
-
}
|
|
453
|
-
`);
|
|
454
|
-
if (bodyResult && typeof bodyResult === 'string') {
|
|
455
|
-
try {
|
|
456
|
-
ep.responseBody = JSON.parse(bodyResult);
|
|
457
|
-
}
|
|
458
|
-
catch { }
|
|
459
|
-
}
|
|
460
|
-
else if (bodyResult && typeof bodyResult === 'object') {
|
|
461
|
-
ep.responseBody = bodyResult;
|
|
295
|
+
const body = await page.evaluate(`async () => { try { const r = await fetch(${JSON.stringify(ep.url)}, {credentials:'include'}); if (!r.ok) return null; const d = await r.json(); return JSON.stringify(d).slice(0,10000); } catch { return null; } }`);
|
|
296
|
+
if (body && typeof body === 'string') {
|
|
297
|
+
try {
|
|
298
|
+
ep.responseBody = JSON.parse(body);
|
|
462
299
|
}
|
|
300
|
+
catch { }
|
|
463
301
|
}
|
|
464
|
-
|
|
302
|
+
else if (body && typeof body === 'object')
|
|
303
|
+
ep.responseBody = body;
|
|
465
304
|
}
|
|
305
|
+
catch { }
|
|
466
306
|
}
|
|
467
|
-
// Step
|
|
307
|
+
// Step 6: Detect framework
|
|
468
308
|
let framework = {};
|
|
469
309
|
try {
|
|
470
|
-
const
|
|
471
|
-
if (typeof
|
|
472
|
-
framework =
|
|
473
|
-
else if (typeof fwResult === 'object')
|
|
474
|
-
framework = fwResult;
|
|
310
|
+
const fw = await page.evaluate(FRAMEWORK_DETECT_JS);
|
|
311
|
+
if (fw && typeof fw === 'object')
|
|
312
|
+
framework = fw;
|
|
475
313
|
}
|
|
476
314
|
catch { }
|
|
477
|
-
// Step 6:
|
|
478
|
-
let
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
const parsed = JSON.parse(meta);
|
|
485
|
-
title = parsed.title;
|
|
486
|
-
finalUrl = parsed.url;
|
|
487
|
-
}
|
|
488
|
-
else if (typeof meta === 'object') {
|
|
489
|
-
title = meta.title;
|
|
490
|
-
finalUrl = meta.url;
|
|
315
|
+
// Step 6.5: Discover stores (Pinia / Vuex)
|
|
316
|
+
let stores = [];
|
|
317
|
+
if (framework.pinia || framework.vuex) {
|
|
318
|
+
try {
|
|
319
|
+
const raw = await page.evaluate(STORE_DISCOVER_JS);
|
|
320
|
+
if (Array.isArray(raw))
|
|
321
|
+
stores = raw;
|
|
491
322
|
}
|
|
323
|
+
catch { }
|
|
492
324
|
}
|
|
493
|
-
catch { }
|
|
494
325
|
// Step 7: Analyze endpoints
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
326
|
+
const seen = new Map();
|
|
327
|
+
for (const entry of networkEntries) {
|
|
328
|
+
if (!entry.url)
|
|
329
|
+
continue;
|
|
330
|
+
const ct = entry.contentType.toLowerCase();
|
|
331
|
+
if (ct.includes('image/') || ct.includes('font/') || ct.includes('css') || ct.includes('javascript') || ct.includes('wasm'))
|
|
332
|
+
continue;
|
|
333
|
+
if (entry.status && entry.status >= 400)
|
|
334
|
+
continue;
|
|
335
|
+
const pattern = urlToPattern(entry.url);
|
|
336
|
+
const key = `${entry.method}:${pattern}`;
|
|
337
|
+
if (seen.has(key))
|
|
338
|
+
continue;
|
|
339
|
+
const qp = [];
|
|
340
|
+
try {
|
|
341
|
+
new URL(entry.url).searchParams.forEach((_v, k) => { if (!VOLATILE_PARAMS.has(k))
|
|
342
|
+
qp.push(k); });
|
|
343
|
+
}
|
|
344
|
+
catch { }
|
|
345
|
+
const ep = {
|
|
346
|
+
pattern, method: entry.method, url: entry.url, status: entry.status, contentType: ct,
|
|
347
|
+
queryParams: qp, hasSearchParam: qp.some(p => SEARCH_PARAMS.has(p)),
|
|
348
|
+
hasPaginationParam: qp.some(p => PAGINATION_PARAMS.has(p)),
|
|
349
|
+
hasLimitParam: qp.some(p => LIMIT_PARAMS.has(p)),
|
|
350
|
+
authIndicators: detectAuthIndicators(entry.requestHeaders),
|
|
351
|
+
responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
|
|
352
|
+
score: 0,
|
|
353
|
+
};
|
|
354
|
+
ep.score = scoreEndpoint(ep);
|
|
355
|
+
seen.set(key, ep);
|
|
498
356
|
}
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
// Step 8: Score and rank endpoints
|
|
502
|
-
const scoredEndpoints = analyzedEndpoints
|
|
503
|
-
.map(ep => ({ ...ep, score: scoreEndpoint(ep) }))
|
|
504
|
-
.filter(ep => ep.score >= 5)
|
|
505
|
-
.sort((a, b) => b.score - a.score);
|
|
506
|
-
// Step 9: Infer capabilities from top endpoints
|
|
357
|
+
const analyzedEndpoints = [...seen.values()].filter(ep => ep.score >= 5).sort((a, b) => b.score - a.score);
|
|
358
|
+
// Step 8: Infer capabilities
|
|
507
359
|
const capabilities = [];
|
|
508
360
|
const usedNames = new Set();
|
|
509
|
-
for (const ep of
|
|
510
|
-
let capName = inferCapabilityName(ep, opts.goal);
|
|
511
|
-
// Deduplicate names
|
|
361
|
+
for (const ep of analyzedEndpoints.slice(0, 8)) {
|
|
362
|
+
let capName = inferCapabilityName(ep.url, opts.goal);
|
|
512
363
|
if (usedNames.has(capName)) {
|
|
513
364
|
const suffix = ep.pattern.split('/').filter(s => s && !s.startsWith('{') && !s.includes('.')).pop();
|
|
514
365
|
capName = suffix ? `${capName}_${suffix}` : `${capName}_${usedNames.size}`;
|
|
515
366
|
}
|
|
516
367
|
usedNames.add(capName);
|
|
368
|
+
const cols = [];
|
|
369
|
+
if (ep.responseAnalysis) {
|
|
370
|
+
for (const role of ['title', 'url', 'author', 'score', 'time']) {
|
|
371
|
+
if (ep.responseAnalysis.detectedFields[role])
|
|
372
|
+
cols.push(role);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
const args = [];
|
|
376
|
+
if (ep.hasSearchParam)
|
|
377
|
+
args.push({ name: 'keyword', type: 'str', required: true });
|
|
378
|
+
args.push({ name: 'limit', type: 'int', required: false, default: 20 });
|
|
379
|
+
if (ep.hasPaginationParam)
|
|
380
|
+
args.push({ name: 'page', type: 'int', required: false, default: 1 });
|
|
381
|
+
// Link store actions to capabilities when store-action strategy is recommended
|
|
382
|
+
const epStrategy = inferStrategy(ep.authIndicators);
|
|
383
|
+
let storeHint;
|
|
384
|
+
if ((epStrategy === 'intercept' || ep.authIndicators.includes('signature')) && stores.length > 0) {
|
|
385
|
+
// Try to find a store/action that matches this endpoint's purpose
|
|
386
|
+
for (const s of stores) {
|
|
387
|
+
const matchingAction = s.actions.find(a => capName.split('_').some(part => a.toLowerCase().includes(part)) ||
|
|
388
|
+
a.toLowerCase().includes('fetch') || a.toLowerCase().includes('get'));
|
|
389
|
+
if (matchingAction) {
|
|
390
|
+
storeHint = { store: s.id, action: matchingAction };
|
|
391
|
+
break;
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
}
|
|
517
395
|
capabilities.push({
|
|
518
|
-
name: capName,
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
confidence: Math.min(ep.score / 20, 1.0),
|
|
522
|
-
endpoint: ep.pattern,
|
|
396
|
+
name: capName, description: `${opts.site ?? detectSiteName(url)} ${capName}`,
|
|
397
|
+
strategy: storeHint ? 'store-action' : epStrategy,
|
|
398
|
+
confidence: Math.min(ep.score / 20, 1.0), endpoint: ep.pattern,
|
|
523
399
|
itemPath: ep.responseAnalysis?.itemPath ?? null,
|
|
524
|
-
recommendedColumns:
|
|
525
|
-
recommendedArgs:
|
|
400
|
+
recommendedColumns: cols.length ? cols : ['title', 'url'],
|
|
401
|
+
recommendedArgs: args,
|
|
402
|
+
...(storeHint ? { storeHint } : {}),
|
|
526
403
|
});
|
|
527
404
|
}
|
|
528
|
-
// Step
|
|
529
|
-
const
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
final_url: finalUrl,
|
|
541
|
-
title,
|
|
542
|
-
framework,
|
|
543
|
-
top_strategy: topStrategy,
|
|
544
|
-
endpoint_count: analyzedEndpoints.length,
|
|
545
|
-
api_endpoint_count: scoredEndpoints.length,
|
|
546
|
-
capabilities,
|
|
547
|
-
endpoints: scoredEndpoints.map(ep => ({
|
|
548
|
-
pattern: ep.pattern,
|
|
549
|
-
method: ep.method,
|
|
550
|
-
url: ep.url,
|
|
551
|
-
status: ep.status,
|
|
552
|
-
contentType: ep.contentType,
|
|
553
|
-
score: ep.score,
|
|
554
|
-
queryParams: ep.queryParams,
|
|
555
|
-
itemPath: ep.responseAnalysis?.itemPath ?? null,
|
|
556
|
-
itemCount: ep.responseAnalysis?.itemCount ?? 0,
|
|
557
|
-
detectedFields: ep.responseAnalysis?.detectedFields ?? {},
|
|
558
|
-
authIndicators: ep.authIndicators,
|
|
559
|
-
})),
|
|
560
|
-
auth_indicators: [...allAuthIndicators],
|
|
405
|
+
// Step 9: Determine overall auth strategy
|
|
406
|
+
const allAuth = new Set(analyzedEndpoints.flatMap(ep => ep.authIndicators));
|
|
407
|
+
const topStrategy = allAuth.has('signature') ? 'intercept' : allAuth.has('bearer') || allAuth.has('csrf') ? 'header' : allAuth.size === 0 ? 'public' : 'cookie';
|
|
408
|
+
const siteName = opts.site ?? detectSiteName(metadata.url || url);
|
|
409
|
+
const targetDir = opts.outDir ?? path.join('.opencli', 'explore', siteName);
|
|
410
|
+
fs.mkdirSync(targetDir, { recursive: true });
|
|
411
|
+
const result = {
|
|
412
|
+
site: siteName, target_url: url, final_url: metadata.url, title: metadata.title,
|
|
413
|
+
framework, stores, top_strategy: topStrategy,
|
|
414
|
+
endpoint_count: analyzedEndpoints.length + [...seen.values()].filter(ep => ep.score < 5).length,
|
|
415
|
+
api_endpoint_count: analyzedEndpoints.length,
|
|
416
|
+
capabilities, auth_indicators: [...allAuth],
|
|
561
417
|
};
|
|
562
|
-
|
|
418
|
+
// Write artifacts
|
|
419
|
+
fs.writeFileSync(path.join(targetDir, 'manifest.json'), JSON.stringify({
|
|
420
|
+
site: siteName, target_url: url, final_url: metadata.url, title: metadata.title,
|
|
421
|
+
framework, stores: stores.map(s => ({ type: s.type, id: s.id, actions: s.actions })),
|
|
422
|
+
top_strategy: topStrategy, explored_at: new Date().toISOString(),
|
|
423
|
+
}, null, 2));
|
|
424
|
+
fs.writeFileSync(path.join(targetDir, 'endpoints.json'), JSON.stringify(analyzedEndpoints.map(ep => ({
|
|
425
|
+
pattern: ep.pattern, method: ep.method, url: ep.url, status: ep.status,
|
|
426
|
+
contentType: ep.contentType, score: ep.score, queryParams: ep.queryParams,
|
|
427
|
+
itemPath: ep.responseAnalysis?.itemPath ?? null, itemCount: ep.responseAnalysis?.itemCount ?? 0,
|
|
428
|
+
detectedFields: ep.responseAnalysis?.detectedFields ?? {}, authIndicators: ep.authIndicators,
|
|
429
|
+
})), null, 2));
|
|
430
|
+
fs.writeFileSync(path.join(targetDir, 'capabilities.json'), JSON.stringify(capabilities, null, 2));
|
|
431
|
+
fs.writeFileSync(path.join(targetDir, 'auth.json'), JSON.stringify({
|
|
432
|
+
top_strategy: topStrategy, indicators: [...allAuth], framework,
|
|
433
|
+
}, null, 2));
|
|
434
|
+
if (stores.length > 0) {
|
|
435
|
+
fs.writeFileSync(path.join(targetDir, 'stores.json'), JSON.stringify(stores, null, 2));
|
|
436
|
+
}
|
|
437
|
+
return { ...result, out_dir: targetDir };
|
|
438
|
+
})(), { timeout: exploreTimeout, label: `Explore ${url}` });
|
|
563
439
|
});
|
|
564
|
-
// Write artifacts
|
|
565
|
-
const manifest = {
|
|
566
|
-
site: result.site,
|
|
567
|
-
target_url: result.target_url,
|
|
568
|
-
final_url: result.final_url,
|
|
569
|
-
title: result.title,
|
|
570
|
-
framework: result.framework,
|
|
571
|
-
top_strategy: result.top_strategy,
|
|
572
|
-
explored_at: new Date().toISOString(),
|
|
573
|
-
};
|
|
574
|
-
fs.writeFileSync(path.join(outDir, 'manifest.json'), JSON.stringify(manifest, null, 2));
|
|
575
|
-
fs.writeFileSync(path.join(outDir, 'endpoints.json'), JSON.stringify(result.endpoints ?? [], null, 2));
|
|
576
|
-
fs.writeFileSync(path.join(outDir, 'capabilities.json'), JSON.stringify(result.capabilities ?? [], null, 2));
|
|
577
|
-
fs.writeFileSync(path.join(outDir, 'auth.json'), JSON.stringify({
|
|
578
|
-
top_strategy: result.top_strategy,
|
|
579
|
-
indicators: result.auth_indicators ?? [],
|
|
580
|
-
framework: result.framework ?? {},
|
|
581
|
-
}, null, 2));
|
|
582
|
-
return { ...result, out_dir: outDir };
|
|
583
440
|
}
|
|
584
441
|
export function renderExploreSummary(result) {
|
|
585
442
|
const lines = [
|
|
586
|
-
'opencli
|
|
587
|
-
`
|
|
588
|
-
`URL: ${result.target_url}`,
|
|
589
|
-
`Title: ${result.title || '(none)'}`,
|
|
590
|
-
`Strategy: ${result.top_strategy}`,
|
|
443
|
+
'opencli probe: OK', `Site: ${result.site}`, `URL: ${result.target_url}`,
|
|
444
|
+
`Title: ${result.title || '(none)'}`, `Strategy: ${result.top_strategy}`,
|
|
591
445
|
`Endpoints: ${result.endpoint_count} total, ${result.api_endpoint_count} API`,
|
|
592
446
|
`Capabilities: ${result.capabilities?.length ?? 0}`,
|
|
593
447
|
];
|
|
594
448
|
for (const cap of (result.capabilities ?? []).slice(0, 5)) {
|
|
595
|
-
|
|
449
|
+
const storeInfo = cap.storeHint ? ` → ${cap.storeHint.store}.${cap.storeHint.action}()` : '';
|
|
450
|
+
lines.push(` • ${cap.name} (${cap.strategy}, ${(cap.confidence * 100).toFixed(0)}%)${storeInfo}`);
|
|
596
451
|
}
|
|
597
452
|
const fw = result.framework ?? {};
|
|
598
453
|
const fwNames = Object.entries(fw).filter(([, v]) => v).map(([k]) => k);
|
|
599
454
|
if (fwNames.length)
|
|
600
455
|
lines.push(`Framework: ${fwNames.join(', ')}`);
|
|
456
|
+
const stores = result.stores ?? [];
|
|
457
|
+
if (stores.length) {
|
|
458
|
+
lines.push(`Stores: ${stores.length}`);
|
|
459
|
+
for (const s of stores.slice(0, 5)) {
|
|
460
|
+
lines.push(` • ${s.type}/${s.id}: ${s.actions.slice(0, 5).join(', ')}${s.actions.length > 5 ? '...' : ''}`);
|
|
461
|
+
}
|
|
462
|
+
}
|
|
601
463
|
lines.push(`Output: ${result.out_dir}`);
|
|
602
464
|
return lines.join('\n');
|
|
603
465
|
}
|
|
466
|
+
async function readPageMetadata(page /* IPage */) {
|
|
467
|
+
try {
|
|
468
|
+
const result = await page.evaluate(`() => ({ url: window.location.href, title: document.title || '' })`);
|
|
469
|
+
if (result && typeof result === 'object')
|
|
470
|
+
return { url: String(result.url ?? ''), title: String(result.title ?? '') };
|
|
471
|
+
}
|
|
472
|
+
catch { }
|
|
473
|
+
return { url: '', title: '' };
|
|
474
|
+
}
|