@jackwener/opencli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.github/workflows/ci.yml +26 -0
  2. package/.github/workflows/release.yml +40 -0
  3. package/README.md +67 -0
  4. package/SKILL.md +230 -0
  5. package/dist/bilibili.d.ts +13 -0
  6. package/dist/bilibili.js +93 -0
  7. package/dist/browser.d.ts +48 -0
  8. package/dist/browser.js +261 -0
  9. package/dist/clis/bilibili/favorite.d.ts +1 -0
  10. package/dist/clis/bilibili/favorite.js +39 -0
  11. package/dist/clis/bilibili/feed.d.ts +1 -0
  12. package/dist/clis/bilibili/feed.js +64 -0
  13. package/dist/clis/bilibili/history.d.ts +1 -0
  14. package/dist/clis/bilibili/history.js +44 -0
  15. package/dist/clis/bilibili/me.d.ts +1 -0
  16. package/dist/clis/bilibili/me.js +13 -0
  17. package/dist/clis/bilibili/search.d.ts +1 -0
  18. package/dist/clis/bilibili/search.js +24 -0
  19. package/dist/clis/bilibili/user-videos.d.ts +1 -0
  20. package/dist/clis/bilibili/user-videos.js +38 -0
  21. package/dist/clis/github/search.d.ts +1 -0
  22. package/dist/clis/github/search.js +20 -0
  23. package/dist/clis/index.d.ts +13 -0
  24. package/dist/clis/index.js +16 -0
  25. package/dist/clis/zhihu/search.d.ts +1 -0
  26. package/dist/clis/zhihu/search.js +58 -0
  27. package/dist/engine.d.ts +6 -0
  28. package/dist/engine.js +77 -0
  29. package/dist/explore.d.ts +17 -0
  30. package/dist/explore.js +603 -0
  31. package/dist/generate.d.ts +11 -0
  32. package/dist/generate.js +134 -0
  33. package/dist/main.d.ts +5 -0
  34. package/dist/main.js +117 -0
  35. package/dist/output.d.ts +11 -0
  36. package/dist/output.js +98 -0
  37. package/dist/pipeline.d.ts +9 -0
  38. package/dist/pipeline.js +315 -0
  39. package/dist/promote.d.ts +1 -0
  40. package/dist/promote.js +3 -0
  41. package/dist/register.d.ts +2 -0
  42. package/dist/register.js +2 -0
  43. package/dist/registry.d.ts +50 -0
  44. package/dist/registry.js +42 -0
  45. package/dist/runtime.d.ts +12 -0
  46. package/dist/runtime.js +27 -0
  47. package/dist/scaffold.d.ts +2 -0
  48. package/dist/scaffold.js +2 -0
  49. package/dist/smoke.d.ts +2 -0
  50. package/dist/smoke.js +2 -0
  51. package/dist/snapshotFormatter.d.ts +9 -0
  52. package/dist/snapshotFormatter.js +41 -0
  53. package/dist/synthesize.d.ts +10 -0
  54. package/dist/synthesize.js +191 -0
  55. package/dist/validate.d.ts +2 -0
  56. package/dist/validate.js +73 -0
  57. package/dist/verify.d.ts +2 -0
  58. package/dist/verify.js +9 -0
  59. package/package.json +47 -0
  60. package/src/bilibili.ts +111 -0
  61. package/src/browser.ts +260 -0
  62. package/src/clis/bilibili/favorite.ts +42 -0
  63. package/src/clis/bilibili/feed.ts +71 -0
  64. package/src/clis/bilibili/history.ts +48 -0
  65. package/src/clis/bilibili/hot.yaml +38 -0
  66. package/src/clis/bilibili/me.ts +14 -0
  67. package/src/clis/bilibili/search.ts +25 -0
  68. package/src/clis/bilibili/user-videos.ts +42 -0
  69. package/src/clis/github/search.ts +21 -0
  70. package/src/clis/github/trending.yaml +58 -0
  71. package/src/clis/hackernews/top.yaml +36 -0
  72. package/src/clis/index.ts +19 -0
  73. package/src/clis/twitter/trending.yaml +40 -0
  74. package/src/clis/v2ex/hot.yaml +29 -0
  75. package/src/clis/v2ex/latest.yaml +28 -0
  76. package/src/clis/zhihu/hot.yaml +28 -0
  77. package/src/clis/zhihu/search.ts +65 -0
  78. package/src/engine.ts +86 -0
  79. package/src/explore.ts +648 -0
  80. package/src/generate.ts +145 -0
  81. package/src/main.ts +103 -0
  82. package/src/output.ts +96 -0
  83. package/src/pipeline.ts +295 -0
  84. package/src/promote.ts +3 -0
  85. package/src/register.ts +2 -0
  86. package/src/registry.ts +87 -0
  87. package/src/runtime.ts +36 -0
  88. package/src/scaffold.ts +2 -0
  89. package/src/smoke.ts +2 -0
  90. package/src/snapshotFormatter.ts +51 -0
  91. package/src/synthesize.ts +210 -0
  92. package/src/validate.ts +55 -0
  93. package/src/verify.ts +9 -0
  94. package/tsconfig.json +17 -0
@@ -0,0 +1,603 @@
1
+ /**
2
+ * Deep Explore: intelligent API discovery with response analysis.
3
+ *
4
+ * Unlike simple page snapshots, Deep Explore intercepts network traffic,
5
+ * analyzes response schemas, and automatically infers capabilities that
6
+ * can be turned into CLI commands.
7
+ *
8
+ * Flow:
9
+ * 1. Navigate to target URL
10
+ * 2. Auto-scroll to trigger lazy loading
11
+ * 3. Capture network requests (with body analysis)
12
+ * 4. For each JSON response: detect list fields, infer columns, analyze auth
13
+ * 5. Detect frontend framework (Vue/React/Pinia/Next.js)
14
+ * 6. Generate structured capabilities.json
15
+ */
16
+ import * as fs from 'node:fs';
17
+ import * as path from 'node:path';
18
+ import { browserSession, DEFAULT_BROWSER_EXPLORE_TIMEOUT, runWithTimeout } from './runtime.js';
19
+ // ── Site name detection ────────────────────────────────────────────────────
20
/** Hostnames whose site slug cannot be derived from the domain name itself. */
const KNOWN_ALIASES = {
    'x.com': 'twitter', 'twitter.com': 'twitter',
    'news.ycombinator.com': 'hackernews',
    'www.zhihu.com': 'zhihu', 'www.bilibili.com': 'bilibili',
    'www.v2ex.com': 'v2ex', 'www.reddit.com': 'reddit',
    'www.xiaohongshu.com': 'xiaohongshu', 'www.douban.com': 'douban',
    'www.weibo.com': 'weibo', 'search.bilibili.com': 'bilibili',
};
/**
 * Second-level labels that form a compound public suffix together with a
 * two-letter country TLD (e.g. `co.uk`, `com.cn`, `ne.jp`).
 */
const GENERIC_SECOND_LEVEL_LABELS = new Set(['co', 'com', 'net', 'org', 'gov', 'edu', 'ac', 'ne', 'or']);
/**
 * Derive a short site slug from a URL.
 *
 * Lookup order:
 *  1. exact hostname match in KNOWN_ALIASES;
 *  2. the registrable label of the hostname (`api.github.com` -> `github`,
 *     `shop.example.co.uk` -> `example`);
 *  3. the only label for single-label hosts (`localhost`).
 *
 * @param url Absolute URL to inspect.
 * @returns The slug reduced to `[a-z0-9]`, or `'site'` when the URL cannot be
 *          parsed or has no usable host label.
 */
function detectSiteName(url) {
    let host;
    try {
        host = new URL(url).hostname.toLowerCase();
    }
    catch {
        return 'site';
    }
    // Own-property check: a plain `in` would also match inherited keys such as "constructor".
    if (Object.prototype.hasOwnProperty.call(KNOWN_ALIASES, host))
        return KNOWN_ALIASES[host];
    const slug = (label) => label.replace(/[^a-z0-9]/g, '');
    const labels = host.split('.').filter(p => p && p !== 'www');
    if (labels.length === 0)
        return 'site';
    if (labels.length === 1)
        return slug(labels[0]);
    const tld = labels[labels.length - 1];
    const secondLevel = labels[labels.length - 2];
    // Only skip two labels for real compound suffixes (ccTLD + generic label).
    // Treating every 3-label ".com" host this way would return the subdomain instead of the site.
    const hasCompoundSuffix = labels.length >= 3 &&
        tld.length === 2 &&
        GENERIC_SECOND_LEVEL_LABELS.has(secondLevel);
    return slug(labels[labels.length - (hasCompoundSuffix ? 3 : 2)]);
}
46
+ // ── Field & capability inference ───────────────────────────────────────────
47
/**
 * Semantic column roles and the response field names that usually carry them.
 * A response field is assigned to a role when its (lower-cased) last path
 * segment appears in that role's alias list.
 */
const FIELD_ROLES = {
    title: [
        'title', 'name', 'text', 'content',
        'desc', 'description', 'headline', 'subject',
    ],
    url: [
        'url', 'uri', 'link', 'href', 'permalink',
        'jump_url', 'web_url', 'short_link', 'share_url',
    ],
    author: [
        'author', 'username', 'user_name', 'nickname', 'nick',
        'owner', 'creator', 'up_name', 'uname',
    ],
    score: [
        'score', 'hot', 'heat', 'likes', 'like_count', 'view_count',
        'views', 'stat', 'play', 'favorite_count', 'reply_count',
    ],
    time: [
        'time', 'created_at', 'publish_time', 'pub_time', 'date',
        'ctime', 'mtime', 'pubdate', 'created',
    ],
    id: [
        'id', 'aid', 'bvid', 'mid',
        'uid', 'oid', 'note_id', 'item_id',
    ],
    cover: [
        'cover', 'pic', 'image',
        'thumbnail', 'poster', 'avatar',
    ],
    category: [
        'category', 'tag', 'type',
        'tname', 'channel', 'section',
    ],
};
/** Query parameter names that mark an endpoint as searchable. */
const SEARCH_PARAMS = new Set([
    'q', 'query', 'keyword', 'search',
    'wd', 'kw', 'search_query', 'w',
]);
/** Query parameter names that mark an endpoint as paginated. */
const PAGINATION_PARAMS = new Set([
    'page', 'pn', 'offset',
    'cursor', 'next', 'page_num',
]);
/** Query parameter names that control how many items are returned. */
const LIMIT_PARAMS = new Set([
    'limit', 'count', 'size', 'per_page',
    'page_size', 'ps', 'num',
]);
/**
 * Content types that never describe API data.
 * Note: the entries ending in "/" are prefixes, so an exact Set lookup only
 * matches a content type that is literally that string.
 */
const IGNORED_CONTENT_TYPES = new Set([
    'image/',
    'font/',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/wasm',
]);
/** Query parameters that change on every request and are dropped from URL patterns. */
const VOLATILE_PARAMS = new Set([
    'w_rid', 'wts', '_', 'callback',
    'timestamp', 't', 'nonce', 'sign',
]);
71
/**
 * Parse raw network output into a uniform list of entries.
 *
 * Accepted inputs:
 *  - a text dump with one request per line, either
 *    `[GET] https://host/path => [200]` (brackets optional, `=>` or `→`,
 *    optionally followed by `(content/type)`), or the arrow-less form
 *    `GET https://host/path 200 (content/type)`;
 *  - an array of request-like objects (`method`, `url` or `request.url`,
 *    `status`/`statusCode`, `contentType`/`mimeType`, `responseBody`/`body`,
 *    `requestHeaders`/`headers`).
 *
 * Lines that match neither text form (markdown headers, blank lines) are
 * skipped. Any other input type yields an empty list.
 *
 * @param raw Text dump or array of request-like objects.
 * @returns Entries shaped `{ method, url, status, contentType }`; entries built
 *          from objects additionally carry `responseBody` and `requestHeaders`.
 */
function parseNetworkOutput(raw) {
    if (typeof raw === 'string') {
        const ARROW_LINE = /^\[?(GET|POST|PUT|DELETE|PATCH|OPTIONS|HEAD)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?\s*(?:\(([^)]*)\))?/i;
        const PLAIN_LINE = /^(GET|POST|PUT|DELETE|PATCH|OPTIONS|HEAD)\s+(\S+)\s*→?\s*(\d+)?\s*(?:\(([^)]*)\))?/i;
        // The text dump carries no content type, so guess JSON from the URL shape.
        const guessContentType = (url) => {
            // Look at the path only: "data.json?x=1" is still a JSON resource.
            const withoutQuery = url.split(/[?#]/, 1)[0];
            const looksJson = url.endsWith('.json') || withoutQuery.endsWith('.json') ||
                url.includes('/api/') || url.includes('/x/');
            return looksJson ? 'application/json' : '';
        };
        const entries = [];
        for (const rawLine of raw.split('\n')) {
            // Trim first so indented lines are matched by the anchored patterns.
            const line = rawLine.trim();
            if (!line)
                continue;
            const arrow = ARROW_LINE.exec(line);
            if (arrow) {
                const [, method, url, status, explicitType] = arrow;
                entries.push({
                    method: method.toUpperCase(),
                    url,
                    status: status ? parseInt(status, 10) : null,
                    // An explicit "(type)" suffix wins over the URL-based guess.
                    contentType: explicitType ?? guessContentType(url),
                });
                continue;
            }
            const plain = PLAIN_LINE.exec(line);
            if (plain) {
                const [, method, url, status, explicitType] = plain;
                entries.push({
                    method: method.toUpperCase(),
                    url,
                    status: status ? parseInt(status, 10) : null,
                    contentType: explicitType ?? '',
                });
            }
        }
        return entries;
    }
    if (Array.isArray(raw)) {
        return raw
            // Ignore holes/nulls/primitives instead of crashing on property access.
            .filter((e) => e && typeof e === 'object')
            .map((e) => ({
            method: String(e.method ?? 'GET').toUpperCase(),
            url: e.url ?? e.request?.url ?? '',
            status: e.status ?? e.statusCode ?? null,
            contentType: e.contentType ?? e.mimeType ?? '',
            responseBody: e.responseBody ?? e.body,
            requestHeaders: e.requestHeaders ?? e.headers,
        }));
    }
    return [];
}
122
/**
 * Normalize a URL into a de-duplication pattern.
 *
 * The pattern is `host + path + sorted query keys`, where
 *  - path segments that are entirely numeric become `{id}`,
 *  - segments that are entirely hex (8+ chars) become `{hex}`,
 *  - segments of the form `BV` + 10 alphanumerics become `{bvid}`,
 *  - query values are replaced by `{}` and volatile parameters
 *    (VOLATILE_PARAMS) are dropped.
 *
 * Placeholders are applied to whole segments only, so a segment such as
 * `3d-models` is kept verbatim and a hex id that starts with a digit is
 * recognised as `{hex}` rather than being split after its leading digits.
 *
 * @param url Absolute URL.
 * @returns The normalized pattern, or `url` unchanged when it cannot be parsed.
 */
function urlToPattern(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return url;
    }
    const placeholderFor = (segment) => {
        if (/^\d+$/.test(segment))
            return '{id}';
        if (/^[0-9a-fA-F]{8,}$/.test(segment))
            return '{hex}';
        if (/^BV[a-zA-Z0-9]{10}$/.test(segment))
            return '{bvid}';
        return segment;
    };
    const pathNorm = parsed.pathname.split('/').map(placeholderFor).join('/');
    const stableKeys = [];
    parsed.searchParams.forEach((_value, key) => {
        if (!VOLATILE_PARAMS.has(key))
            stableKeys.push(key);
    });
    // Sort so that parameter order in the request does not create distinct patterns.
    const qs = stableKeys.length ? '?' + stableKeys.sort().map(k => `${k}={}`).join('&') : '';
    return `${parsed.host}${pathNorm}${qs}`;
}
144
/**
 * Collect the query-string parameters of a URL into a plain object.
 * When a key is repeated, the last value wins.
 *
 * @param url Absolute URL.
 * @returns Map of parameter name to value; empty when the URL cannot be parsed.
 */
function extractQueryParams(url) {
    let search;
    try {
        search = new URL(url).searchParams;
    }
    catch {
        return {};
    }
    const collected = {};
    for (const [name, value] of search) {
        collected[name] = value;
    }
    return collected;
}
157
/**
 * Derive auth indicator tags from the names of request headers.
 *
 * Header names are compared case-insensitively. Tags are reported in a fixed
 * order: `bearer`, `csrf`, `signature`, `transaction`.
 *
 * @param headers Request headers keyed by name (values are not inspected); may be absent.
 * @returns The matching tags, or an empty array when no headers are given.
 */
function detectAuthIndicators(headers) {
    if (!headers)
        return [];
    const names = Object.keys(headers).map(name => name.toLowerCase());
    const rules = [
        { tag: 'bearer', matches: (n) => n === 'authorization' },
        { tag: 'csrf', matches: (n) => n.startsWith('x-csrf') || n.startsWith('x-xsrf') },
        { tag: 'signature', matches: (n) => n.startsWith('x-s') || n === 'x-t' || n === 'x-s-common' },
        { tag: 'transaction', matches: (n) => n === 'x-client-transaction-id' },
    ];
    return rules
        .filter(rule => names.some(n => rule.matches(n)))
        .map(rule => rule.tag);
}
175
/**
 * Analyze a parsed JSON response to locate its main list and map the list
 * item fields to semantic roles.
 *
 * The body is searched (object properties only, up to 4 levels deep) for
 * arrays with at least two elements of which at least one is a plain object.
 * The longest such array wins; on ties the first one found is kept.
 *
 * @param body Parsed JSON value.
 * @returns `null` when `body` is not an object or contains no qualifying
 *          array; otherwise
 *          `{ itemPath, itemCount, detectedFields, sampleFieldNames }` where
 *          `itemPath` is the dotted path to the list (`null` when the body
 *          itself is the list) and `detectedFields` maps role -> field path.
 */
function analyzeResponseBody(body) {
    if (!body || typeof body !== 'object')
        return null;
    const isRecord = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    const candidates = [];
    const collectLists = (node, nodePath, depth) => {
        if (depth > 4)
            return;
        if (Array.isArray(node)) {
            // Arrays of primitives are not item lists; arrays are not descended into.
            if (node.length >= 2 && node.some(isRecord))
                candidates.push({ path: nodePath, items: node });
            return;
        }
        if (isRecord(node)) {
            for (const [key, child] of Object.entries(node)) {
                collectLists(child, nodePath ? `${nodePath}.${key}` : key, depth + 1);
            }
        }
    };
    collectLists(body, '', 0);
    if (!candidates.length)
        return null;
    // Longest list wins; strict ">" keeps the first-found candidate on ties.
    const best = candidates.reduce((winner, next) => (next.items.length > winner.items.length ? next : winner));
    // Sample the first *object* item: a list qualifies when any item is an object,
    // so the element at index 0 may be a primitive or null.
    const sampleItem = best.items.find(isRecord);
    const sampleFieldNames = sampleItem ? flattenFieldNames(sampleItem, '', 2) : [];
    // For each role, take the first sample field whose last path segment is a known alias.
    const detectedFields = {};
    for (const [role, aliases] of Object.entries(FIELD_ROLES)) {
        const match = sampleFieldNames.find((fieldName) => {
            const basename = fieldName.split('.').pop()?.toLowerCase() ?? '';
            return aliases.includes(basename);
        });
        if (match !== undefined)
            detectedFields[role] = match;
    }
    return {
        itemPath: best.path || null,
        itemCount: best.items.length,
        detectedFields,
        sampleFieldNames,
    };
}
228
/**
 * List the field names of an object as dotted paths, descending into nested
 * plain objects (arrays are listed but not descended into).
 *
 * @param obj Value to inspect.
 * @param prefix Dotted path of `obj` itself; empty string for the root.
 * @param maxDepth Number of object levels to list; `<= 0` lists nothing.
 * @returns Paths in key order, each parent immediately followed by its children.
 */
function flattenFieldNames(obj, prefix, maxDepth) {
    const collected = [];
    if (maxDepth <= 0 || !obj || typeof obj !== 'object')
        return collected;
    Object.keys(obj).forEach((key) => {
        const qualified = prefix ? `${prefix}.${key}` : key;
        collected.push(qualified);
        const child = obj[key];
        const isNestedRecord = child && typeof child === 'object' && !Array.isArray(child);
        if (isNestedRecord) {
            for (const nested of flattenFieldNames(child, qualified, maxDepth - 1)) {
                collected.push(nested);
            }
        }
    });
    return collected;
}
244
/**
 * Turn parsed network entries into de-duplicated endpoint descriptions.
 *
 * Entries are dropped when they have no URL, carry a static-asset content
 * type (image, font, CSS, JavaScript, WASM) or have a status of 400 or above.
 * Remaining entries are keyed by `METHOD:pattern`; the first entry seen for a
 * key is kept.
 *
 * @param entries Entries as produced by parseNetworkOutput.
 * @param siteHost Hostname of the explored site (currently not used here).
 * @returns One endpoint description per distinct method/pattern pair, in first-seen order.
 */
function analyzeEndpoints(entries, siteHost) {
    const staticMarkers = ['image/', 'font/', 'css', 'javascript', 'wasm'];
    const byKey = new Map();
    entries.forEach((entry) => {
        if (!entry.url)
            return;
        const contentType = entry.contentType.toLowerCase();
        // Compare the bare MIME type (without "; charset=..." parameters) against the ignore list.
        const mimeType = contentType.split(';')[0]?.trim() ?? '';
        const isStaticAsset = IGNORED_CONTENT_TYPES.has(mimeType) ||
            staticMarkers.some(marker => contentType.includes(marker));
        if (isStaticAsset)
            return;
        // Entries without a status are kept; only known failures are dropped.
        const failed = entry.status && entry.status >= 400;
        if (failed)
            return;
        const pattern = urlToPattern(entry.url);
        const dedupeKey = `${entry.method}:${pattern}`;
        if (byKey.has(dedupeKey))
            return;
        const stableParamNames = Object.keys(extractQueryParams(entry.url))
            .filter(name => !VOLATILE_PARAMS.has(name));
        const hasAny = (known) => stableParamNames.some(name => known.has(name));
        byKey.set(dedupeKey, {
            pattern,
            method: entry.method,
            url: entry.url,
            status: entry.status,
            contentType,
            queryParams: stableParamNames,
            hasSearchParam: hasAny(SEARCH_PARAMS),
            hasPaginationParam: hasAny(PAGINATION_PARAMS),
            hasLimitParam: hasAny(LIMIT_PARAMS),
            authIndicators: detectAuthIndicators(entry.requestHeaders),
            responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
        });
    });
    return Array.from(byKey.values());
}
284
/**
 * Choose the access strategy for an endpoint from its auth indicators.
 *
 *  - `signature`                      -> `'intercept'`
 *  - `transaction`, `bearer`, `csrf`  -> `'header'`
 *  - anything else (including none)   -> `'cookie'`
 *
 * @param endpoint Endpoint description with an `authIndicators` array.
 * @returns `'intercept'`, `'header'` or `'cookie'`.
 */
function inferStrategy(endpoint) {
    const indicators = endpoint.authIndicators;
    if (indicators.includes('signature'))
        return 'intercept';
    const needsHeader = ['transaction', 'bearer', 'csrf'].some(tag => indicators.includes(tag));
    return needsHeader ? 'header' : 'cookie';
}
301
/**
 * Infer a capability name for an endpoint.
 *
 * Precedence:
 *  1. an explicit `goal`, returned as-is;
 *  2. `'search'` when the endpoint has a search parameter;
 *  3. keyword heuristics on the lower-cased URL (hot, feed, comments, history,
 *     me, detail, favorite, notifications — first match wins);
 *  4. the last non-id path segment, with non-alphanumerics replaced by `_`;
 *  5. `'data'`.
 *
 * @param endpoint Endpoint description; reads `url` and `hasSearchParam`.
 * @param goal Optional caller-chosen name.
 * @returns The capability name.
 */
function inferCapabilityName(endpoint, goal) {
    if (goal)
        return goal;
    if (endpoint.hasSearchParam)
        return 'search';
    const u = endpoint.url.toLowerCase();
    const mentions = (...words) => words.some(word => u.includes(word));
    if (mentions('hot', 'popular', 'ranking', 'trending'))
        return 'hot';
    if (mentions('feed', 'timeline', 'dynamic'))
        return 'feed';
    if (mentions('comment', 'reply'))
        return 'comments';
    if (mentions('history'))
        return 'history';
    // "/me" must be a whole path segment; a plain substring test would also
    // classify "/media", "/message" or "/members" as the current-user endpoint.
    if (mentions('profile', 'userinfo', 'myinfo') || /\/me(?=[\/?#]|$)/.test(u))
        return 'me';
    if (mentions('video', 'article', 'detail', 'view'))
        return 'detail';
    if (mentions('favorite', 'collect', 'bookmark'))
        return 'favorite';
    if (mentions('notification', 'notice'))
        return 'notifications';
    // Fallback: last path segment that is not a numeric or hex id.
    try {
        const pathname = new URL(endpoint.url).pathname;
        const segments = pathname.split('/').filter(s => s && !s.match(/^\d+$/) && !s.match(/^[0-9a-f]{8,}$/i));
        if (segments.length)
            return segments[segments.length - 1].replace(/[^a-z0-9]/gi, '_').toLowerCase();
    }
    catch {
        // Unparseable URL: fall through to the generic name.
    }
    return 'data';
}
338
/**
 * Pick the output columns to recommend for a capability.
 *
 * Roles are considered in the fixed order title, url, author, score, time and
 * kept when the response analysis detected a field for them.
 *
 * @param analysis Result of analyzeResponseBody, or a falsy value when unavailable.
 * @returns The detected roles in priority order, or `['title', 'url']` when
 *          there is no analysis or none of the roles was detected.
 */
function buildRecommendedColumns(analysis) {
    const fallback = ['title', 'url'];
    if (!analysis)
        return fallback;
    const detected = ['title', 'url', 'author', 'score', 'time']
        .filter(role => analysis.detectedFields[role]);
    return detected.length > 0 ? detected : fallback;
}
353
/**
 * Build the recommended CLI arguments for an endpoint.
 *
 *  - `keyword` (str, required) when the endpoint has a search parameter;
 *  - `limit` (int, default 20) always;
 *  - `page` (int, default 1) when the endpoint has a pagination parameter.
 *
 * The search argument is always exposed as `keyword`, whatever the
 * endpoint's own parameter is called.
 *
 * @param endpoint Endpoint description; reads `hasSearchParam` and `hasPaginationParam`.
 * @returns Argument descriptors in the order listed above.
 */
function buildRecommendedArgs(endpoint) {
    const args = [];
    if (endpoint.hasSearchParam) {
        args.push({ name: 'keyword', type: 'str', required: true });
    }
    args.push({ name: 'limit', type: 'int', required: false, default: 20 });
    if (endpoint.hasPaginationParam) {
        args.push({ name: 'page', type: 'int', required: false, default: 1 });
    }
    return args;
}
369
/**
 * Score how promising an endpoint is as the basis for a CLI capability.
 *
 * Points:
 *  - JSON content type: 10
 *  - response analysis present: 5, plus the item count capped at 10,
 *    plus 2 per detected field role
 *  - pattern contains `/api/` or `/x/`: 3
 *  - search parameter: 3; pagination parameter: 2; limit parameter: 2
 *  - status 200: 2
 *
 * @param ep Endpoint description as produced by analyzeEndpoints.
 * @returns The summed score (higher is better).
 */
function scoreEndpoint(ep) {
    const analysis = ep.responseAnalysis;
    const looksLikeApiPath = ep.pattern.includes('/api/') || ep.pattern.includes('/x/');
    const points = [
        ep.contentType.includes('json') ? 10 : 0,
        analysis ? 5 : 0,
        analysis ? Math.min(analysis.itemCount, 10) : 0,
        analysis ? Object.keys(analysis.detectedFields).length * 2 : 0,
        looksLikeApiPath ? 3 : 0,
        ep.hasSearchParam ? 3 : 0,
        ep.hasPaginationParam ? 2 : 0,
        ep.hasLimitParam ? 2 : 0,
        ep.status === 200 ? 2 : 0,
    ];
    return points.reduce((total, value) => total + value, 0);
}
399
+ // ── Framework detection ────────────────────────────────────────────────────
400
/**
 * Script evaluated inside the page to detect the frontend framework.
 *
 * It evaluates to a JSON string of boolean flags (`vue3`, `vue2`, `react`,
 * `nextjs`, `nuxt`, and for Vue 3 apps `pinia` / `vuex`). Any failure inside
 * the page is swallowed, in which case the flags collected so far (possibly
 * none) are returned.
 *
 * React is reported when a renderer has registered with the DevTools hook,
 * when a direct child of <body> is a React root container, or when a
 * `data-reactroot` element exists. The bare presence of
 * `__REACT_DEVTOOLS_GLOBAL_HOOK__` is not used as a signal: the DevTools
 * extension installs it on every page, React or not.
 */
const FRAMEWORK_DETECT_JS = `
(() => {
  const result = {};
  try {
    const app = document.querySelector('#app');
    result.vue3 = !!(app && app.__vue_app__);
    result.vue2 = !!(app && app.__vue__);
    const hook = window.__REACT_DEVTOOLS_GLOBAL_HOOK__;
    const hookHasRenderer = !!(hook && hook.renderers && hook.renderers.size > 0);
    const hasReactRoot = Array.from(document.querySelectorAll('body > *')).some((el) =>
      !!el._reactRootContainer || Object.keys(el).some((k) => k.startsWith('__reactContainer$')));
    result.react = hookHasRenderer || hasReactRoot || !!document.querySelector('[data-reactroot]');
    result.nextjs = !!window.__NEXT_DATA__;
    result.nuxt = !!window.__NUXT__;
    if (result.vue3 && app.__vue_app__) {
      const gp = app.__vue_app__.config?.globalProperties;
      result.pinia = !!(gp && gp.$pinia);
      result.vuex = !!(gp && gp.$store);
    }
  } catch {}
  return JSON.stringify(result);
})()
`;
419
+ // ── Main explore function ──────────────────────────────────────────────────
420
/**
 * Explore a URL in a browser session, analyze the network traffic it causes
 * and write the findings to disk.
 *
 * Steps: navigate and wait, scroll down three times, read the captured network
 * requests, re-fetch up to 10 successful JSON GET endpoints inside the page to
 * obtain their bodies, detect the frontend framework, read the page title and
 * final URL, then analyze, score and rank the endpoints and derive up to 8
 * capabilities from the best ones.
 *
 * Files written to the output directory: `manifest.json`, `endpoints.json`,
 * `capabilities.json` and `auth.json`.
 *
 * @param url Page to explore.
 * @param opts Optional settings:
 *   - `site`: site slug (default: derived from `url`);
 *   - `outDir`: output directory (default: `.opencli/explore/<site>`);
 *   - `BrowserFactory`: passed through to `browserSession`;
 *   - `waitSeconds`: value passed to `page.wait` after navigation (default 3);
 *   - `goal`: name used for the capabilities instead of an inferred one.
 * @returns The exploration result (site, URLs, title, framework flags,
 *          top strategy, endpoint counts, capabilities, scored endpoints,
 *          auth indicators) plus `out_dir`.
 *
 * NOTE(review): errors from `browserSession` / `runWithTimeout` and from the
 * file writes are not caught here — presumably they propagate to the caller.
 */
export async function exploreUrl(url, opts = {}) {
    const site = opts.site ?? detectSiteName(url);
    const outDir = opts.outDir ?? path.join('.opencli', 'explore', site);
    fs.mkdirSync(outDir, { recursive: true });
    const result = await browserSession(opts.BrowserFactory, async (page) => {
        return runWithTimeout((async () => {
            // Step 1: Navigate
            await page.goto(url);
            await page.wait(opts.waitSeconds ?? 3);
            // Step 2: Auto-scroll to trigger lazy loading
            for (let i = 0; i < 3; i++) {
                await page.scroll('down');
                await page.wait(1);
            }
            // Step 3: Capture network traffic
            const rawNetwork = await page.networkRequests(false);
            const networkEntries = parseNetworkOutput(rawNetwork);
            // Step 4: For JSON endpoints, fetch the response body in-browser (best effort)
            const jsonEndpoints = networkEntries.filter(e => e.contentType.includes('json') && e.method === 'GET' && e.status === 200);
            for (const ep of jsonEndpoints.slice(0, 10)) {
                try {
                    // Large bodies are shrunk by keeping only the first items of every
                    // array. Cutting the serialized text instead would produce invalid
                    // JSON and lose the body entirely. For such bodies the analyzed
                    // item count is therefore a lower bound.
                    const bodyResult = await page.evaluate(`
                        async () => {
                            try {
                                const resp = await fetch(${JSON.stringify(ep.url)}, { credentials: 'include' });
                                if (!resp.ok) return null;
                                const data = await resp.json();
                                const text = JSON.stringify(data);
                                if (text.length <= 10000) return text;
                                return JSON.stringify(data, (_k, v) => Array.isArray(v) ? v.slice(0, 5) : v);
                            } catch { return null; }
                        }
                    `);
                    if (bodyResult && typeof bodyResult === 'string') {
                        try {
                            ep.responseBody = JSON.parse(bodyResult);
                        }
                        catch {
                            // Not valid JSON: leave the entry without a body.
                        }
                    }
                    else if (bodyResult && typeof bodyResult === 'object') {
                        ep.responseBody = bodyResult;
                    }
                }
                catch {
                    // Body capture is optional; the endpoint is still analyzed without it.
                }
            }
            // Step 5: Detect frontend framework
            let framework = {};
            try {
                const fwResult = await page.evaluate(FRAMEWORK_DETECT_JS);
                const fwValue = typeof fwResult === 'string' ? JSON.parse(fwResult) : fwResult;
                // typeof null is 'object' — never let null replace the default.
                if (fwValue && typeof fwValue === 'object')
                    framework = fwValue;
            }
            catch {
                // Framework detection is optional.
            }
            // Step 6: Get page metadata
            let title = '', finalUrl = '';
            try {
                const meta = await page.evaluate(`
                    () => JSON.stringify({ url: window.location.href, title: document.title || '' })
                `);
                const metaValue = typeof meta === 'string' ? JSON.parse(meta) : meta;
                if (metaValue && typeof metaValue === 'object') {
                    title = metaValue.title ?? '';
                    finalUrl = metaValue.url ?? '';
                }
            }
            catch {
                // Metadata is optional.
            }
            // Step 7: Analyze endpoints
            let siteHost = '';
            try {
                siteHost = new URL(url).hostname;
            }
            catch {
                // Keep the empty host for unparseable URLs.
            }
            const analyzedEndpoints = analyzeEndpoints(networkEntries, siteHost);
            // Step 8: Score and rank endpoints
            const scoredEndpoints = analyzedEndpoints
                .map(ep => ({ ...ep, score: scoreEndpoint(ep) }))
                .filter(ep => ep.score >= 5)
                .sort((a, b) => b.score - a.score);
            // Step 9: Infer capabilities from top endpoints
            const capabilities = [];
            const usedNames = new Set();
            for (const ep of scoredEndpoints.slice(0, 8)) {
                let capName = inferCapabilityName(ep, opts.goal);
                if (usedNames.has(capName)) {
                    // Disambiguate with the last literal path segment of the pattern.
                    // The query part is dropped first so "?a={}&b={}" never ends up in a name.
                    const patternPath = ep.pattern.split('?')[0];
                    const suffix = patternPath.split('/').filter(s => s && !s.startsWith('{') && !s.includes('.')).pop();
                    const stem = `${capName}_${suffix || usedNames.size}`;
                    // The suffixed name may itself be taken; number it until it is unique.
                    let candidate = stem;
                    for (let n = 2; usedNames.has(candidate); n++) {
                        candidate = `${stem}_${n}`;
                    }
                    capName = candidate;
                }
                usedNames.add(capName);
                capabilities.push({
                    name: capName,
                    description: `${site} ${capName}`,
                    strategy: inferStrategy(ep),
                    confidence: Math.min(ep.score / 20, 1.0),
                    endpoint: ep.pattern,
                    itemPath: ep.responseAnalysis?.itemPath ?? null,
                    recommendedColumns: buildRecommendedColumns(ep.responseAnalysis),
                    recommendedArgs: buildRecommendedArgs(ep),
                });
            }
            // Step 10: Determine the site-wide auth strategy (same mapping as inferStrategy)
            const allAuthIndicators = new Set(analyzedEndpoints.flatMap(ep => ep.authIndicators));
            let topStrategy = 'cookie';
            if (allAuthIndicators.has('signature'))
                topStrategy = 'intercept';
            else if (allAuthIndicators.has('transaction') || allAuthIndicators.has('bearer') || allAuthIndicators.has('csrf'))
                topStrategy = 'header';
            else if (allAuthIndicators.size === 0 && scoredEndpoints.some(ep => ep.contentType.includes('json')))
                topStrategy = 'public';
            return {
                site,
                target_url: url,
                final_url: finalUrl,
                title,
                framework,
                top_strategy: topStrategy,
                endpoint_count: analyzedEndpoints.length,
                api_endpoint_count: scoredEndpoints.length,
                capabilities,
                endpoints: scoredEndpoints.map(ep => ({
                    pattern: ep.pattern,
                    method: ep.method,
                    url: ep.url,
                    status: ep.status,
                    contentType: ep.contentType,
                    score: ep.score,
                    queryParams: ep.queryParams,
                    itemPath: ep.responseAnalysis?.itemPath ?? null,
                    itemCount: ep.responseAnalysis?.itemCount ?? 0,
                    detectedFields: ep.responseAnalysis?.detectedFields ?? {},
                    authIndicators: ep.authIndicators,
                })),
                auth_indicators: [...allAuthIndicators],
            };
        })(), { timeout: DEFAULT_BROWSER_EXPLORE_TIMEOUT, label: 'explore' });
    });
    // Write artifacts
    const manifest = {
        site: result.site,
        target_url: result.target_url,
        final_url: result.final_url,
        title: result.title,
        framework: result.framework,
        top_strategy: result.top_strategy,
        explored_at: new Date().toISOString(),
    };
    fs.writeFileSync(path.join(outDir, 'manifest.json'), JSON.stringify(manifest, null, 2));
    fs.writeFileSync(path.join(outDir, 'endpoints.json'), JSON.stringify(result.endpoints ?? [], null, 2));
    fs.writeFileSync(path.join(outDir, 'capabilities.json'), JSON.stringify(result.capabilities ?? [], null, 2));
    fs.writeFileSync(path.join(outDir, 'auth.json'), JSON.stringify({
        top_strategy: result.top_strategy,
        indicators: result.auth_indicators ?? [],
        framework: result.framework ?? {},
    }, null, 2));
    return { ...result, out_dir: outDir };
}
584
/**
 * Render an exploration result as a multi-line, human-readable summary.
 *
 * The summary lists site, URL, title, strategy and endpoint counts, then up to
 * five capabilities with strategy and confidence (as a whole percentage), the
 * detected frameworks (only when at least one flag is truthy) and the output
 * directory.
 *
 * @param result Result object as returned by exploreUrl.
 * @returns The summary text, lines joined with "\n".
 */
export function renderExploreSummary(result) {
    const header = [
        'opencli explore: OK',
        `Site: ${result.site}`,
        `URL: ${result.target_url}`,
        `Title: ${result.title || '(none)'}`,
        `Strategy: ${result.top_strategy}`,
        `Endpoints: ${result.endpoint_count} total, ${result.api_endpoint_count} API`,
        `Capabilities: ${result.capabilities?.length ?? 0}`,
    ];
    const capabilityLines = (result.capabilities ?? [])
        .slice(0, 5)
        .map(cap => ` • ${cap.name} (${cap.strategy}, confidence: ${(cap.confidence * 100).toFixed(0)}%)`);
    const detectedFrameworks = Object.entries(result.framework ?? {})
        .filter(([, enabled]) => enabled)
        .map(([name]) => name);
    const frameworkLines = detectedFrameworks.length
        ? [`Framework: ${detectedFrameworks.join(', ')}`]
        : [];
    return [
        ...header,
        ...capabilityLines,
        ...frameworkLines,
        `Output: ${result.out_dir}`,
    ].join('\n');
}
@@ -0,0 +1,11 @@
1
/**
 * Generate: one-shot CLI creation from URL.
 *
 * Orchestrates the full pipeline:
 * explore (Deep Explore) → synthesize (YAML generation) → register → verify
 *
 * Includes Strategy Cascade: if the initial strategy fails,
 * automatically downgrades and retries.
 */
/**
 * Entry point of the generate pipeline described above.
 *
 * NOTE(review): `opts` and the resolved value are typed `any` in this
 * declaration; their actual shape is defined by the implementation, which is
 * not visible here — confirm before relying on specific fields.
 */
export declare function generateCliFromUrl(opts: any): Promise<any>;
/**
 * Produces a text summary from `r`.
 *
 * NOTE(review): presumably `r` is the value resolved by generateCliFromUrl —
 * verify against the implementation.
 */
export declare function renderGenerateSummary(r: any): string;