@jackwener/opencli 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +594 -0
- package/README.md +116 -38
- package/README.zh-CN.md +143 -0
- package/SKILL.md +154 -102
- package/dist/browser.d.ts +1 -0
- package/dist/browser.js +35 -1
- package/dist/cascade.d.ts +45 -0
- package/dist/cascade.js +180 -0
- package/dist/clis/bilibili/hot.yaml +38 -0
- package/dist/clis/github/trending.yaml +58 -0
- package/dist/clis/hackernews/top.yaml +36 -0
- package/dist/clis/index.d.ts +2 -1
- package/dist/clis/index.js +3 -1
- package/dist/clis/reddit/hot.yaml +46 -0
- package/dist/clis/twitter/trending.yaml +40 -0
- package/dist/clis/v2ex/hot.yaml +25 -0
- package/dist/clis/v2ex/latest.yaml +25 -0
- package/dist/clis/v2ex/topic.yaml +27 -0
- package/dist/clis/xiaohongshu/feed.yaml +32 -0
- package/dist/clis/xiaohongshu/notifications.yaml +38 -0
- package/dist/clis/xiaohongshu/search.d.ts +5 -0
- package/dist/clis/xiaohongshu/search.js +68 -0
- package/dist/clis/zhihu/hot.yaml +42 -0
- package/dist/clis/zhihu/question.js +39 -0
- package/dist/clis/zhihu/search.yaml +55 -0
- package/dist/explore.d.ts +23 -13
- package/dist/explore.js +293 -422
- package/dist/main.js +17 -0
- package/dist/pipeline.js +238 -2
- package/dist/synthesize.d.ts +11 -8
- package/dist/synthesize.js +142 -118
- package/package.json +4 -2
- package/src/browser.ts +33 -1
- package/src/cascade.ts +217 -0
- package/src/clis/index.ts +4 -1
- package/src/clis/reddit/hot.yaml +46 -0
- package/src/clis/v2ex/hot.yaml +5 -9
- package/src/clis/v2ex/latest.yaml +5 -8
- package/src/clis/v2ex/topic.yaml +27 -0
- package/src/clis/xiaohongshu/feed.yaml +32 -0
- package/src/clis/xiaohongshu/notifications.yaml +38 -0
- package/src/clis/xiaohongshu/search.ts +71 -0
- package/src/clis/zhihu/hot.yaml +22 -8
- package/src/clis/zhihu/question.ts +45 -0
- package/src/clis/zhihu/search.yaml +55 -0
- package/src/explore.ts +303 -465
- package/src/main.ts +14 -0
- package/src/pipeline.ts +239 -2
- package/src/synthesize.ts +142 -137
- package/dist/clis/zhihu/search.js +0 -58
- package/src/clis/zhihu/search.ts +0 -65
- /package/dist/clis/zhihu/{search.d.ts → question.d.ts} +0 -0
package/src/main.ts
CHANGED
|
@@ -52,12 +52,26 @@ program.command('verify').description('Validate + smoke test').argument('[target
|
|
|
52
52
|
program.command('explore').description('Explore a website').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
|
|
53
53
|
.action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
|
|
54
54
|
|
|
55
|
+
program.command('probe').description('Probe a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
|
|
56
|
+
.action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
|
|
57
|
+
|
|
55
58
|
program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3')
|
|
56
59
|
.action(async (target, opts) => { const { synthesizeFromExplore, renderSynthesizeSummary } = await import('./synthesize.js'); console.log(renderSynthesizeSummary(synthesizeFromExplore(target, { top: parseInt(opts.top) }))); });
|
|
57
60
|
|
|
58
61
|
program.command('generate').description('One-shot: explore → synthesize → register').argument('<url>').option('--goal <text>').option('--site <name>')
|
|
59
62
|
.action(async (url, opts) => { const { generateCliFromUrl, renderGenerateSummary } = await import('./generate.js'); const r = await generateCliFromUrl({ url, BrowserFactory: PlaywrightMCP, builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, goal: opts.goal, site: opts.site }); console.log(renderGenerateSummary(r)); process.exitCode = r.ok ? 0 : 1; });
|
|
60
63
|
|
|
64
|
+
program.command('cascade').description('Strategy cascade: find simplest working strategy').argument('<url>').option('--site <name>')
|
|
65
|
+
.action(async (url, opts) => {
|
|
66
|
+
const { cascadeProbe, renderCascadeResult } = await import('./cascade.js');
|
|
67
|
+
const result = await browserSession(PlaywrightMCP, async (page) => {
|
|
68
|
+
// Navigate to the site first for cookie context
|
|
69
|
+
try { const siteUrl = new URL(url); await page.goto(`${siteUrl.protocol}//${siteUrl.host}`); await page.wait(2); } catch {}
|
|
70
|
+
return cascadeProbe(page, url);
|
|
71
|
+
});
|
|
72
|
+
console.log(renderCascadeResult(result));
|
|
73
|
+
});
|
|
74
|
+
|
|
61
75
|
// ── Dynamic site commands ──────────────────────────────────────────────────
|
|
62
76
|
|
|
63
77
|
const registry = getRegistry();
|
package/src/pipeline.ts
CHANGED
|
@@ -26,6 +26,11 @@ export async function executePipeline(
|
|
|
26
26
|
for (const [op, params] of Object.entries(step)) {
|
|
27
27
|
if (debug) debugStepStart(i + 1, total, op, params);
|
|
28
28
|
data = await executeStep(page, op, params, data, args);
|
|
29
|
+
// Detect error objects returned by steps (e.g. tap store not found)
|
|
30
|
+
if (data && typeof data === 'object' && !Array.isArray(data) && data.error) {
|
|
31
|
+
process.stderr.write(` ${chalk.yellow('⚠')} ${chalk.yellow(op)}: ${data.error}\n`);
|
|
32
|
+
if (data.hint) process.stderr.write(` ${chalk.dim('💡')} ${chalk.dim(data.hint)}\n`);
|
|
33
|
+
}
|
|
29
34
|
if (debug) debugStepResult(op, data);
|
|
30
35
|
}
|
|
31
36
|
}
|
|
@@ -143,7 +148,15 @@ async function executeStep(page: any, op: string, params: any, data: any, args:
|
|
|
143
148
|
}
|
|
144
149
|
case 'evaluate': {
|
|
145
150
|
const js = String(render(params, { args, data }));
|
|
146
|
-
|
|
151
|
+
let result = await page.evaluate(normalizeEvaluateSource(js));
|
|
152
|
+
// MCP may return JSON as a string — auto-parse it
|
|
153
|
+
if (typeof result === 'string') {
|
|
154
|
+
const trimmed = result.trim();
|
|
155
|
+
if ((trimmed.startsWith('[') && trimmed.endsWith(']')) || (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
|
|
156
|
+
try { result = JSON.parse(trimmed); } catch {}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
return result;
|
|
147
160
|
}
|
|
148
161
|
case 'snapshot': {
|
|
149
162
|
const opts = (typeof params === 'object' && params) ? params : {};
|
|
@@ -208,7 +221,231 @@ async function executeStep(page: any, op: string, params: any, data: any, args:
|
|
|
208
221
|
if (!Array.isArray(data)) return data;
|
|
209
222
|
return data.slice(0, Number(render(params, { args, data })));
|
|
210
223
|
}
|
|
211
|
-
case 'intercept':
|
|
224
|
+
case 'intercept': {
|
|
225
|
+
// Declarative XHR interception step
|
|
226
|
+
// Usage:
|
|
227
|
+
// intercept:
|
|
228
|
+
// trigger: "navigate:https://..." | "evaluate:store.note.fetch()" | "click:ref"
|
|
229
|
+
// capture: "api/pattern" # URL substring to match
|
|
230
|
+
// timeout: 5 # seconds to wait for matching request
|
|
231
|
+
// select: "data.items" # optional: extract sub-path from response
|
|
232
|
+
const cfg = typeof params === 'object' ? params : {};
|
|
233
|
+
const trigger = cfg.trigger ?? '';
|
|
234
|
+
const capturePattern = cfg.capture ?? '';
|
|
235
|
+
const timeout = cfg.timeout ?? 8;
|
|
236
|
+
const selectPath = cfg.select ?? null;
|
|
237
|
+
|
|
238
|
+
if (!capturePattern) return data;
|
|
239
|
+
|
|
240
|
+
// Step 1: Execute the trigger action
|
|
241
|
+
if (trigger.startsWith('navigate:')) {
|
|
242
|
+
const url = render(trigger.slice('navigate:'.length), { args, data });
|
|
243
|
+
await page.goto(String(url));
|
|
244
|
+
} else if (trigger.startsWith('evaluate:')) {
|
|
245
|
+
const js = trigger.slice('evaluate:'.length);
|
|
246
|
+
await page.evaluate(normalizeEvaluateSource(render(js, { args, data }) as string));
|
|
247
|
+
} else if (trigger.startsWith('click:')) {
|
|
248
|
+
const ref = render(trigger.slice('click:'.length), { args, data });
|
|
249
|
+
await page.click(String(ref).replace(/^@/, ''));
|
|
250
|
+
} else if (trigger === 'scroll') {
|
|
251
|
+
await page.scroll('down');
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// Step 2: Wait a bit for network requests to fire
|
|
255
|
+
await page.wait(Math.min(timeout, 3));
|
|
256
|
+
|
|
257
|
+
// Step 3: Get network requests and find matching ones
|
|
258
|
+
const rawNetwork = await page.networkRequests(false);
|
|
259
|
+
const matchingResponses: any[] = [];
|
|
260
|
+
|
|
261
|
+
if (typeof rawNetwork === 'string') {
|
|
262
|
+
// Parse the network output to find matching URLs
|
|
263
|
+
const lines = rawNetwork.split('\n');
|
|
264
|
+
for (const line of lines) {
|
|
265
|
+
const match = line.match(/\[?(GET|POST)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?/i);
|
|
266
|
+
if (match) {
|
|
267
|
+
const [, method, url, status] = match;
|
|
268
|
+
if (url.includes(capturePattern) && status === '200') {
|
|
269
|
+
// Re-fetch the matching URL to get the response body
|
|
270
|
+
try {
|
|
271
|
+
const body = await page.evaluate(`
|
|
272
|
+
async () => {
|
|
273
|
+
try {
|
|
274
|
+
const resp = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
|
|
275
|
+
if (!resp.ok) return null;
|
|
276
|
+
return await resp.json();
|
|
277
|
+
} catch { return null; }
|
|
278
|
+
}
|
|
279
|
+
`);
|
|
280
|
+
if (body) matchingResponses.push(body);
|
|
281
|
+
} catch {}
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
// Step 4: Select from response if specified
|
|
288
|
+
let result = matchingResponses.length === 1 ? matchingResponses[0] :
|
|
289
|
+
matchingResponses.length > 1 ? matchingResponses : data;
|
|
290
|
+
|
|
291
|
+
if (selectPath && result) {
|
|
292
|
+
let current = result;
|
|
293
|
+
for (const part of String(selectPath).split('.')) {
|
|
294
|
+
if (current && typeof current === 'object' && !Array.isArray(current)) {
|
|
295
|
+
current = current[part];
|
|
296
|
+
} else break;
|
|
297
|
+
}
|
|
298
|
+
result = current ?? result;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
return result;
|
|
302
|
+
}
|
|
303
|
+
case 'tap': {
|
|
304
|
+
// ── Declarative Store Action Bridge ──────────────────────────────────
|
|
305
|
+
// Usage:
|
|
306
|
+
// tap:
|
|
307
|
+
// store: feed # Pinia/Vuex store name
|
|
308
|
+
// action: fetchFeeds # Store action to call
|
|
309
|
+
// args: [] # Optional args to pass to action
|
|
310
|
+
// capture: homefeed # URL pattern to capture response
|
|
311
|
+
// timeout: 5 # Seconds to wait for network (default: 5)
|
|
312
|
+
// select: data.items # Optional: extract sub-path from response
|
|
313
|
+
// framework: pinia # Optional: pinia | vuex (auto-detected if omitted)
|
|
314
|
+
//
|
|
315
|
+
// Generates a self-contained IIFE that:
|
|
316
|
+
// 1. Injects fetch + XHR dual interception proxy
|
|
317
|
+
// 2. Finds the Pinia/Vuex store and calls the action
|
|
318
|
+
// 3. Captures the response matching the URL pattern
|
|
319
|
+
// 4. Auto-cleans up interception in finally block
|
|
320
|
+
// 5. Returns the captured data (optionally sub-selected)
|
|
321
|
+
|
|
322
|
+
const cfg = typeof params === 'object' ? params : {};
|
|
323
|
+
const storeName = String(render(cfg.store ?? '', { args, data }));
|
|
324
|
+
const actionName = String(render(cfg.action ?? '', { args, data }));
|
|
325
|
+
const capturePattern = String(render(cfg.capture ?? '', { args, data }));
|
|
326
|
+
const timeout = cfg.timeout ?? 5;
|
|
327
|
+
const selectPath = cfg.select ? String(render(cfg.select, { args, data })) : null;
|
|
328
|
+
const framework = cfg.framework ?? null; // auto-detect if null
|
|
329
|
+
const actionArgs = cfg.args ?? [];
|
|
330
|
+
|
|
331
|
+
if (!storeName || !actionName) throw new Error('tap: store and action are required');
|
|
332
|
+
|
|
333
|
+
// Build select chain for the captured response
|
|
334
|
+
const selectChain = selectPath
|
|
335
|
+
? selectPath.split('.').map((p: string) => `?.[${JSON.stringify(p)}]`).join('')
|
|
336
|
+
: '';
|
|
337
|
+
|
|
338
|
+
// Serialize action arguments
|
|
339
|
+
const actionArgsRendered = actionArgs.map((a: any) => {
|
|
340
|
+
const rendered = render(a, { args, data });
|
|
341
|
+
return JSON.stringify(rendered);
|
|
342
|
+
});
|
|
343
|
+
const actionCall = actionArgsRendered.length
|
|
344
|
+
? `store[${JSON.stringify(actionName)}](${actionArgsRendered.join(', ')})`
|
|
345
|
+
: `store[${JSON.stringify(actionName)}]()`;
|
|
346
|
+
|
|
347
|
+
const js = `
|
|
348
|
+
async () => {
|
|
349
|
+
// ── 1. Setup capture proxy (fetch + XHR dual interception) ──
|
|
350
|
+
let captured = null;
|
|
351
|
+
const capturePattern = ${JSON.stringify(capturePattern)};
|
|
352
|
+
|
|
353
|
+
// Intercept fetch API
|
|
354
|
+
const origFetch = window.fetch;
|
|
355
|
+
window.fetch = async function(...fetchArgs) {
|
|
356
|
+
const resp = await origFetch.apply(this, fetchArgs);
|
|
357
|
+
try {
|
|
358
|
+
const url = typeof fetchArgs[0] === 'string' ? fetchArgs[0]
|
|
359
|
+
: fetchArgs[0] instanceof Request ? fetchArgs[0].url : String(fetchArgs[0]);
|
|
360
|
+
if (capturePattern && url.includes(capturePattern) && !captured) {
|
|
361
|
+
try { captured = await resp.clone().json(); } catch {}
|
|
362
|
+
}
|
|
363
|
+
} catch {}
|
|
364
|
+
return resp;
|
|
365
|
+
};
|
|
366
|
+
|
|
367
|
+
// Intercept XMLHttpRequest
|
|
368
|
+
const origXhrOpen = XMLHttpRequest.prototype.open;
|
|
369
|
+
const origXhrSend = XMLHttpRequest.prototype.send;
|
|
370
|
+
XMLHttpRequest.prototype.open = function(method, url) {
|
|
371
|
+
this.__tapUrl = String(url);
|
|
372
|
+
return origXhrOpen.apply(this, arguments);
|
|
373
|
+
};
|
|
374
|
+
XMLHttpRequest.prototype.send = function(body) {
|
|
375
|
+
if (capturePattern && this.__tapUrl?.includes(capturePattern)) {
|
|
376
|
+
const xhr = this;
|
|
377
|
+
const origHandler = xhr.onreadystatechange;
|
|
378
|
+
xhr.onreadystatechange = function() {
|
|
379
|
+
if (xhr.readyState === 4 && !captured) {
|
|
380
|
+
try { captured = JSON.parse(xhr.responseText); } catch {}
|
|
381
|
+
}
|
|
382
|
+
if (origHandler) origHandler.apply(this, arguments);
|
|
383
|
+
};
|
|
384
|
+
// Also handle onload
|
|
385
|
+
const origOnload = xhr.onload;
|
|
386
|
+
xhr.onload = function() {
|
|
387
|
+
if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} }
|
|
388
|
+
if (origOnload) origOnload.apply(this, arguments);
|
|
389
|
+
};
|
|
390
|
+
}
|
|
391
|
+
return origXhrSend.apply(this, arguments);
|
|
392
|
+
};
|
|
393
|
+
|
|
394
|
+
try {
|
|
395
|
+
// ── 2. Find store ──
|
|
396
|
+
let store = null;
|
|
397
|
+
const storeName = ${JSON.stringify(storeName)};
|
|
398
|
+
const fw = ${JSON.stringify(framework)};
|
|
399
|
+
|
|
400
|
+
// Auto-detect framework if not specified
|
|
401
|
+
const app = document.querySelector('#app');
|
|
402
|
+
if (!fw || fw === 'pinia') {
|
|
403
|
+
// Try Pinia (Vue 3)
|
|
404
|
+
try {
|
|
405
|
+
const pinia = app?.__vue_app__?.config?.globalProperties?.$pinia;
|
|
406
|
+
if (pinia?._s) store = pinia._s.get(storeName);
|
|
407
|
+
} catch {}
|
|
408
|
+
}
|
|
409
|
+
if (!store && (!fw || fw === 'vuex')) {
|
|
410
|
+
// Try Vuex (Vue 2/3)
|
|
411
|
+
try {
|
|
412
|
+
const vuexStore = app?.__vue_app__?.config?.globalProperties?.$store
|
|
413
|
+
?? app?.__vue__?.$store;
|
|
414
|
+
if (vuexStore) {
|
|
415
|
+
// Vuex doesn't have named stores like Pinia, dispatch action
|
|
416
|
+
store = { [${JSON.stringify(actionName)}]: (...a) => vuexStore.dispatch(storeName + '/' + ${JSON.stringify(actionName)}, ...a) };
|
|
417
|
+
}
|
|
418
|
+
} catch {}
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
if (!store) return { error: 'Store not found: ' + storeName, hint: 'Page may not be fully loaded or store name may be incorrect' };
|
|
422
|
+
if (typeof store[${JSON.stringify(actionName)}] !== 'function') {
|
|
423
|
+
return { error: 'Action not found: ' + ${JSON.stringify(actionName)} + ' on store ' + storeName,
|
|
424
|
+
hint: 'Available: ' + Object.keys(store).filter(k => typeof store[k] === 'function' && !k.startsWith('$') && !k.startsWith('_')).join(', ') };
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
// ── 3. Call store action ──
|
|
428
|
+
await ${actionCall};
|
|
429
|
+
|
|
430
|
+
// ── 4. Wait for network response ──
|
|
431
|
+
const deadline = Date.now() + ${timeout} * 1000;
|
|
432
|
+
while (!captured && Date.now() < deadline) {
|
|
433
|
+
await new Promise(r => setTimeout(r, 200));
|
|
434
|
+
}
|
|
435
|
+
} finally {
|
|
436
|
+
// ── 5. Always restore originals ──
|
|
437
|
+
window.fetch = origFetch;
|
|
438
|
+
XMLHttpRequest.prototype.open = origXhrOpen;
|
|
439
|
+
XMLHttpRequest.prototype.send = origXhrSend;
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
if (!captured) return { error: 'No matching response captured for pattern: ' + capturePattern };
|
|
443
|
+
return captured${selectChain} ?? captured;
|
|
444
|
+
}
|
|
445
|
+
`;
|
|
446
|
+
|
|
447
|
+
return page.evaluate(js);
|
|
448
|
+
}
|
|
212
449
|
default: return data;
|
|
213
450
|
}
|
|
214
451
|
}
|
package/src/synthesize.ts
CHANGED
|
@@ -1,167 +1,185 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Synthesize
|
|
3
|
-
*
|
|
4
|
-
* Takes the structured capabilities from Deep Explore and generates
|
|
5
|
-
* YAML pipeline files that can be directly registered as CLI commands.
|
|
6
|
-
*
|
|
7
|
-
* This is the bridge between discovery (explore) and usability (CLI).
|
|
2
|
+
* Synthesize candidate CLIs from explore artifacts.
|
|
3
|
+
* Generates evaluate-based YAML pipelines (matching hand-written adapter patterns).
|
|
8
4
|
*/
|
|
9
5
|
|
|
10
6
|
import * as fs from 'node:fs';
|
|
11
7
|
import * as path from 'node:path';
|
|
12
8
|
import yaml from 'js-yaml';
|
|
13
9
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
/** Volatile params to strip from generated URLs */
|
|
11
|
+
const VOLATILE_PARAMS = new Set(['w_rid', 'wts', 'callback', '_', 'timestamp', 't', 'nonce', 'sign']);
|
|
12
|
+
const SEARCH_PARAM_NAMES = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'w', 'search_query']);
|
|
13
|
+
const LIMIT_PARAM_NAMES = new Set(['ps', 'page_size', 'limit', 'count', 'per_page', 'size', 'num']);
|
|
14
|
+
const PAGE_PARAM_NAMES = new Set(['pn', 'page', 'page_num', 'offset', 'cursor']);
|
|
17
15
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
16
|
+
/**
 * Synthesize candidate CLI definitions from a previously written explore bundle.
 *
 * Reads the bundle, keeps the highest-confidence capabilities, writes one YAML
 * pipeline per capability plus a `candidates.json` index into the output directory.
 *
 * @param target - Explore directory path, or a site name resolvable by `resolveExploreDir`.
 * @param opts.outDir - Output directory; defaults to `<exploreDir>/candidates`.
 * @param opts.top - Maximum number of capabilities to synthesize; defaults to 3.
 * @returns Summary with `site`, `explore_dir`, `out_dir`, `candidate_count`, `candidates`.
 * @throws Whatever `resolveExploreDir`, `loadExploreBundle`, or the fs writes throw.
 */
export function synthesizeFromExplore(
  target: string,
  opts: { outDir?: string; top?: number } = {},
): Record<string, any> {
  const exploreDir = resolveExploreDir(target);
  const bundle = loadExploreBundle(exploreDir);

  const targetDir = opts.outDir ?? path.join(exploreDir, 'candidates');
  fs.mkdirSync(targetDir, { recursive: true });

  // The CLI passes parseInt(...) here, which can be NaN. `NaN ?? 3` stays NaN and
  // slice(0, NaN) silently returns nothing, so fall back to the default instead.
  const top =
    typeof opts.top === 'number' && Number.isFinite(opts.top) && opts.top >= 0
      ? Math.floor(opts.top)
      : 3;

  const site = bundle.manifest.site;
  // Copy before sorting so the loaded bundle is not reordered as a side effect.
  const capabilities = [...(bundle.capabilities ?? [])]
    .sort((a: any, b: any) => (b.confidence ?? 0) - (a.confidence ?? 0))
    .slice(0, top);
  const endpoints: any[] = bundle.endpoints ?? [];
  const candidates: any[] = [];

  for (const cap of capabilities) {
    const endpoint = chooseEndpoint(cap, endpoints);
    if (!endpoint) continue; // nothing to build a pipeline against
    const candidate = buildCandidateYaml(site, bundle.manifest, cap, endpoint);
    const filePath = path.join(targetDir, `${candidate.name}.yaml`);
    fs.writeFileSync(filePath, yaml.dump(candidate.yaml, { sortKeys: false, lineWidth: 120 }));
    candidates.push({ name: candidate.name, path: filePath, strategy: cap.strategy, confidence: cap.confidence });
  }

  const index = {
    site,
    target_url: bundle.manifest.target_url,
    generated_from: exploreDir,
    candidate_count: candidates.length,
    candidates,
  };
  fs.writeFileSync(path.join(targetDir, 'candidates.json'), JSON.stringify(index, null, 2));

  return { site, explore_dir: exploreDir, out_dir: targetDir, candidate_count: candidates.length, candidates };
}
|
|
46
|
+
|
|
47
|
+
/**
 * Format the result of `synthesizeFromExplore` as a human-readable, newline-joined report:
 * four header lines followed by one bullet per candidate.
 *
 * @param result - Object with `site`, `explore_dir`, `candidate_count` and optional `candidates`.
 * @returns The multi-line summary text (no trailing newline).
 */
export function renderSynthesizeSummary(result: Record<string, any>): string {
  const header = [
    'opencli synthesize: OK',
    `Site: ${result.site}`,
    `Source: ${result.explore_dir}`,
    `Candidates: ${result.candidate_count}`,
  ];

  const bullets: string[] = [];
  for (const entry of result.candidates ?? []) {
    // Missing confidence is reported as 0%.
    const percent = ((entry.confidence ?? 0) * 100).toFixed(0);
    bullets.push(` • ${entry.name} (${entry.strategy}, ${percent}% confidence) → ${entry.path}`);
  }

  return header.concat(bullets).join('\n');
}
|
|
52
|
+
|
|
53
|
+
/**
 * Resolve an explore target to an existing explore directory.
 *
 * Tries `target` as a path first, then `.opencli/explore/<target>` relative to
 * the current working directory.
 *
 * @param target - A directory path or a site name.
 * @returns The first candidate that exists and is a directory.
 * @throws Error when neither candidate is an existing directory.
 */
export function resolveExploreDir(target: string): string {
  // A plain existence check would accept regular files, letting a file in the cwd
  // named like the site shadow the real explore directory.
  const isDirectory = (p: string): boolean => {
    try {
      return fs.statSync(p).isDirectory();
    } catch {
      return false; // missing or unreadable path: treat as "not a directory"
    }
  };

  if (isDirectory(target)) return target;
  const candidate = path.join('.opencli', 'explore', target);
  if (isDirectory(candidate)) return candidate;
  throw new Error(`Explore directory not found: ${target}`);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Load the four JSON artifacts of an explore run from `exploreDir`.
 *
 * @param exploreDir - Directory containing `manifest.json`, `endpoints.json`,
 *   `capabilities.json` and `auth.json`.
 * @returns An object with keys `manifest`, `endpoints`, `capabilities`, `auth`
 *   holding the parsed contents of the corresponding files.
 * @throws If any file is missing or is not valid JSON.
 */
export function loadExploreBundle(exploreDir: string): Record<string, any> {
  // Read and parse one artifact by file name.
  const readArtifact = (fileName: string): any => {
    const text = fs.readFileSync(path.join(exploreDir, fileName), 'utf-8');
    return JSON.parse(text);
  };

  const manifest = readArtifact('manifest.json');
  const endpoints = readArtifact('endpoints.json');
  const capabilities = readArtifact('capabilities.json');
  const auth = readArtifact('auth.json');

  return { manifest, endpoints, capabilities, auth };
}
|
|
72
68
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
const
|
|
69
|
+
/**
 * Pick the endpoint a capability should be built against.
 *
 * Prefers the first endpoint whose `pattern` equals `cap.endpoint` or whose
 * `url` contains it; otherwise falls back to the endpoint with the highest
 * `score` (missing scores count as 0, ties go to the earliest entry).
 *
 * @param cap - Capability record; only `cap.endpoint` is read.
 * @param endpoints - Candidate endpoints. Not modified.
 * @returns The chosen endpoint, or `null` when there are none.
 */
function chooseEndpoint(cap: any, endpoints: any[]): any | null {
  if (!endpoints?.length) return null;

  // Match by endpoint pattern from capability
  const wanted = cap?.endpoint;
  if (wanted) {
    const match = endpoints.find(
      (e: any) => e.pattern === wanted || (typeof e.url === 'string' && e.url.includes(wanted)),
    );
    if (match) return match;
  }

  // Array.prototype.sort works in place and would reorder the caller's array,
  // so take the maximum with a single non-mutating pass instead.
  return endpoints.reduce((best: any, e: any) => ((e.score ?? 0) > (best.score ?? 0) ? e : best));
}
|
|
78
78
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
* - Templates search, limit, and pagination params
|
|
83
|
-
* - Builds URL string manually to avoid URL encoding of ${{ }} expressions
|
|
84
|
-
*/
|
|
85
|
-
function buildTemplatedUrl(rawUrl: string, cap: any, endpoint: any): string {
|
|
79
|
+
// ── URL templating ─────────────────────────────────────────────────────────
|
|
80
|
+
|
|
81
|
+
/**
 * Turn a captured request URL into a pipeline URL template.
 *
 * Volatile params are dropped; search, limit and page params are replaced by
 * `${{ args.* }}` expressions; everything else is kept as a literal.
 * The query string is assembled by hand so the `${{ }}` expressions are not
 * percent-encoded.
 *
 * @param rawUrl - The URL observed during exploration.
 * @param cap - Capability record; `cap.recommendedArgs` decides whether a keyword arg exists.
 * @param _endpoint - Unused.
 * @returns The templated URL, or `rawUrl` unchanged if it cannot be processed.
 */
function buildTemplatedUrl(rawUrl: string, cap: any, _endpoint: any): string {
  try {
    const u = new URL(rawUrl);
    const base = `${u.protocol}//${u.host}${u.pathname}`;
    const hasKeyword = cap.recommendedArgs?.some((a: any) => a.name === 'keyword');
    const pairs: string[] = [];

    u.searchParams.forEach((v, k) => {
      if (VOLATILE_PARAMS.has(k)) return;

      let value: string;
      if (hasKeyword && SEARCH_PARAM_NAMES.has(k)) value = '${{ args.keyword }}';
      else if (LIMIT_PARAM_NAMES.has(k)) value = '${{ args.limit | default(20) }}';
      else if (PAGE_PARAM_NAMES.has(k)) value = '${{ args.page | default(1) }}';
      // searchParams yields *decoded* values; re-encode literals so characters such
      // as '&', '=', '#' or spaces cannot corrupt the rebuilt query string.
      else value = encodeURIComponent(v);

      pairs.push(`${encodeURIComponent(k)}=${value}`);
    });

    return pairs.length ? `${base}?${pairs.join('&')}` : base;
  } catch {
    // Best effort: an unparsable URL is passed through untouched.
    return rawUrl;
  }
}
|
|
115
99
|
|
|
116
100
|
/**
|
|
117
|
-
* Build
|
|
101
|
+
* Build inline evaluate script for browser-based fetch+parse.
|
|
102
|
+
* Follows patterns from bilibili/hot.yaml and twitter/trending.yaml.
|
|
118
103
|
*/
|
|
104
|
+
/**
 * Build the inline `evaluate` script for a browser-based fetch + parse step.
 *
 * The generated script fetches `url` with credentials, reads the JSON body,
 * walks `itemPath` with optional chaining and, when the endpoint has
 * `detectedFields`, maps each item to an object keyed by field role.
 *
 * @param url - Request URL; may contain `${{ }}` template expressions, which pass through unchanged.
 * @param itemPath - Dot-separated path to the item list in the response; empty means the root.
 * @param endpoint - Endpoint record; only `endpoint.detectedFields` (role → dotted field path) is read.
 * @returns JavaScript source text of an async IIFE.
 */
function buildEvaluateScript(url: string, itemPath: string, endpoint: any): string {
  const identifier = /^[A-Za-z_$][\w$]*$/;

  // Emit `?.name` for identifier-like segments and `?.["seg"]` otherwise, so numeric
  // or hyphenated segments (and an empty path) still produce valid JavaScript.
  const accessChain = (dotted: string): string =>
    String(dotted)
      .split('.')
      .filter((seg) => seg.length > 0)
      .map((seg) => (identifier.test(seg) ? `?.${seg}` : `?.[${JSON.stringify(seg)}]`))
      .join('');

  const detectedFields: Record<string, unknown> = endpoint?.detectedFields ?? {};
  const fieldEntries = Object.entries(detectedFields);

  let mapCode = '';
  if (fieldEntries.length > 0) {
    const mappings = fieldEntries
      .map(([role, field]) => {
        const key = identifier.test(role) ? role : JSON.stringify(role);
        return `      ${key}: item${accessChain(String(field))}`;
      })
      .join(',\n');
    mapCode = `.map((item) => ({\n${mappings}\n    }))`;
  }

  return [
    '(async () => {',
    // JSON.stringify yields a correctly escaped JS string literal; a raw splice into
    // quotes breaks as soon as the URL contains a quote or backslash.
    `  const res = await fetch(${JSON.stringify(url)}, {`,
    `    credentials: 'include'`,
    '  });',
    '  const data = await res.json();',
    `  return (data${accessChain(itemPath)} || [])${mapCode};`,
    '})()\n',
  ].join('\n');
}
|
|
127
|
+
|
|
128
|
+
// ── YAML pipeline generation ───────────────────────────────────────────────
|
|
129
|
+
|
|
119
130
|
function buildCandidateYaml(site: string, manifest: any, cap: any, endpoint: any): { name: string; yaml: any } {
|
|
120
131
|
const needsBrowser = cap.strategy !== 'public';
|
|
121
132
|
const pipeline: any[] = [];
|
|
133
|
+
const templatedUrl = buildTemplatedUrl(endpoint?.url ?? manifest.target_url, cap, endpoint);
|
|
122
134
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
pipeline.push({ navigate: manifest.target_url });
|
|
126
|
-
}
|
|
135
|
+
let domain = '';
|
|
136
|
+
try { domain = new URL(manifest.target_url).hostname; } catch {}
|
|
127
137
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
138
|
+
if (cap.strategy === 'store-action' && cap.storeHint) {
|
|
139
|
+
// Store Action: navigate + wait + tap (declarative, clean)
|
|
140
|
+
pipeline.push({ navigate: manifest.target_url });
|
|
141
|
+
pipeline.push({ wait: 3 });
|
|
142
|
+
const tapStep: Record<string, any> = {
|
|
143
|
+
store: cap.storeHint.store,
|
|
144
|
+
action: cap.storeHint.action,
|
|
145
|
+
timeout: 8,
|
|
146
|
+
};
|
|
147
|
+
// Infer capture pattern from endpoint URL
|
|
148
|
+
if (endpoint?.url) {
|
|
149
|
+
try {
|
|
150
|
+
const epUrl = new URL(endpoint.url);
|
|
151
|
+
const pathParts = epUrl.pathname.split('/').filter((p: string) => p);
|
|
152
|
+
// Use last meaningful path segment as capture pattern
|
|
153
|
+
const capturePart = pathParts.filter((p: string) => !p.match(/^v\d+$/)).pop();
|
|
154
|
+
if (capturePart) tapStep.capture = capturePart;
|
|
155
|
+
} catch {}
|
|
156
|
+
}
|
|
157
|
+
if (cap.itemPath) tapStep.select = cap.itemPath;
|
|
158
|
+
pipeline.push({ tap: tapStep });
|
|
159
|
+
} else if (needsBrowser) {
|
|
160
|
+
// Browser-based: navigate + evaluate (like bilibili/hot.yaml, twitter/trending.yaml)
|
|
161
|
+
pipeline.push({ navigate: manifest.target_url });
|
|
162
|
+
const itemPath = cap.itemPath ?? 'data.data.list';
|
|
163
|
+
pipeline.push({ evaluate: buildEvaluateScript(templatedUrl, itemPath, endpoint) });
|
|
164
|
+
} else {
|
|
165
|
+
// Public API: direct fetch (like hackernews/top.yaml)
|
|
166
|
+
pipeline.push({ fetch: { url: templatedUrl } });
|
|
167
|
+
if (cap.itemPath) pipeline.push({ select: cap.itemPath });
|
|
137
168
|
}
|
|
138
169
|
|
|
139
|
-
//
|
|
170
|
+
// Map fields
|
|
140
171
|
const mapStep: Record<string, string> = {};
|
|
141
172
|
const columns = cap.recommendedColumns ?? ['title', 'url'];
|
|
142
|
-
|
|
143
|
-
// Add a rank column if not doing search
|
|
144
|
-
if (!cap.recommendedArgs?.some((a: any) => a.name === 'keyword')) {
|
|
145
|
-
mapStep['rank'] = '${{ index + 1 }}';
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
// Build field mappings from the endpoint's detected fields
|
|
173
|
+
if (!cap.recommendedArgs?.some((a: any) => a.name === 'keyword')) mapStep['rank'] = '${{ index + 1 }}';
|
|
149
174
|
const detectedFields = endpoint?.detectedFields ?? {};
|
|
150
175
|
for (const col of columns) {
|
|
151
176
|
const fieldPath = detectedFields[col];
|
|
152
|
-
|
|
153
|
-
mapStep[col] = `\${{ item.${fieldPath} }}`;
|
|
154
|
-
} else {
|
|
155
|
-
mapStep[col] = `\${{ item.${col} }}`;
|
|
156
|
-
}
|
|
177
|
+
mapStep[col] = fieldPath ? `\${{ item.${fieldPath} }}` : `\${{ item.${col} }}`;
|
|
157
178
|
}
|
|
158
|
-
|
|
159
179
|
pipeline.push({ map: mapStep });
|
|
160
|
-
|
|
161
|
-
// Step 5: Limit
|
|
162
180
|
pipeline.push({ limit: '${{ args.limit | default(20) }}' });
|
|
163
181
|
|
|
164
|
-
//
|
|
182
|
+
// Args
|
|
165
183
|
const argsDef: Record<string, any> = {};
|
|
166
184
|
for (const arg of cap.recommendedArgs ?? []) {
|
|
167
185
|
const def: any = { type: arg.type ?? 'str' };
|
|
@@ -172,39 +190,26 @@ function buildCandidateYaml(site: string, manifest: any, cap: any, endpoint: any
|
|
|
172
190
|
else if (arg.name === 'page') def.description = 'Page number';
|
|
173
191
|
argsDef[arg.name] = def;
|
|
174
192
|
}
|
|
175
|
-
|
|
176
|
-
// Ensure limit arg always exists
|
|
177
|
-
if (!argsDef['limit']) {
|
|
178
|
-
argsDef['limit'] = { type: 'int', default: 20, description: 'Number of items to return' };
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
const allColumns = Object.keys(mapStep);
|
|
193
|
+
if (!argsDef['limit']) argsDef['limit'] = { type: 'int', default: 20, description: 'Number of items to return' };
|
|
182
194
|
|
|
183
195
|
return {
|
|
184
196
|
name: cap.name,
|
|
185
197
|
yaml: {
|
|
186
|
-
site,
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
domain: manifest.final_url ? new URL(manifest.final_url).hostname : undefined,
|
|
190
|
-
strategy: cap.strategy,
|
|
191
|
-
browser: needsBrowser,
|
|
192
|
-
args: argsDef,
|
|
193
|
-
pipeline,
|
|
194
|
-
columns: allColumns,
|
|
198
|
+
site, name: cap.name, description: `${cap.description || site + ' ' + cap.name} (auto-generated)`,
|
|
199
|
+
domain, strategy: cap.strategy, browser: needsBrowser,
|
|
200
|
+
args: argsDef, pipeline, columns: Object.keys(mapStep),
|
|
195
201
|
},
|
|
196
202
|
};
|
|
197
203
|
}
|
|
198
204
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
return lines.join('\n');
|
|
205
|
+
/** Backward-compatible export for scaffold.ts */
|
|
206
|
+
/**
 * Backward-compatible entry point (kept for scaffold.ts).
 *
 * Accepts capabilities that use either camelCase (`recommendedArgs`,
 * `recommendedColumns`) or snake_case (`recommended_args`,
 * `recommended_columns`) field names, and delegates to `buildCandidateYaml`
 * with a minimal manifest built from `targetUrl`.
 *
 * @param site - Site identifier.
 * @param targetUrl - Used as both `target_url` and `final_url` of the manifest.
 * @param cap - Capability record in old or new field naming.
 * @param endpoint - Endpoint record passed through unchanged.
 * @returns Whatever `buildCandidateYaml` returns.
 */
export function buildCandidate(site: string, targetUrl: string, cap: any, endpoint: any): any {
  // Prefer the new-style names, falling back to the old snake_case ones.
  const normalizedCap = { ...cap };
  normalizedCap.recommendedArgs = cap.recommendedArgs ?? cap.recommended_args;
  normalizedCap.recommendedColumns = cap.recommendedColumns ?? cap.recommended_columns;

  return buildCandidateYaml(site, { target_url: targetUrl, final_url: targetUrl }, normalizedCap, endpoint);
}
|