@jackwener/opencli 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLI-CREATOR.md +594 -0
- package/README.md +116 -38
- package/README.zh-CN.md +143 -0
- package/SKILL.md +154 -102
- package/dist/browser.d.ts +1 -0
- package/dist/browser.js +35 -1
- package/dist/cascade.d.ts +45 -0
- package/dist/cascade.js +180 -0
- package/dist/clis/bilibili/hot.yaml +38 -0
- package/dist/clis/github/trending.yaml +58 -0
- package/dist/clis/hackernews/top.yaml +36 -0
- package/dist/clis/index.d.ts +2 -1
- package/dist/clis/index.js +3 -1
- package/dist/clis/reddit/hot.yaml +46 -0
- package/dist/clis/twitter/trending.yaml +40 -0
- package/dist/clis/v2ex/hot.yaml +25 -0
- package/dist/clis/v2ex/latest.yaml +25 -0
- package/dist/clis/v2ex/topic.yaml +27 -0
- package/dist/clis/xiaohongshu/feed.yaml +32 -0
- package/dist/clis/xiaohongshu/notifications.yaml +38 -0
- package/dist/clis/xiaohongshu/search.d.ts +5 -0
- package/dist/clis/xiaohongshu/search.js +68 -0
- package/dist/clis/zhihu/hot.yaml +42 -0
- package/dist/clis/zhihu/question.js +39 -0
- package/dist/clis/zhihu/search.yaml +55 -0
- package/dist/explore.d.ts +23 -13
- package/dist/explore.js +293 -422
- package/dist/main.js +17 -0
- package/dist/pipeline.js +238 -2
- package/dist/synthesize.d.ts +11 -8
- package/dist/synthesize.js +142 -118
- package/package.json +4 -2
- package/src/browser.ts +33 -1
- package/src/cascade.ts +217 -0
- package/src/clis/index.ts +4 -1
- package/src/clis/reddit/hot.yaml +46 -0
- package/src/clis/v2ex/hot.yaml +5 -9
- package/src/clis/v2ex/latest.yaml +5 -8
- package/src/clis/v2ex/topic.yaml +27 -0
- package/src/clis/xiaohongshu/feed.yaml +32 -0
- package/src/clis/xiaohongshu/notifications.yaml +38 -0
- package/src/clis/xiaohongshu/search.ts +71 -0
- package/src/clis/zhihu/hot.yaml +22 -8
- package/src/clis/zhihu/question.ts +45 -0
- package/src/clis/zhihu/search.yaml +55 -0
- package/src/explore.ts +303 -465
- package/src/main.ts +14 -0
- package/src/pipeline.ts +239 -2
- package/src/synthesize.ts +142 -137
- package/dist/clis/zhihu/search.js +0 -58
- package/src/clis/zhihu/search.ts +0 -65
- /package/dist/clis/zhihu/{search.d.ts → question.d.ts} +0 -0
package/dist/main.js
CHANGED
|
@@ -55,10 +55,27 @@ program.command('verify').description('Validate + smoke test').argument('[target
|
|
|
55
55
|
.action(async (target, opts) => { const { verifyClis, renderVerifyReport } = await import('./verify.js'); const r = await verifyClis({ builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, target, smoke: opts.smoke }); console.log(renderVerifyReport(r)); process.exitCode = r.ok ? 0 : 1; });
|
|
56
56
|
program.command('explore').description('Explore a website').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
|
|
57
57
|
.action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
|
|
58
|
+
// `probe <url>`: runs the explore flow from ./explore.js on the given URL and
// prints the rendered summary. The module is imported lazily so it is only
// loaded when this command actually runs.
program
    .command('probe')
    .description('Probe a website: discover APIs, stores, and recommend strategies')
    .argument('<url>')
    .option('--site <name>')
    .option('--goal <text>')
    .option('--wait <s>', '', '3')
    .action(async (url, opts) => {
        const { exploreUrl: explore, renderExploreSummary: summarize } = await import('./explore.js');
        const exploreOptions = {
            BrowserFactory: PlaywrightMCP,
            site: opts.site,
            goal: opts.goal,
            // --wait arrives as a string (default '3'); convert to a number of seconds
            waitSeconds: parseFloat(opts.wait),
        };
        const report = await explore(url, exploreOptions);
        console.log(summarize(report));
    });
|
|
58
60
|
program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3')
|
|
59
61
|
.action(async (target, opts) => { const { synthesizeFromExplore, renderSynthesizeSummary } = await import('./synthesize.js'); console.log(renderSynthesizeSummary(synthesizeFromExplore(target, { top: parseInt(opts.top) }))); });
|
|
60
62
|
program.command('generate').description('One-shot: explore → synthesize → register').argument('<url>').option('--goal <text>').option('--site <name>')
|
|
61
63
|
.action(async (url, opts) => { const { generateCliFromUrl, renderGenerateSummary } = await import('./generate.js'); const r = await generateCliFromUrl({ url, BrowserFactory: PlaywrightMCP, builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, goal: opts.goal, site: opts.site }); console.log(renderGenerateSummary(r)); process.exitCode = r.ok ? 0 : 1; });
|
|
64
|
+
// `cascade <url>`: opens a browser session, warms it up on the site's root,
// then hands the page to cascadeProbe() and prints the rendered result.
// Note: `--site` is accepted as an option but is not read by this handler.
program
    .command('cascade')
    .description('Strategy cascade: find simplest working strategy')
    .argument('<url>')
    .option('--site <name>')
    .action(async (url, opts) => {
        const { cascadeProbe: probe, renderCascadeResult: renderResult } = await import('./cascade.js');
        const probeInSession = async (page) => {
            // Visit the site's root first so the probe runs with cookie context.
            // This warm-up is best-effort: an unparsable URL or a failed
            // navigation is ignored and the probe still runs.
            try {
                const parsed = new URL(url);
                await page.goto(`${parsed.protocol}//${parsed.host}`);
                await page.wait(2);
            }
            catch {
                // intentionally ignored (see above)
            }
            return probe(page, url);
        };
        const outcome = await browserSession(PlaywrightMCP, probeInSession);
        console.log(renderResult(outcome));
    });
|
|
62
79
|
// ── Dynamic site commands ──────────────────────────────────────────────────
|
|
63
80
|
const registry = getRegistry();
|
|
64
81
|
const siteGroups = new Map();
|
package/dist/pipeline.js
CHANGED
|
@@ -16,6 +16,12 @@ export async function executePipeline(page, pipeline, ctx = {}) {
|
|
|
16
16
|
if (debug)
|
|
17
17
|
debugStepStart(i + 1, total, op, params);
|
|
18
18
|
data = await executeStep(page, op, params, data, args);
|
|
19
|
+
// Detect error objects returned by steps (e.g. tap store not found)
|
|
20
|
+
if (data && typeof data === 'object' && !Array.isArray(data) && data.error) {
|
|
21
|
+
process.stderr.write(` ${chalk.yellow('⚠')} ${chalk.yellow(op)}: ${data.error}\n`);
|
|
22
|
+
if (data.hint)
|
|
23
|
+
process.stderr.write(` ${chalk.dim('💡')} ${chalk.dim(data.hint)}\n`);
|
|
24
|
+
}
|
|
19
25
|
if (debug)
|
|
20
26
|
debugStepResult(op, data);
|
|
21
27
|
}
|
|
@@ -136,7 +142,18 @@ async function executeStep(page, op, params, data, args) {
|
|
|
136
142
|
}
|
|
137
143
|
case 'evaluate': {
|
|
138
144
|
const js = String(render(params, { args, data }));
|
|
139
|
-
|
|
145
|
+
let result = await page.evaluate(normalizeEvaluateSource(js));
|
|
146
|
+
// MCP may return JSON as a string — auto-parse it
|
|
147
|
+
if (typeof result === 'string') {
|
|
148
|
+
const trimmed = result.trim();
|
|
149
|
+
if ((trimmed.startsWith('[') && trimmed.endsWith(']')) || (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
|
|
150
|
+
try {
|
|
151
|
+
result = JSON.parse(trimmed);
|
|
152
|
+
}
|
|
153
|
+
catch { }
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
return result;
|
|
140
157
|
}
|
|
141
158
|
case 'snapshot': {
|
|
142
159
|
const opts = (typeof params === 'object' && params) ? params : {};
|
|
@@ -214,7 +231,226 @@ async function executeStep(page, op, params, data, args) {
|
|
|
214
231
|
return data;
|
|
215
232
|
return data.slice(0, Number(render(params, { args, data })));
|
|
216
233
|
}
|
|
217
|
-
case 'intercept':
|
|
234
|
+
case 'intercept': {
|
|
235
|
+
// Declarative XHR interception step
|
|
236
|
+
// Usage:
|
|
237
|
+
// intercept:
|
|
238
|
+
// trigger: "navigate:https://..." | "evaluate:store.note.fetch()" | "click:ref"
|
|
239
|
+
// capture: "api/pattern" # URL substring to match
|
|
240
|
+
// timeout: 5 # seconds to wait for matching request
|
|
241
|
+
// select: "data.items" # optional: extract sub-path from response
|
|
242
|
+
const cfg = typeof params === 'object' ? params : {};
|
|
243
|
+
const trigger = cfg.trigger ?? '';
|
|
244
|
+
const capturePattern = cfg.capture ?? '';
|
|
245
|
+
const timeout = cfg.timeout ?? 8;
|
|
246
|
+
const selectPath = cfg.select ?? null;
|
|
247
|
+
if (!capturePattern)
|
|
248
|
+
return data;
|
|
249
|
+
// Step 1: Execute the trigger action
|
|
250
|
+
if (trigger.startsWith('navigate:')) {
|
|
251
|
+
const url = render(trigger.slice('navigate:'.length), { args, data });
|
|
252
|
+
await page.goto(String(url));
|
|
253
|
+
}
|
|
254
|
+
else if (trigger.startsWith('evaluate:')) {
|
|
255
|
+
const js = trigger.slice('evaluate:'.length);
|
|
256
|
+
await page.evaluate(normalizeEvaluateSource(render(js, { args, data })));
|
|
257
|
+
}
|
|
258
|
+
else if (trigger.startsWith('click:')) {
|
|
259
|
+
const ref = render(trigger.slice('click:'.length), { args, data });
|
|
260
|
+
await page.click(String(ref).replace(/^@/, ''));
|
|
261
|
+
}
|
|
262
|
+
else if (trigger === 'scroll') {
|
|
263
|
+
await page.scroll('down');
|
|
264
|
+
}
|
|
265
|
+
// Step 2: Wait a bit for network requests to fire
|
|
266
|
+
await page.wait(Math.min(timeout, 3));
|
|
267
|
+
// Step 3: Get network requests and find matching ones
|
|
268
|
+
const rawNetwork = await page.networkRequests(false);
|
|
269
|
+
const matchingResponses = [];
|
|
270
|
+
if (typeof rawNetwork === 'string') {
|
|
271
|
+
// Parse the network output to find matching URLs
|
|
272
|
+
const lines = rawNetwork.split('\n');
|
|
273
|
+
for (const line of lines) {
|
|
274
|
+
const match = line.match(/\[?(GET|POST)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?/i);
|
|
275
|
+
if (match) {
|
|
276
|
+
const [, method, url, status] = match;
|
|
277
|
+
if (url.includes(capturePattern) && status === '200') {
|
|
278
|
+
// Re-fetch the matching URL to get the response body
|
|
279
|
+
try {
|
|
280
|
+
const body = await page.evaluate(`
|
|
281
|
+
async () => {
|
|
282
|
+
try {
|
|
283
|
+
const resp = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
|
|
284
|
+
if (!resp.ok) return null;
|
|
285
|
+
return await resp.json();
|
|
286
|
+
} catch { return null; }
|
|
287
|
+
}
|
|
288
|
+
`);
|
|
289
|
+
if (body)
|
|
290
|
+
matchingResponses.push(body);
|
|
291
|
+
}
|
|
292
|
+
catch { }
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
// Step 4: Select from response if specified
|
|
298
|
+
let result = matchingResponses.length === 1 ? matchingResponses[0] :
|
|
299
|
+
matchingResponses.length > 1 ? matchingResponses : data;
|
|
300
|
+
if (selectPath && result) {
|
|
301
|
+
let current = result;
|
|
302
|
+
for (const part of String(selectPath).split('.')) {
|
|
303
|
+
if (current && typeof current === 'object' && !Array.isArray(current)) {
|
|
304
|
+
current = current[part];
|
|
305
|
+
}
|
|
306
|
+
else
|
|
307
|
+
break;
|
|
308
|
+
}
|
|
309
|
+
result = current ?? result;
|
|
310
|
+
}
|
|
311
|
+
return result;
|
|
312
|
+
}
|
|
313
|
+
case 'tap': {
|
|
314
|
+
// ── Declarative Store Action Bridge ──────────────────────────────────
|
|
315
|
+
// Usage:
|
|
316
|
+
// tap:
|
|
317
|
+
// store: feed # Pinia/Vuex store name
|
|
318
|
+
// action: fetchFeeds # Store action to call
|
|
319
|
+
// args: [] # Optional args to pass to action
|
|
320
|
+
// capture: homefeed # URL pattern to capture response
|
|
321
|
+
// timeout: 5 # Seconds to wait for network (default: 5)
|
|
322
|
+
// select: data.items # Optional: extract sub-path from response
|
|
323
|
+
// framework: pinia # Optional: pinia | vuex (auto-detected if omitted)
|
|
324
|
+
//
|
|
325
|
+
// Generates a self-contained IIFE that:
|
|
326
|
+
// 1. Injects fetch + XHR dual interception proxy
|
|
327
|
+
// 2. Finds the Pinia/Vuex store and calls the action
|
|
328
|
+
// 3. Captures the response matching the URL pattern
|
|
329
|
+
// 4. Auto-cleans up interception in finally block
|
|
330
|
+
// 5. Returns the captured data (optionally sub-selected)
|
|
331
|
+
const cfg = typeof params === 'object' ? params : {};
|
|
332
|
+
const storeName = String(render(cfg.store ?? '', { args, data }));
|
|
333
|
+
const actionName = String(render(cfg.action ?? '', { args, data }));
|
|
334
|
+
const capturePattern = String(render(cfg.capture ?? '', { args, data }));
|
|
335
|
+
const timeout = cfg.timeout ?? 5;
|
|
336
|
+
const selectPath = cfg.select ? String(render(cfg.select, { args, data })) : null;
|
|
337
|
+
const framework = cfg.framework ?? null; // auto-detect if null
|
|
338
|
+
const actionArgs = cfg.args ?? [];
|
|
339
|
+
if (!storeName || !actionName)
|
|
340
|
+
throw new Error('tap: store and action are required');
|
|
341
|
+
// Build select chain for the captured response
|
|
342
|
+
const selectChain = selectPath
|
|
343
|
+
? selectPath.split('.').map((p) => `?.[${JSON.stringify(p)}]`).join('')
|
|
344
|
+
: '';
|
|
345
|
+
// Serialize action arguments
|
|
346
|
+
const actionArgsRendered = actionArgs.map((a) => {
|
|
347
|
+
const rendered = render(a, { args, data });
|
|
348
|
+
return JSON.stringify(rendered);
|
|
349
|
+
});
|
|
350
|
+
const actionCall = actionArgsRendered.length
|
|
351
|
+
? `store[${JSON.stringify(actionName)}](${actionArgsRendered.join(', ')})`
|
|
352
|
+
: `store[${JSON.stringify(actionName)}]()`;
|
|
353
|
+
const js = `
|
|
354
|
+
async () => {
|
|
355
|
+
// ── 1. Setup capture proxy (fetch + XHR dual interception) ──
|
|
356
|
+
let captured = null;
|
|
357
|
+
const capturePattern = ${JSON.stringify(capturePattern)};
|
|
358
|
+
|
|
359
|
+
// Intercept fetch API
|
|
360
|
+
const origFetch = window.fetch;
|
|
361
|
+
window.fetch = async function(...fetchArgs) {
|
|
362
|
+
const resp = await origFetch.apply(this, fetchArgs);
|
|
363
|
+
try {
|
|
364
|
+
const url = typeof fetchArgs[0] === 'string' ? fetchArgs[0]
|
|
365
|
+
: fetchArgs[0] instanceof Request ? fetchArgs[0].url : String(fetchArgs[0]);
|
|
366
|
+
if (capturePattern && url.includes(capturePattern) && !captured) {
|
|
367
|
+
try { captured = await resp.clone().json(); } catch {}
|
|
368
|
+
}
|
|
369
|
+
} catch {}
|
|
370
|
+
return resp;
|
|
371
|
+
};
|
|
372
|
+
|
|
373
|
+
// Intercept XMLHttpRequest
|
|
374
|
+
const origXhrOpen = XMLHttpRequest.prototype.open;
|
|
375
|
+
const origXhrSend = XMLHttpRequest.prototype.send;
|
|
376
|
+
XMLHttpRequest.prototype.open = function(method, url) {
|
|
377
|
+
this.__tapUrl = String(url);
|
|
378
|
+
return origXhrOpen.apply(this, arguments);
|
|
379
|
+
};
|
|
380
|
+
XMLHttpRequest.prototype.send = function(body) {
|
|
381
|
+
if (capturePattern && this.__tapUrl?.includes(capturePattern)) {
|
|
382
|
+
const xhr = this;
|
|
383
|
+
const origHandler = xhr.onreadystatechange;
|
|
384
|
+
xhr.onreadystatechange = function() {
|
|
385
|
+
if (xhr.readyState === 4 && !captured) {
|
|
386
|
+
try { captured = JSON.parse(xhr.responseText); } catch {}
|
|
387
|
+
}
|
|
388
|
+
if (origHandler) origHandler.apply(this, arguments);
|
|
389
|
+
};
|
|
390
|
+
// Also handle onload
|
|
391
|
+
const origOnload = xhr.onload;
|
|
392
|
+
xhr.onload = function() {
|
|
393
|
+
if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} }
|
|
394
|
+
if (origOnload) origOnload.apply(this, arguments);
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
return origXhrSend.apply(this, arguments);
|
|
398
|
+
};
|
|
399
|
+
|
|
400
|
+
try {
|
|
401
|
+
// ── 2. Find store ──
|
|
402
|
+
let store = null;
|
|
403
|
+
const storeName = ${JSON.stringify(storeName)};
|
|
404
|
+
const fw = ${JSON.stringify(framework)};
|
|
405
|
+
|
|
406
|
+
// Auto-detect framework if not specified
|
|
407
|
+
const app = document.querySelector('#app');
|
|
408
|
+
if (!fw || fw === 'pinia') {
|
|
409
|
+
// Try Pinia (Vue 3)
|
|
410
|
+
try {
|
|
411
|
+
const pinia = app?.__vue_app__?.config?.globalProperties?.$pinia;
|
|
412
|
+
if (pinia?._s) store = pinia._s.get(storeName);
|
|
413
|
+
} catch {}
|
|
414
|
+
}
|
|
415
|
+
if (!store && (!fw || fw === 'vuex')) {
|
|
416
|
+
// Try Vuex (Vue 2/3)
|
|
417
|
+
try {
|
|
418
|
+
const vuexStore = app?.__vue_app__?.config?.globalProperties?.$store
|
|
419
|
+
?? app?.__vue__?.$store;
|
|
420
|
+
if (vuexStore) {
|
|
421
|
+
// Vuex doesn't have named stores like Pinia, dispatch action
|
|
422
|
+
store = { [${JSON.stringify(actionName)}]: (...a) => vuexStore.dispatch(storeName + '/' + ${JSON.stringify(actionName)}, ...a) };
|
|
423
|
+
}
|
|
424
|
+
} catch {}
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
if (!store) return { error: 'Store not found: ' + storeName, hint: 'Page may not be fully loaded or store name may be incorrect' };
|
|
428
|
+
if (typeof store[${JSON.stringify(actionName)}] !== 'function') {
|
|
429
|
+
return { error: 'Action not found: ' + ${JSON.stringify(actionName)} + ' on store ' + storeName,
|
|
430
|
+
hint: 'Available: ' + Object.keys(store).filter(k => typeof store[k] === 'function' && !k.startsWith('$') && !k.startsWith('_')).join(', ') };
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// ── 3. Call store action ──
|
|
434
|
+
await ${actionCall};
|
|
435
|
+
|
|
436
|
+
// ── 4. Wait for network response ──
|
|
437
|
+
const deadline = Date.now() + ${timeout} * 1000;
|
|
438
|
+
while (!captured && Date.now() < deadline) {
|
|
439
|
+
await new Promise(r => setTimeout(r, 200));
|
|
440
|
+
}
|
|
441
|
+
} finally {
|
|
442
|
+
// ── 5. Always restore originals ──
|
|
443
|
+
window.fetch = origFetch;
|
|
444
|
+
XMLHttpRequest.prototype.open = origXhrOpen;
|
|
445
|
+
XMLHttpRequest.prototype.send = origXhrSend;
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
if (!captured) return { error: 'No matching response captured for pattern: ' + capturePattern };
|
|
449
|
+
return captured${selectChain} ?? captured;
|
|
450
|
+
}
|
|
451
|
+
`;
|
|
452
|
+
return page.evaluate(js);
|
|
453
|
+
}
|
|
218
454
|
default: return data;
|
|
219
455
|
}
|
|
220
456
|
}
|
package/dist/synthesize.d.ts
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Synthesize
|
|
3
|
-
*
|
|
4
|
-
* Takes the structured capabilities from Deep Explore and generates
|
|
5
|
-
* YAML pipeline files that can be directly registered as CLI commands.
|
|
6
|
-
*
|
|
7
|
-
* This is the bridge between discovery (explore) and usability (CLI).
|
|
2
|
+
* Synthesize candidate CLIs from explore artifacts.
|
|
3
|
+
* Generates evaluate-based YAML pipelines (matching hand-written adapter patterns).
|
|
8
4
|
*/
|
|
9
|
-
export declare function synthesizeFromExplore(target: string, opts?:
|
|
10
|
-
|
|
5
|
+
export declare function synthesizeFromExplore(target: string, opts?: {
|
|
6
|
+
outDir?: string;
|
|
7
|
+
top?: number;
|
|
8
|
+
}): Record<string, any>;
|
|
9
|
+
export declare function renderSynthesizeSummary(result: Record<string, any>): string;
|
|
10
|
+
export declare function resolveExploreDir(target: string): string;
|
|
11
|
+
export declare function loadExploreBundle(exploreDir: string): Record<string, any>;
|
|
12
|
+
/** Backward-compatible export for scaffold.ts */
|
|
13
|
+
export declare function buildCandidate(site: string, targetUrl: string, cap: any, endpoint: any): any;
|
package/dist/synthesize.js
CHANGED
|
@@ -1,147 +1,181 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Synthesize
|
|
3
|
-
*
|
|
4
|
-
* Takes the structured capabilities from Deep Explore and generates
|
|
5
|
-
* YAML pipeline files that can be directly registered as CLI commands.
|
|
6
|
-
*
|
|
7
|
-
* This is the bridge between discovery (explore) and usability (CLI).
|
|
2
|
+
* Synthesize candidate CLIs from explore artifacts.
|
|
3
|
+
* Generates evaluate-based YAML pipelines (matching hand-written adapter patterns).
|
|
8
4
|
*/
|
|
9
5
|
import * as fs from 'node:fs';
|
|
10
6
|
import * as path from 'node:path';
|
|
11
7
|
import yaml from 'js-yaml';
|
|
8
|
+
/** Volatile params to strip from generated URLs */
|
|
9
|
+
const VOLATILE_PARAMS = new Set(['w_rid', 'wts', 'callback', '_', 'timestamp', 't', 'nonce', 'sign']);
|
|
10
|
+
const SEARCH_PARAM_NAMES = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'w', 'search_query']);
|
|
11
|
+
const LIMIT_PARAM_NAMES = new Set(['ps', 'page_size', 'limit', 'count', 'per_page', 'size', 'num']);
|
|
12
|
+
const PAGE_PARAM_NAMES = new Set(['pn', 'page', 'page_num', 'offset', 'cursor']);
|
|
12
13
|
export function synthesizeFromExplore(target, opts = {}) {
|
|
13
|
-
const exploreDir =
|
|
14
|
-
|
|
15
|
-
throw new Error(`Explore dir not found: ${target}`);
|
|
16
|
-
const manifest = JSON.parse(fs.readFileSync(path.join(exploreDir, 'manifest.json'), 'utf-8'));
|
|
17
|
-
const capabilities = JSON.parse(fs.readFileSync(path.join(exploreDir, 'capabilities.json'), 'utf-8'));
|
|
18
|
-
const endpoints = JSON.parse(fs.readFileSync(path.join(exploreDir, 'endpoints.json'), 'utf-8'));
|
|
19
|
-
const auth = JSON.parse(fs.readFileSync(path.join(exploreDir, 'auth.json'), 'utf-8'));
|
|
14
|
+
const exploreDir = resolveExploreDir(target);
|
|
15
|
+
const bundle = loadExploreBundle(exploreDir);
|
|
20
16
|
const targetDir = opts.outDir ?? path.join(exploreDir, 'candidates');
|
|
21
17
|
fs.mkdirSync(targetDir, { recursive: true });
|
|
22
|
-
const site = manifest.site;
|
|
23
|
-
const
|
|
24
|
-
const candidates = [];
|
|
25
|
-
// Sort capabilities by confidence
|
|
26
|
-
const sortedCaps = [...capabilities]
|
|
18
|
+
const site = bundle.manifest.site;
|
|
19
|
+
const capabilities = (bundle.capabilities ?? [])
|
|
27
20
|
.sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0))
|
|
28
|
-
.slice(0,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
const endpoint =
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
const
|
|
35
|
-
const filePath = path.join(targetDir,
|
|
21
|
+
.slice(0, opts.top ?? 3);
|
|
22
|
+
const candidates = [];
|
|
23
|
+
for (const cap of capabilities) {
|
|
24
|
+
const endpoint = chooseEndpoint(cap, bundle.endpoints);
|
|
25
|
+
if (!endpoint)
|
|
26
|
+
continue;
|
|
27
|
+
const candidate = buildCandidateYaml(site, bundle.manifest, cap, endpoint);
|
|
28
|
+
const filePath = path.join(targetDir, `${candidate.name}.yaml`);
|
|
36
29
|
fs.writeFileSync(filePath, yaml.dump(candidate.yaml, { sortKeys: false, lineWidth: 120 }));
|
|
37
|
-
candidates.push({
|
|
38
|
-
name: cap.name,
|
|
39
|
-
path: filePath,
|
|
40
|
-
strategy: cap.strategy,
|
|
41
|
-
endpoint: cap.endpoint,
|
|
42
|
-
confidence: cap.confidence,
|
|
43
|
-
columns: candidate.yaml.columns,
|
|
44
|
-
});
|
|
30
|
+
candidates.push({ name: candidate.name, path: filePath, strategy: cap.strategy, confidence: cap.confidence });
|
|
45
31
|
}
|
|
46
|
-
const index = {
|
|
47
|
-
site,
|
|
48
|
-
target_url: manifest.target_url,
|
|
49
|
-
generated_from: exploreDir,
|
|
50
|
-
candidate_count: candidates.length,
|
|
51
|
-
candidates,
|
|
52
|
-
};
|
|
32
|
+
const index = { site, target_url: bundle.manifest.target_url, generated_from: exploreDir, candidate_count: candidates.length, candidates };
|
|
53
33
|
fs.writeFileSync(path.join(targetDir, 'candidates.json'), JSON.stringify(index, null, 2));
|
|
34
|
+
return { site, explore_dir: exploreDir, out_dir: targetDir, candidate_count: candidates.length, candidates };
|
|
35
|
+
}
|
|
36
|
+
/**
 * Format the result of a synthesize run as a plain-text report.
 *
 * @param {Record<string, any>} result - Object with `site`, `explore_dir`,
 *   `candidate_count` and an optional `candidates` array whose entries carry
 *   `name`, `strategy`, `confidence` and `path`.
 * @returns {string} Four header lines followed by one bullet line per
 *   candidate, joined with newlines. A missing confidence is shown as 0%.
 */
export function renderSynthesizeSummary(result) {
    const header = [
        'opencli synthesize: OK',
        `Site: ${result.site}`,
        `Source: ${result.explore_dir}`,
        `Candidates: ${result.candidate_count}`,
    ];
    const bullets = (result.candidates ?? []).map((entry) => {
        // confidence is a 0..1 fraction in the visible callers; shown as a whole percent
        const percent = ((entry.confidence ?? 0) * 100).toFixed(0);
        return ` • ${entry.name} (${entry.strategy}, ${percent}% confidence) → ${entry.path}`;
    });
    return [...header, ...bullets].join('\n');
}
|
|
42
|
+
/**
 * Resolve an explore target to an existing path.
 *
 * Lookup order:
 *   1. `target` itself, used as a path;
 *   2. `.opencli/explore/<target>` relative to the current working directory.
 *
 * @param {string} target - A path, or a name under `.opencli/explore`.
 * @returns {string} The first location that exists.
 * @throws {Error} When neither location exists.
 */
export function resolveExploreDir(target) {
    // Candidates are produced lazily so the fallback path is only built when
    // the direct lookup has already failed.
    const locators = [
        () => target,
        () => path.join('.opencli', 'explore', target),
    ];
    for (const locate of locators) {
        const location = locate();
        if (fs.existsSync(location))
            return location;
    }
    throw new Error(`Explore directory not found: ${target}`);
}
|
|
50
|
+
export function loadExploreBundle(exploreDir) {
|
|
54
51
|
return {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
candidates,
|
|
52
|
+
manifest: JSON.parse(fs.readFileSync(path.join(exploreDir, 'manifest.json'), 'utf-8')),
|
|
53
|
+
endpoints: JSON.parse(fs.readFileSync(path.join(exploreDir, 'endpoints.json'), 'utf-8')),
|
|
54
|
+
capabilities: JSON.parse(fs.readFileSync(path.join(exploreDir, 'capabilities.json'), 'utf-8')),
|
|
55
|
+
auth: JSON.parse(fs.readFileSync(path.join(exploreDir, 'auth.json'), 'utf-8')),
|
|
60
56
|
};
|
|
61
57
|
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
58
|
+
/**
 * Pick the endpoint that backs a capability.
 *
 * Preference order:
 *   1. the first endpoint whose `pattern` equals `cap.endpoint`, or whose
 *      `url` contains `cap.endpoint`;
 *   2. otherwise the endpoint with the highest `score` (missing score counts
 *      as 0; on ties the earliest entry wins).
 *
 * The `endpoints` array is never modified. The previous implementation sorted
 * it in place, which silently reordered the caller's data.
 *
 * @param {any} cap - Capability; only `cap.endpoint` is read here.
 * @param {any[]} endpoints - Candidate endpoints.
 * @returns {any|null} The chosen endpoint, or null when `endpoints` is
 *   missing, not an array, or empty.
 */
function chooseEndpoint(cap, endpoints) {
    if (!Array.isArray(endpoints) || endpoints.length === 0)
        return null;
    // Match by endpoint pattern from capability
    if (cap.endpoint) {
        const match = endpoints.find((e) => e.pattern === cap.endpoint || e.url?.includes(cap.endpoint));
        if (match)
            return match;
    }
    // Linear scan for the best score. Strict `>` keeps the earliest entry on
    // ties, which is what a stable descending sort would have put first.
    let best = endpoints[0];
    for (const candidate of endpoints.slice(1)) {
        if ((candidate.score ?? 0) > (best.score ?? 0))
            best = candidate;
    }
    return best;
}
|
|
69
|
+
// ── URL templating ─────────────────────────────────────────────────────────
|
|
70
|
+
function buildTemplatedUrl(rawUrl, cap, _endpoint) {
|
|
74
71
|
try {
|
|
75
72
|
const u = new URL(rawUrl);
|
|
76
73
|
const base = `${u.protocol}//${u.host}${u.pathname}`;
|
|
77
74
|
const params = [];
|
|
78
75
|
const hasKeyword = cap.recommendedArgs?.some((a) => a.name === 'keyword');
|
|
79
76
|
u.searchParams.forEach((v, k) => {
|
|
80
|
-
// Skip volatile params
|
|
81
77
|
if (VOLATILE_PARAMS.has(k))
|
|
82
78
|
return;
|
|
83
|
-
|
|
84
|
-
if (hasKeyword && SEARCH_PARAM_NAMES.has(k)) {
|
|
79
|
+
if (hasKeyword && SEARCH_PARAM_NAMES.has(k))
|
|
85
80
|
params.push([k, '${{ args.keyword }}']);
|
|
86
|
-
|
|
87
|
-
else if (LIMIT_PARAM_NAMES.has(k)) {
|
|
81
|
+
else if (LIMIT_PARAM_NAMES.has(k))
|
|
88
82
|
params.push([k, '${{ args.limit | default(20) }}']);
|
|
89
|
-
|
|
90
|
-
else if (PAGE_PARAM_NAMES.has(k)) {
|
|
83
|
+
else if (PAGE_PARAM_NAMES.has(k))
|
|
91
84
|
params.push([k, '${{ args.page | default(1) }}']);
|
|
92
|
-
|
|
93
|
-
else {
|
|
85
|
+
else
|
|
94
86
|
params.push([k, v]);
|
|
95
|
-
}
|
|
96
87
|
});
|
|
97
|
-
|
|
98
|
-
return base;
|
|
99
|
-
return base + '?' + params.map(([k, v]) => `${k}=${v}`).join('&');
|
|
88
|
+
return params.length ? base + '?' + params.map(([k, v]) => `${k}=${v}`).join('&') : base;
|
|
100
89
|
}
|
|
101
90
|
catch {
|
|
102
91
|
return rawUrl;
|
|
103
92
|
}
|
|
104
93
|
}
|
|
105
94
|
/**
|
|
106
|
-
* Build
|
|
95
|
+
* Build inline evaluate script for browser-based fetch+parse.
|
|
96
|
+
* Follows patterns from bilibili/hot.yaml and twitter/trending.yaml.
|
|
107
97
|
*/
|
|
98
|
+
/**
 * Build the inline `evaluate` script that fetches `url` (with credentials)
 * and returns the list found at `itemPath` in the JSON response, optionally
 * mapped to the endpoint's detected fields.
 *
 * The output for ordinary inputs is unchanged. Three cases that used to
 * generate syntactically invalid JavaScript are now handled:
 *   - a `'` or `\` in `url` is escaped so it cannot end the string literal;
 *   - path segments that are not identifiers (e.g. `0`, `pub-date`) use
 *     `?.["..."]` bracket access instead of `?.0` / `?.pub-date`;
 *   - role names that are not identifiers are emitted as quoted keys.
 * Empty path segments are dropped, so an empty `itemPath` selects the
 * response root instead of leaving a dangling `?.`.
 *
 * @param {string} url - URL to fetch; may contain `${{ ... }}` templates.
 * @param {string} itemPath - Dot-separated path to the item list.
 * @param {any} [endpoint] - Endpoint; only `endpoint.detectedFields`
 *   (role → dot-separated field path) is read.
 * @returns {string} Source text of an async IIFE, ending with a newline.
 */
function buildEvaluateScript(url, itemPath, endpoint) {
    const identifier = /^[A-Za-z_$][\w$]*$/;
    // Turn "a.b.0" into an optional-chaining accessor suffix: ?.a?.b?.["0"]
    const accessChain = (dotted) => String(dotted)
        .split('.')
        .filter((segment) => segment !== '')
        .map((segment) => (identifier.test(segment) ? `?.${segment}` : `?.[${JSON.stringify(segment)}]`))
        .join('');
    // Escape for embedding inside a single-quoted JS string literal
    const quotedUrl = String(url)
        .replace(/\\/g, '\\\\')
        .replace(/'/g, "\\'")
        .replace(/\r/g, '\\r')
        .replace(/\n/g, '\\n');
    const pathChain = accessChain(itemPath);
    const detectedFields = endpoint?.detectedFields ?? {};
    const fieldEntries = Object.entries(detectedFields);
    let mapCode = '';
    if (fieldEntries.length > 0) {
        const mappings = fieldEntries
            .map(([role, field]) => ` ${identifier.test(role) ? role : JSON.stringify(role)}: item${accessChain(field)}`)
            .join(',\n');
        mapCode = `.map((item) => ({\n${mappings}\n }))`;
    }
    return [
        '(async () => {',
        ` const res = await fetch('${quotedUrl}', {`,
        ` credentials: 'include'`,
        ' });',
        ' const data = await res.json();',
        ` return (data${pathChain} || [])${mapCode};`,
        '})()\n',
    ].join('\n');
}
|
|
119
|
+
// ── YAML pipeline generation ───────────────────────────────────────────────
|
|
108
120
|
function buildCandidateYaml(site, manifest, cap, endpoint) {
|
|
109
121
|
const needsBrowser = cap.strategy !== 'public';
|
|
110
122
|
const pipeline = [];
|
|
111
|
-
|
|
112
|
-
|
|
123
|
+
const templatedUrl = buildTemplatedUrl(endpoint?.url ?? manifest.target_url, cap, endpoint);
|
|
124
|
+
let domain = '';
|
|
125
|
+
try {
|
|
126
|
+
domain = new URL(manifest.target_url).hostname;
|
|
127
|
+
}
|
|
128
|
+
catch { }
|
|
129
|
+
if (cap.strategy === 'store-action' && cap.storeHint) {
|
|
130
|
+
// Store Action: navigate + wait + tap (declarative, clean)
|
|
113
131
|
pipeline.push({ navigate: manifest.target_url });
|
|
132
|
+
pipeline.push({ wait: 3 });
|
|
133
|
+
const tapStep = {
|
|
134
|
+
store: cap.storeHint.store,
|
|
135
|
+
action: cap.storeHint.action,
|
|
136
|
+
timeout: 8,
|
|
137
|
+
};
|
|
138
|
+
// Infer capture pattern from endpoint URL
|
|
139
|
+
if (endpoint?.url) {
|
|
140
|
+
try {
|
|
141
|
+
const epUrl = new URL(endpoint.url);
|
|
142
|
+
const pathParts = epUrl.pathname.split('/').filter((p) => p);
|
|
143
|
+
// Use last meaningful path segment as capture pattern
|
|
144
|
+
const capturePart = pathParts.filter((p) => !p.match(/^v\d+$/)).pop();
|
|
145
|
+
if (capturePart)
|
|
146
|
+
tapStep.capture = capturePart;
|
|
147
|
+
}
|
|
148
|
+
catch { }
|
|
149
|
+
}
|
|
150
|
+
if (cap.itemPath)
|
|
151
|
+
tapStep.select = cap.itemPath;
|
|
152
|
+
pipeline.push({ tap: tapStep });
|
|
153
|
+
}
|
|
154
|
+
else if (needsBrowser) {
|
|
155
|
+
// Browser-based: navigate + evaluate (like bilibili/hot.yaml, twitter/trending.yaml)
|
|
156
|
+
pipeline.push({ navigate: manifest.target_url });
|
|
157
|
+
const itemPath = cap.itemPath ?? 'data.data.list';
|
|
158
|
+
pipeline.push({ evaluate: buildEvaluateScript(templatedUrl, itemPath, endpoint) });
|
|
114
159
|
}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
if (cap.itemPath) {
|
|
121
|
-
pipeline.push({ select: cap.itemPath });
|
|
160
|
+
else {
|
|
161
|
+
// Public API: direct fetch (like hackernews/top.yaml)
|
|
162
|
+
pipeline.push({ fetch: { url: templatedUrl } });
|
|
163
|
+
if (cap.itemPath)
|
|
164
|
+
pipeline.push({ select: cap.itemPath });
|
|
122
165
|
}
|
|
123
|
-
//
|
|
166
|
+
// Map fields
|
|
124
167
|
const mapStep = {};
|
|
125
168
|
const columns = cap.recommendedColumns ?? ['title', 'url'];
|
|
126
|
-
|
|
127
|
-
if (!cap.recommendedArgs?.some((a) => a.name === 'keyword')) {
|
|
169
|
+
if (!cap.recommendedArgs?.some((a) => a.name === 'keyword'))
|
|
128
170
|
mapStep['rank'] = '${{ index + 1 }}';
|
|
129
|
-
}
|
|
130
|
-
// Build field mappings from the endpoint's detected fields
|
|
131
171
|
const detectedFields = endpoint?.detectedFields ?? {};
|
|
132
172
|
for (const col of columns) {
|
|
133
173
|
const fieldPath = detectedFields[col];
|
|
134
|
-
|
|
135
|
-
mapStep[col] = `\${{ item.${fieldPath} }}`;
|
|
136
|
-
}
|
|
137
|
-
else {
|
|
138
|
-
mapStep[col] = `\${{ item.${col} }}`;
|
|
139
|
-
}
|
|
174
|
+
mapStep[col] = fieldPath ? `\${{ item.${fieldPath} }}` : `\${{ item.${col} }}`;
|
|
140
175
|
}
|
|
141
176
|
pipeline.push({ map: mapStep });
|
|
142
|
-
// Step 5: Limit
|
|
143
177
|
pipeline.push({ limit: '${{ args.limit | default(20) }}' });
|
|
144
|
-
//
|
|
178
|
+
// Args
|
|
145
179
|
const argsDef = {};
|
|
146
180
|
for (const arg of cap.recommendedArgs ?? []) {
|
|
147
181
|
const def = { type: arg.type ?? 'str' };
|
|
@@ -157,35 +191,25 @@ function buildCandidateYaml(site, manifest, cap, endpoint) {
|
|
|
157
191
|
def.description = 'Page number';
|
|
158
192
|
argsDef[arg.name] = def;
|
|
159
193
|
}
|
|
160
|
-
|
|
161
|
-
if (!argsDef['limit']) {
|
|
194
|
+
if (!argsDef['limit'])
|
|
162
195
|
argsDef['limit'] = { type: 'int', default: 20, description: 'Number of items to return' };
|
|
163
|
-
}
|
|
164
|
-
const allColumns = Object.keys(mapStep);
|
|
165
196
|
return {
|
|
166
197
|
name: cap.name,
|
|
167
198
|
yaml: {
|
|
168
|
-
site,
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
domain: manifest.final_url ? new URL(manifest.final_url).hostname : undefined,
|
|
172
|
-
strategy: cap.strategy,
|
|
173
|
-
browser: needsBrowser,
|
|
174
|
-
args: argsDef,
|
|
175
|
-
pipeline,
|
|
176
|
-
columns: allColumns,
|
|
199
|
+
site, name: cap.name, description: `${cap.description || site + ' ' + cap.name} (auto-generated)`,
|
|
200
|
+
domain, strategy: cap.strategy, browser: needsBrowser,
|
|
201
|
+
args: argsDef, pipeline, columns: Object.keys(mapStep),
|
|
177
202
|
},
|
|
178
203
|
};
|
|
179
204
|
}
|
|
180
|
-
export
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
return lines.join('\n');
|
|
205
|
+
/** Backward-compatible export for scaffold.ts */
|
|
206
|
+
/**
 * Backward-compatible export for scaffold.ts.
 *
 * Accepts a capability that may still use the old snake_case field names
 * (`recommended_args`, `recommended_columns`), maps them onto the camelCase
 * names, and delegates to buildCandidateYaml() with a minimal manifest built
 * from `targetUrl`.
 *
 * @param {string} site - Site name passed through to the candidate.
 * @param {string} targetUrl - Used as both `target_url` and `final_url`.
 * @param {any} cap - Capability in either the old or new field naming.
 * @param {any} endpoint - Endpoint passed through unchanged.
 * @returns {any} Whatever buildCandidateYaml() returns.
 */
export function buildCandidate(site, targetUrl, cap, endpoint) {
    // camelCase wins when both spellings are present; snake_case is the fallback
    const args = cap.recommendedArgs ?? cap.recommended_args;
    const columns = cap.recommendedColumns ?? cap.recommended_columns;
    const capability = Object.assign({}, cap, {
        recommendedArgs: args,
        recommendedColumns: columns,
    });
    return buildCandidateYaml(
        site,
        { target_url: targetUrl, final_url: targetUrl },
        capability,
        endpoint,
    );
}
|