@jackwener/opencli 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/CLI-CREATOR.md +594 -0
  2. package/README.md +116 -38
  3. package/README.zh-CN.md +143 -0
  4. package/SKILL.md +154 -102
  5. package/dist/browser.d.ts +1 -0
  6. package/dist/browser.js +35 -1
  7. package/dist/cascade.d.ts +45 -0
  8. package/dist/cascade.js +180 -0
  9. package/dist/clis/bilibili/hot.yaml +38 -0
  10. package/dist/clis/github/trending.yaml +58 -0
  11. package/dist/clis/hackernews/top.yaml +36 -0
  12. package/dist/clis/index.d.ts +2 -1
  13. package/dist/clis/index.js +3 -1
  14. package/dist/clis/reddit/hot.yaml +46 -0
  15. package/dist/clis/twitter/trending.yaml +40 -0
  16. package/dist/clis/v2ex/hot.yaml +25 -0
  17. package/dist/clis/v2ex/latest.yaml +25 -0
  18. package/dist/clis/v2ex/topic.yaml +27 -0
  19. package/dist/clis/xiaohongshu/feed.yaml +32 -0
  20. package/dist/clis/xiaohongshu/notifications.yaml +38 -0
  21. package/dist/clis/xiaohongshu/search.d.ts +5 -0
  22. package/dist/clis/xiaohongshu/search.js +68 -0
  23. package/dist/clis/zhihu/hot.yaml +42 -0
  24. package/dist/clis/zhihu/question.js +39 -0
  25. package/dist/clis/zhihu/search.yaml +55 -0
  26. package/dist/explore.d.ts +23 -13
  27. package/dist/explore.js +293 -422
  28. package/dist/main.js +17 -0
  29. package/dist/pipeline.js +238 -2
  30. package/dist/synthesize.d.ts +11 -8
  31. package/dist/synthesize.js +142 -118
  32. package/package.json +4 -2
  33. package/src/browser.ts +33 -1
  34. package/src/cascade.ts +217 -0
  35. package/src/clis/index.ts +4 -1
  36. package/src/clis/reddit/hot.yaml +46 -0
  37. package/src/clis/v2ex/hot.yaml +5 -9
  38. package/src/clis/v2ex/latest.yaml +5 -8
  39. package/src/clis/v2ex/topic.yaml +27 -0
  40. package/src/clis/xiaohongshu/feed.yaml +32 -0
  41. package/src/clis/xiaohongshu/notifications.yaml +38 -0
  42. package/src/clis/xiaohongshu/search.ts +71 -0
  43. package/src/clis/zhihu/hot.yaml +22 -8
  44. package/src/clis/zhihu/question.ts +45 -0
  45. package/src/clis/zhihu/search.yaml +55 -0
  46. package/src/explore.ts +303 -465
  47. package/src/main.ts +14 -0
  48. package/src/pipeline.ts +239 -2
  49. package/src/synthesize.ts +142 -137
  50. package/dist/clis/zhihu/search.js +0 -58
  51. package/src/clis/zhihu/search.ts +0 -65
  52. package/dist/clis/zhihu/{search.d.ts → question.d.ts} +0 -0
package/src/main.ts CHANGED
@@ -52,12 +52,26 @@ program.command('verify').description('Validate + smoke test').argument('[target
52
52
  program.command('explore').description('Explore a website').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
53
53
  .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
54
54
 
55
+ program.command('probe').description('Probe a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
56
+ .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
57
+
55
58
  program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3')
56
59
  .action(async (target, opts) => { const { synthesizeFromExplore, renderSynthesizeSummary } = await import('./synthesize.js'); console.log(renderSynthesizeSummary(synthesizeFromExplore(target, { top: parseInt(opts.top) }))); });
57
60
 
58
61
  program.command('generate').description('One-shot: explore → synthesize → register').argument('<url>').option('--goal <text>').option('--site <name>')
59
62
  .action(async (url, opts) => { const { generateCliFromUrl, renderGenerateSummary } = await import('./generate.js'); const r = await generateCliFromUrl({ url, BrowserFactory: PlaywrightMCP, builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, goal: opts.goal, site: opts.site }); console.log(renderGenerateSummary(r)); process.exitCode = r.ok ? 0 : 1; });
60
63
 
64
+ program.command('cascade').description('Strategy cascade: find simplest working strategy').argument('<url>').option('--site <name>')
65
+ .action(async (url, opts) => {
66
+ const { cascadeProbe, renderCascadeResult } = await import('./cascade.js');
67
+ const result = await browserSession(PlaywrightMCP, async (page) => {
68
+ // Navigate to the site first for cookie context
69
+ try { const siteUrl = new URL(url); await page.goto(`${siteUrl.protocol}//${siteUrl.host}`); await page.wait(2); } catch {}
70
+ return cascadeProbe(page, url);
71
+ });
72
+ console.log(renderCascadeResult(result));
73
+ });
74
+
61
75
  // ── Dynamic site commands ──────────────────────────────────────────────────
62
76
 
63
77
  const registry = getRegistry();
package/src/pipeline.ts CHANGED
@@ -26,6 +26,11 @@ export async function executePipeline(
26
26
  for (const [op, params] of Object.entries(step)) {
27
27
  if (debug) debugStepStart(i + 1, total, op, params);
28
28
  data = await executeStep(page, op, params, data, args);
29
+ // Detect error objects returned by steps (e.g. tap store not found)
30
+ if (data && typeof data === 'object' && !Array.isArray(data) && data.error) {
31
+ process.stderr.write(` ${chalk.yellow('⚠')} ${chalk.yellow(op)}: ${data.error}\n`);
32
+ if (data.hint) process.stderr.write(` ${chalk.dim('💡')} ${chalk.dim(data.hint)}\n`);
33
+ }
29
34
  if (debug) debugStepResult(op, data);
30
35
  }
31
36
  }
@@ -143,7 +148,15 @@ async function executeStep(page: any, op: string, params: any, data: any, args:
143
148
  }
144
149
  case 'evaluate': {
145
150
  const js = String(render(params, { args, data }));
146
- return page.evaluate(normalizeEvaluateSource(js));
151
+ let result = await page.evaluate(normalizeEvaluateSource(js));
152
+ // MCP may return JSON as a string — auto-parse it
153
+ if (typeof result === 'string') {
154
+ const trimmed = result.trim();
155
+ if ((trimmed.startsWith('[') && trimmed.endsWith(']')) || (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
156
+ try { result = JSON.parse(trimmed); } catch {}
157
+ }
158
+ }
159
+ return result;
147
160
  }
148
161
  case 'snapshot': {
149
162
  const opts = (typeof params === 'object' && params) ? params : {};
@@ -208,7 +221,231 @@ async function executeStep(page: any, op: string, params: any, data: any, args:
208
221
  if (!Array.isArray(data)) return data;
209
222
  return data.slice(0, Number(render(params, { args, data })));
210
223
  }
211
- case 'intercept': return data;
224
+ case 'intercept': {
225
+ // Declarative XHR interception step
226
+ // Usage:
227
+ // intercept:
228
+ // trigger: "navigate:https://..." | "evaluate:store.note.fetch()" | "click:ref"
229
+ // capture: "api/pattern" # URL substring to match
230
+ // timeout: 5 # seconds to wait for matching request
231
+ // select: "data.items" # optional: extract sub-path from response
232
+ const cfg = typeof params === 'object' ? params : {};
233
+ const trigger = cfg.trigger ?? '';
234
+ const capturePattern = cfg.capture ?? '';
235
+ const timeout = cfg.timeout ?? 8;
236
+ const selectPath = cfg.select ?? null;
237
+
238
+ if (!capturePattern) return data;
239
+
240
+ // Step 1: Execute the trigger action
241
+ if (trigger.startsWith('navigate:')) {
242
+ const url = render(trigger.slice('navigate:'.length), { args, data });
243
+ await page.goto(String(url));
244
+ } else if (trigger.startsWith('evaluate:')) {
245
+ const js = trigger.slice('evaluate:'.length);
246
+ await page.evaluate(normalizeEvaluateSource(render(js, { args, data }) as string));
247
+ } else if (trigger.startsWith('click:')) {
248
+ const ref = render(trigger.slice('click:'.length), { args, data });
249
+ await page.click(String(ref).replace(/^@/, ''));
250
+ } else if (trigger === 'scroll') {
251
+ await page.scroll('down');
252
+ }
253
+
254
+ // Step 2: Wait a bit for network requests to fire
255
+ await page.wait(Math.min(timeout, 3));
256
+
257
+ // Step 3: Get network requests and find matching ones
258
+ const rawNetwork = await page.networkRequests(false);
259
+ const matchingResponses: any[] = [];
260
+
261
+ if (typeof rawNetwork === 'string') {
262
+ // Parse the network output to find matching URLs
263
+ const lines = rawNetwork.split('\n');
264
+ for (const line of lines) {
265
+ const match = line.match(/\[?(GET|POST)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?/i);
266
+ if (match) {
267
+ const [, method, url, status] = match;
268
+ if (url.includes(capturePattern) && status === '200') {
269
+ // Re-fetch the matching URL to get the response body
270
+ try {
271
+ const body = await page.evaluate(`
272
+ async () => {
273
+ try {
274
+ const resp = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
275
+ if (!resp.ok) return null;
276
+ return await resp.json();
277
+ } catch { return null; }
278
+ }
279
+ `);
280
+ if (body) matchingResponses.push(body);
281
+ } catch {}
282
+ }
283
+ }
284
+ }
285
+ }
286
+
287
+ // Step 4: Select from response if specified
288
+ let result = matchingResponses.length === 1 ? matchingResponses[0] :
289
+ matchingResponses.length > 1 ? matchingResponses : data;
290
+
291
+ if (selectPath && result) {
292
+ let current = result;
293
+ for (const part of String(selectPath).split('.')) {
294
+ if (current && typeof current === 'object' && !Array.isArray(current)) {
295
+ current = current[part];
296
+ } else break;
297
+ }
298
+ result = current ?? result;
299
+ }
300
+
301
+ return result;
302
+ }
303
+ case 'tap': {
304
+ // ── Declarative Store Action Bridge ──────────────────────────────────
305
+ // Usage:
306
+ // tap:
307
+ // store: feed # Pinia/Vuex store name
308
+ // action: fetchFeeds # Store action to call
309
+ // args: [] # Optional args to pass to action
310
+ // capture: homefeed # URL pattern to capture response
311
+ // timeout: 5 # Seconds to wait for network (default: 5)
312
+ // select: data.items # Optional: extract sub-path from response
313
+ // framework: pinia # Optional: pinia | vuex (auto-detected if omitted)
314
+ //
315
+ // Generates a self-contained IIFE that:
316
+ // 1. Injects fetch + XHR dual interception proxy
317
+ // 2. Finds the Pinia/Vuex store and calls the action
318
+ // 3. Captures the response matching the URL pattern
319
+ // 4. Auto-cleans up interception in finally block
320
+ // 5. Returns the captured data (optionally sub-selected)
321
+
322
+ const cfg = typeof params === 'object' ? params : {};
323
+ const storeName = String(render(cfg.store ?? '', { args, data }));
324
+ const actionName = String(render(cfg.action ?? '', { args, data }));
325
+ const capturePattern = String(render(cfg.capture ?? '', { args, data }));
326
+ const timeout = cfg.timeout ?? 5;
327
+ const selectPath = cfg.select ? String(render(cfg.select, { args, data })) : null;
328
+ const framework = cfg.framework ?? null; // auto-detect if null
329
+ const actionArgs = cfg.args ?? [];
330
+
331
+ if (!storeName || !actionName) throw new Error('tap: store and action are required');
332
+
333
+ // Build select chain for the captured response
334
+ const selectChain = selectPath
335
+ ? selectPath.split('.').map((p: string) => `?.[${JSON.stringify(p)}]`).join('')
336
+ : '';
337
+
338
+ // Serialize action arguments
339
+ const actionArgsRendered = actionArgs.map((a: any) => {
340
+ const rendered = render(a, { args, data });
341
+ return JSON.stringify(rendered);
342
+ });
343
+ const actionCall = actionArgsRendered.length
344
+ ? `store[${JSON.stringify(actionName)}](${actionArgsRendered.join(', ')})`
345
+ : `store[${JSON.stringify(actionName)}]()`;
346
+
347
+ const js = `
348
+ async () => {
349
+ // ── 1. Setup capture proxy (fetch + XHR dual interception) ──
350
+ let captured = null;
351
+ const capturePattern = ${JSON.stringify(capturePattern)};
352
+
353
+ // Intercept fetch API
354
+ const origFetch = window.fetch;
355
+ window.fetch = async function(...fetchArgs) {
356
+ const resp = await origFetch.apply(this, fetchArgs);
357
+ try {
358
+ const url = typeof fetchArgs[0] === 'string' ? fetchArgs[0]
359
+ : fetchArgs[0] instanceof Request ? fetchArgs[0].url : String(fetchArgs[0]);
360
+ if (capturePattern && url.includes(capturePattern) && !captured) {
361
+ try { captured = await resp.clone().json(); } catch {}
362
+ }
363
+ } catch {}
364
+ return resp;
365
+ };
366
+
367
+ // Intercept XMLHttpRequest
368
+ const origXhrOpen = XMLHttpRequest.prototype.open;
369
+ const origXhrSend = XMLHttpRequest.prototype.send;
370
+ XMLHttpRequest.prototype.open = function(method, url) {
371
+ this.__tapUrl = String(url);
372
+ return origXhrOpen.apply(this, arguments);
373
+ };
374
+ XMLHttpRequest.prototype.send = function(body) {
375
+ if (capturePattern && this.__tapUrl?.includes(capturePattern)) {
376
+ const xhr = this;
377
+ const origHandler = xhr.onreadystatechange;
378
+ xhr.onreadystatechange = function() {
379
+ if (xhr.readyState === 4 && !captured) {
380
+ try { captured = JSON.parse(xhr.responseText); } catch {}
381
+ }
382
+ if (origHandler) origHandler.apply(this, arguments);
383
+ };
384
+ // Also handle onload
385
+ const origOnload = xhr.onload;
386
+ xhr.onload = function() {
387
+ if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} }
388
+ if (origOnload) origOnload.apply(this, arguments);
389
+ };
390
+ }
391
+ return origXhrSend.apply(this, arguments);
392
+ };
393
+
394
+ try {
395
+ // ── 2. Find store ──
396
+ let store = null;
397
+ const storeName = ${JSON.stringify(storeName)};
398
+ const fw = ${JSON.stringify(framework)};
399
+
400
+ // Auto-detect framework if not specified
401
+ const app = document.querySelector('#app');
402
+ if (!fw || fw === 'pinia') {
403
+ // Try Pinia (Vue 3)
404
+ try {
405
+ const pinia = app?.__vue_app__?.config?.globalProperties?.$pinia;
406
+ if (pinia?._s) store = pinia._s.get(storeName);
407
+ } catch {}
408
+ }
409
+ if (!store && (!fw || fw === 'vuex')) {
410
+ // Try Vuex (Vue 2/3)
411
+ try {
412
+ const vuexStore = app?.__vue_app__?.config?.globalProperties?.$store
413
+ ?? app?.__vue__?.$store;
414
+ if (vuexStore) {
415
+ // Vuex doesn't have named stores like Pinia, dispatch action
416
+ store = { [${JSON.stringify(actionName)}]: (...a) => vuexStore.dispatch(storeName + '/' + ${JSON.stringify(actionName)}, ...a) };
417
+ }
418
+ } catch {}
419
+ }
420
+
421
+ if (!store) return { error: 'Store not found: ' + storeName, hint: 'Page may not be fully loaded or store name may be incorrect' };
422
+ if (typeof store[${JSON.stringify(actionName)}] !== 'function') {
423
+ return { error: 'Action not found: ' + ${JSON.stringify(actionName)} + ' on store ' + storeName,
424
+ hint: 'Available: ' + Object.keys(store).filter(k => typeof store[k] === 'function' && !k.startsWith('$') && !k.startsWith('_')).join(', ') };
425
+ }
426
+
427
+ // ── 3. Call store action ──
428
+ await ${actionCall};
429
+
430
+ // ── 4. Wait for network response ──
431
+ const deadline = Date.now() + ${timeout} * 1000;
432
+ while (!captured && Date.now() < deadline) {
433
+ await new Promise(r => setTimeout(r, 200));
434
+ }
435
+ } finally {
436
+ // ── 5. Always restore originals ──
437
+ window.fetch = origFetch;
438
+ XMLHttpRequest.prototype.open = origXhrOpen;
439
+ XMLHttpRequest.prototype.send = origXhrSend;
440
+ }
441
+
442
+ if (!captured) return { error: 'No matching response captured for pattern: ' + capturePattern };
443
+ return captured${selectChain} ?? captured;
444
+ }
445
+ `;
446
+
447
+ return page.evaluate(js);
448
+ }
212
449
  default: return data;
213
450
  }
214
451
  }
package/src/synthesize.ts CHANGED
@@ -1,167 +1,185 @@
1
1
  /**
2
- * Synthesize: turn explore capabilities into ready-to-use CLI definitions.
3
- *
4
- * Takes the structured capabilities from Deep Explore and generates
5
- * YAML pipeline files that can be directly registered as CLI commands.
6
- *
7
- * This is the bridge between discovery (explore) and usability (CLI).
2
+ * Synthesize candidate CLIs from explore artifacts.
3
+ * Generates evaluate-based YAML pipelines (matching hand-written adapter patterns).
8
4
  */
9
5
 
10
6
  import * as fs from 'node:fs';
11
7
  import * as path from 'node:path';
12
8
  import yaml from 'js-yaml';
13
9
 
14
- export function synthesizeFromExplore(target: string, opts: any = {}): any {
15
- const exploreDir = fs.existsSync(target) ? target : path.join('.opencli', 'explore', target);
16
- if (!fs.existsSync(exploreDir)) throw new Error(`Explore dir not found: ${target}`);
10
+ /** Volatile params to strip from generated URLs */
11
+ const VOLATILE_PARAMS = new Set(['w_rid', 'wts', 'callback', '_', 'timestamp', 't', 'nonce', 'sign']);
12
+ const SEARCH_PARAM_NAMES = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'w', 'search_query']);
13
+ const LIMIT_PARAM_NAMES = new Set(['ps', 'page_size', 'limit', 'count', 'per_page', 'size', 'num']);
14
+ const PAGE_PARAM_NAMES = new Set(['pn', 'page', 'page_num', 'offset', 'cursor']);
17
15
 
18
- const manifest = JSON.parse(fs.readFileSync(path.join(exploreDir, 'manifest.json'), 'utf-8'));
19
- const capabilities = JSON.parse(fs.readFileSync(path.join(exploreDir, 'capabilities.json'), 'utf-8'));
20
- const endpoints = JSON.parse(fs.readFileSync(path.join(exploreDir, 'endpoints.json'), 'utf-8'));
21
- const auth = JSON.parse(fs.readFileSync(path.join(exploreDir, 'auth.json'), 'utf-8'));
16
+ export function synthesizeFromExplore(
17
+ target: string,
18
+ opts: { outDir?: string; top?: number } = {},
19
+ ): Record<string, any> {
20
+ const exploreDir = resolveExploreDir(target);
21
+ const bundle = loadExploreBundle(exploreDir);
22
22
 
23
23
  const targetDir = opts.outDir ?? path.join(exploreDir, 'candidates');
24
24
  fs.mkdirSync(targetDir, { recursive: true });
25
25
 
26
- const site = manifest.site;
27
- const topN = opts.top ?? 5;
28
- const candidates: any[] = [];
29
-
30
- // Sort capabilities by confidence
31
- const sortedCaps = [...capabilities]
26
+ const site = bundle.manifest.site;
27
+ const capabilities = (bundle.capabilities ?? [])
32
28
  .sort((a: any, b: any) => (b.confidence ?? 0) - (a.confidence ?? 0))
33
- .slice(0, topN);
34
-
35
- for (const cap of sortedCaps) {
36
- // Find the matching endpoint for more detail
37
- const endpoint = endpoints.find((ep: any) => ep.pattern === cap.endpoint) ??
38
- endpoints[0];
29
+ .slice(0, opts.top ?? 3);
30
+ const candidates: any[] = [];
39
31
 
40
- const candidate = buildCandidateYaml(site, manifest, cap, endpoint);
41
- const fileName = `${cap.name}.yaml`;
42
- const filePath = path.join(targetDir, fileName);
32
+ for (const cap of capabilities) {
33
+ const endpoint = chooseEndpoint(cap, bundle.endpoints);
34
+ if (!endpoint) continue;
35
+ const candidate = buildCandidateYaml(site, bundle.manifest, cap, endpoint);
36
+ const filePath = path.join(targetDir, `${candidate.name}.yaml`);
43
37
  fs.writeFileSync(filePath, yaml.dump(candidate.yaml, { sortKeys: false, lineWidth: 120 }));
44
-
45
- candidates.push({
46
- name: cap.name,
47
- path: filePath,
48
- strategy: cap.strategy,
49
- endpoint: cap.endpoint,
50
- confidence: cap.confidence,
51
- columns: candidate.yaml.columns,
52
- });
38
+ candidates.push({ name: candidate.name, path: filePath, strategy: cap.strategy, confidence: cap.confidence });
53
39
  }
54
40
 
55
- const index = {
56
- site,
57
- target_url: manifest.target_url,
58
- generated_from: exploreDir,
59
- candidate_count: candidates.length,
60
- candidates,
61
- };
41
+ const index = { site, target_url: bundle.manifest.target_url, generated_from: exploreDir, candidate_count: candidates.length, candidates };
62
42
  fs.writeFileSync(path.join(targetDir, 'candidates.json'), JSON.stringify(index, null, 2));
63
43
 
44
+ return { site, explore_dir: exploreDir, out_dir: targetDir, candidate_count: candidates.length, candidates };
45
+ }
46
+
47
+ export function renderSynthesizeSummary(result: Record<string, any>): string {
48
+ const lines = ['opencli synthesize: OK', `Site: ${result.site}`, `Source: ${result.explore_dir}`, `Candidates: ${result.candidate_count}`];
49
+ for (const c of result.candidates ?? []) lines.push(` • ${c.name} (${c.strategy}, ${((c.confidence ?? 0) * 100).toFixed(0)}% confidence) → ${c.path}`);
50
+ return lines.join('\n');
51
+ }
52
+
53
+ export function resolveExploreDir(target: string): string {
54
+ if (fs.existsSync(target)) return target;
55
+ const candidate = path.join('.opencli', 'explore', target);
56
+ if (fs.existsSync(candidate)) return candidate;
57
+ throw new Error(`Explore directory not found: ${target}`);
58
+ }
59
+
60
+ export function loadExploreBundle(exploreDir: string): Record<string, any> {
64
61
  return {
65
- site,
66
- explore_dir: exploreDir,
67
- out_dir: targetDir,
68
- candidate_count: candidates.length,
69
- candidates,
62
+ manifest: JSON.parse(fs.readFileSync(path.join(exploreDir, 'manifest.json'), 'utf-8')),
63
+ endpoints: JSON.parse(fs.readFileSync(path.join(exploreDir, 'endpoints.json'), 'utf-8')),
64
+ capabilities: JSON.parse(fs.readFileSync(path.join(exploreDir, 'capabilities.json'), 'utf-8')),
65
+ auth: JSON.parse(fs.readFileSync(path.join(exploreDir, 'auth.json'), 'utf-8')),
70
66
  };
71
67
  }
72
68
 
73
- /** Volatile params to strip from generated URLs */
74
- const VOLATILE_PARAMS = new Set(['w_rid', 'wts', 'callback', '_', 'timestamp', 't', 'nonce', 'sign']);
75
- const SEARCH_PARAM_NAMES = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'w', 'search_query']);
76
- const LIMIT_PARAM_NAMES = new Set(['ps', 'page_size', 'limit', 'count', 'per_page', 'size', 'num']);
77
- const PAGE_PARAM_NAMES = new Set(['pn', 'page', 'page_num', 'offset', 'cursor']);
69
+ function chooseEndpoint(cap: any, endpoints: any[]): any | null {
70
+ if (!endpoints.length) return null;
71
+ // Match by endpoint pattern from capability
72
+ if (cap.endpoint) {
73
+ const match = endpoints.find((e: any) => e.pattern === cap.endpoint || e.url?.includes(cap.endpoint));
74
+ if (match) return match;
75
+ }
76
+ return endpoints.sort((a: any, b: any) => (b.score ?? 0) - (a.score ?? 0))[0];
77
+ }
78
78
 
79
- /**
80
- * Build a clean templated URL from a raw API URL.
81
- * - Strips volatile params (w_rid, wts, etc.)
82
- * - Templates search, limit, and pagination params
83
- * - Builds URL string manually to avoid URL encoding of ${{ }} expressions
84
- */
85
- function buildTemplatedUrl(rawUrl: string, cap: any, endpoint: any): string {
79
+ // ── URL templating ─────────────────────────────────────────────────────────
80
+
81
+ function buildTemplatedUrl(rawUrl: string, cap: any, _endpoint: any): string {
86
82
  try {
87
83
  const u = new URL(rawUrl);
88
84
  const base = `${u.protocol}//${u.host}${u.pathname}`;
89
85
  const params: Array<[string, string]> = [];
90
-
91
86
  const hasKeyword = cap.recommendedArgs?.some((a: any) => a.name === 'keyword');
92
87
 
93
88
  u.searchParams.forEach((v, k) => {
94
- // Skip volatile params
95
89
  if (VOLATILE_PARAMS.has(k)) return;
96
-
97
- // Template known param types
98
- if (hasKeyword && SEARCH_PARAM_NAMES.has(k)) {
99
- params.push([k, '${{ args.keyword }}']);
100
- } else if (LIMIT_PARAM_NAMES.has(k)) {
101
- params.push([k, '${{ args.limit | default(20) }}']);
102
- } else if (PAGE_PARAM_NAMES.has(k)) {
103
- params.push([k, '${{ args.page | default(1) }}']);
104
- } else {
105
- params.push([k, v]);
106
- }
90
+ if (hasKeyword && SEARCH_PARAM_NAMES.has(k)) params.push([k, '${{ args.keyword }}']);
91
+ else if (LIMIT_PARAM_NAMES.has(k)) params.push([k, '${{ args.limit | default(20) }}']);
92
+ else if (PAGE_PARAM_NAMES.has(k)) params.push([k, '${{ args.page | default(1) }}']);
93
+ else params.push([k, v]);
107
94
  });
108
95
 
109
- if (params.length === 0) return base;
110
- return base + '?' + params.map(([k, v]) => `${k}=${v}`).join('&');
111
- } catch {
112
- return rawUrl;
113
- }
96
+ return params.length ? base + '?' + params.map(([k, v]) => `${k}=${v}`).join('&') : base;
97
+ } catch { return rawUrl; }
114
98
  }
115
99
 
116
100
  /**
117
- * Build a YAML pipeline definition from a capability + endpoint.
101
+ * Build inline evaluate script for browser-based fetch+parse.
102
+ * Follows patterns from bilibili/hot.yaml and twitter/trending.yaml.
118
103
  */
104
+ function buildEvaluateScript(url: string, itemPath: string, endpoint: any): string {
105
+ const pathChain = itemPath.split('.').map((p: string) => `?.${p}`).join('');
106
+ const detectedFields = endpoint?.detectedFields ?? {};
107
+ const hasFields = Object.keys(detectedFields).length > 0;
108
+
109
+ let mapCode = '';
110
+ if (hasFields) {
111
+ const mappings = Object.entries(detectedFields)
112
+ .map(([role, field]) => ` ${role}: item${String(field).split('.').map(p => `?.${p}`).join('')}`)
113
+ .join(',\n');
114
+ mapCode = `.map((item) => ({\n${mappings}\n }))`;
115
+ }
116
+
117
+ return [
118
+ '(async () => {',
119
+ ` const res = await fetch('${url}', {`,
120
+ ` credentials: 'include'`,
121
+ ' });',
122
+ ' const data = await res.json();',
123
+ ` return (data${pathChain} || [])${mapCode};`,
124
+ '})()\n',
125
+ ].join('\n');
126
+ }
127
+
128
+ // ── YAML pipeline generation ───────────────────────────────────────────────
129
+
119
130
  function buildCandidateYaml(site: string, manifest: any, cap: any, endpoint: any): { name: string; yaml: any } {
120
131
  const needsBrowser = cap.strategy !== 'public';
121
132
  const pipeline: any[] = [];
133
+ const templatedUrl = buildTemplatedUrl(endpoint?.url ?? manifest.target_url, cap, endpoint);
122
134
 
123
- // Step 1: Navigate (if browser-based)
124
- if (needsBrowser) {
125
- pipeline.push({ navigate: manifest.target_url });
126
- }
135
+ let domain = '';
136
+ try { domain = new URL(manifest.target_url).hostname; } catch {}
127
137
 
128
- // Step 2: Fetch the API — build a clean URL with templates
129
- const rawUrl = endpoint?.url ?? manifest.target_url;
130
- const fetchStep: any = { url: buildTemplatedUrl(rawUrl, cap, endpoint) };
131
-
132
- pipeline.push({ fetch: fetchStep });
133
-
134
- // Step 3: Select the item path
135
- if (cap.itemPath) {
136
- pipeline.push({ select: cap.itemPath });
138
+ if (cap.strategy === 'store-action' && cap.storeHint) {
139
+ // Store Action: navigate + wait + tap (declarative, clean)
140
+ pipeline.push({ navigate: manifest.target_url });
141
+ pipeline.push({ wait: 3 });
142
+ const tapStep: Record<string, any> = {
143
+ store: cap.storeHint.store,
144
+ action: cap.storeHint.action,
145
+ timeout: 8,
146
+ };
147
+ // Infer capture pattern from endpoint URL
148
+ if (endpoint?.url) {
149
+ try {
150
+ const epUrl = new URL(endpoint.url);
151
+ const pathParts = epUrl.pathname.split('/').filter((p: string) => p);
152
+ // Use last meaningful path segment as capture pattern
153
+ const capturePart = pathParts.filter((p: string) => !p.match(/^v\d+$/)).pop();
154
+ if (capturePart) tapStep.capture = capturePart;
155
+ } catch {}
156
+ }
157
+ if (cap.itemPath) tapStep.select = cap.itemPath;
158
+ pipeline.push({ tap: tapStep });
159
+ } else if (needsBrowser) {
160
+ // Browser-based: navigate + evaluate (like bilibili/hot.yaml, twitter/trending.yaml)
161
+ pipeline.push({ navigate: manifest.target_url });
162
+ const itemPath = cap.itemPath ?? 'data.data.list';
163
+ pipeline.push({ evaluate: buildEvaluateScript(templatedUrl, itemPath, endpoint) });
164
+ } else {
165
+ // Public API: direct fetch (like hackernews/top.yaml)
166
+ pipeline.push({ fetch: { url: templatedUrl } });
167
+ if (cap.itemPath) pipeline.push({ select: cap.itemPath });
137
168
  }
138
169
 
139
- // Step 4: Map fields to columns
170
+ // Map fields
140
171
  const mapStep: Record<string, string> = {};
141
172
  const columns = cap.recommendedColumns ?? ['title', 'url'];
142
-
143
- // Add a rank column if not doing search
144
- if (!cap.recommendedArgs?.some((a: any) => a.name === 'keyword')) {
145
- mapStep['rank'] = '${{ index + 1 }}';
146
- }
147
-
148
- // Build field mappings from the endpoint's detected fields
173
+ if (!cap.recommendedArgs?.some((a: any) => a.name === 'keyword')) mapStep['rank'] = '${{ index + 1 }}';
149
174
  const detectedFields = endpoint?.detectedFields ?? {};
150
175
  for (const col of columns) {
151
176
  const fieldPath = detectedFields[col];
152
- if (fieldPath) {
153
- mapStep[col] = `\${{ item.${fieldPath} }}`;
154
- } else {
155
- mapStep[col] = `\${{ item.${col} }}`;
156
- }
177
+ mapStep[col] = fieldPath ? `\${{ item.${fieldPath} }}` : `\${{ item.${col} }}`;
157
178
  }
158
-
159
179
  pipeline.push({ map: mapStep });
160
-
161
- // Step 5: Limit
162
180
  pipeline.push({ limit: '${{ args.limit | default(20) }}' });
163
181
 
164
- // Build args definition
182
+ // Args
165
183
  const argsDef: Record<string, any> = {};
166
184
  for (const arg of cap.recommendedArgs ?? []) {
167
185
  const def: any = { type: arg.type ?? 'str' };
@@ -172,39 +190,26 @@ function buildCandidateYaml(site: string, manifest: any, cap: any, endpoint: any
172
190
  else if (arg.name === 'page') def.description = 'Page number';
173
191
  argsDef[arg.name] = def;
174
192
  }
175
-
176
- // Ensure limit arg always exists
177
- if (!argsDef['limit']) {
178
- argsDef['limit'] = { type: 'int', default: 20, description: 'Number of items to return' };
179
- }
180
-
181
- const allColumns = Object.keys(mapStep);
193
+ if (!argsDef['limit']) argsDef['limit'] = { type: 'int', default: 20, description: 'Number of items to return' };
182
194
 
183
195
  return {
184
196
  name: cap.name,
185
197
  yaml: {
186
- site,
187
- name: cap.name,
188
- description: `${site} ${cap.name} (auto-generated)`,
189
- domain: manifest.final_url ? new URL(manifest.final_url).hostname : undefined,
190
- strategy: cap.strategy,
191
- browser: needsBrowser,
192
- args: argsDef,
193
- pipeline,
194
- columns: allColumns,
198
+ site, name: cap.name, description: `${cap.description || site + ' ' + cap.name} (auto-generated)`,
199
+ domain, strategy: cap.strategy, browser: needsBrowser,
200
+ args: argsDef, pipeline, columns: Object.keys(mapStep),
195
201
  },
196
202
  };
197
203
  }
198
204
 
199
- export function renderSynthesizeSummary(r: any): string {
200
- const lines = [
201
- 'opencli synthesize: OK',
202
- `Site: ${r.site}`,
203
- `Source: ${r.explore_dir}`,
204
- `Candidates: ${r.candidate_count}`,
205
- ];
206
- for (const c of r.candidates ?? []) {
207
- lines.push(` ${c.name} (${c.strategy}, ${(c.confidence * 100).toFixed(0)}% confidence) ${c.path}`);
208
- }
209
- return lines.join('\n');
205
+ /** Backward-compatible export for scaffold.ts */
206
+ export function buildCandidate(site: string, targetUrl: string, cap: any, endpoint: any): any {
207
+ // Map old-style field names to new ones
208
+ const normalizedCap = {
209
+ ...cap,
210
+ recommendedArgs: cap.recommendedArgs ?? cap.recommended_args,
211
+ recommendedColumns: cap.recommendedColumns ?? cap.recommended_columns,
212
+ };
213
+ const manifest = { target_url: targetUrl, final_url: targetUrl };
214
+ return buildCandidateYaml(site, manifest, normalizedCap, endpoint);
210
215
  }