@jackwener/opencli 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/CLI-CREATOR.md +594 -0
  2. package/README.md +116 -38
  3. package/README.zh-CN.md +143 -0
  4. package/SKILL.md +154 -102
  5. package/dist/browser.d.ts +1 -0
  6. package/dist/browser.js +35 -1
  7. package/dist/cascade.d.ts +45 -0
  8. package/dist/cascade.js +180 -0
  9. package/dist/clis/bilibili/hot.yaml +38 -0
  10. package/dist/clis/github/trending.yaml +58 -0
  11. package/dist/clis/hackernews/top.yaml +36 -0
  12. package/dist/clis/index.d.ts +2 -1
  13. package/dist/clis/index.js +3 -1
  14. package/dist/clis/reddit/hot.yaml +46 -0
  15. package/dist/clis/twitter/trending.yaml +40 -0
  16. package/dist/clis/v2ex/hot.yaml +25 -0
  17. package/dist/clis/v2ex/latest.yaml +25 -0
  18. package/dist/clis/v2ex/topic.yaml +27 -0
  19. package/dist/clis/xiaohongshu/feed.yaml +32 -0
  20. package/dist/clis/xiaohongshu/notifications.yaml +38 -0
  21. package/dist/clis/xiaohongshu/search.d.ts +5 -0
  22. package/dist/clis/xiaohongshu/search.js +68 -0
  23. package/dist/clis/zhihu/hot.yaml +42 -0
  24. package/dist/clis/zhihu/question.js +39 -0
  25. package/dist/clis/zhihu/search.yaml +55 -0
  26. package/dist/explore.d.ts +23 -13
  27. package/dist/explore.js +293 -422
  28. package/dist/main.js +17 -0
  29. package/dist/pipeline.js +238 -2
  30. package/dist/synthesize.d.ts +11 -8
  31. package/dist/synthesize.js +142 -118
  32. package/package.json +4 -2
  33. package/src/browser.ts +33 -1
  34. package/src/cascade.ts +217 -0
  35. package/src/clis/index.ts +4 -1
  36. package/src/clis/reddit/hot.yaml +46 -0
  37. package/src/clis/v2ex/hot.yaml +5 -9
  38. package/src/clis/v2ex/latest.yaml +5 -8
  39. package/src/clis/v2ex/topic.yaml +27 -0
  40. package/src/clis/xiaohongshu/feed.yaml +32 -0
  41. package/src/clis/xiaohongshu/notifications.yaml +38 -0
  42. package/src/clis/xiaohongshu/search.ts +71 -0
  43. package/src/clis/zhihu/hot.yaml +22 -8
  44. package/src/clis/zhihu/question.ts +45 -0
  45. package/src/clis/zhihu/search.yaml +55 -0
  46. package/src/explore.ts +303 -465
  47. package/src/main.ts +14 -0
  48. package/src/pipeline.ts +239 -2
  49. package/src/synthesize.ts +142 -137
  50. package/dist/clis/zhihu/search.js +0 -58
  51. package/src/clis/zhihu/search.ts +0 -65
  52. package/dist/clis/zhihu/{search.d.ts → question.d.ts} +0 -0
package/dist/main.js CHANGED
@@ -55,10 +55,27 @@ program.command('verify').description('Validate + smoke test').argument('[target
55
55
  .action(async (target, opts) => { const { verifyClis, renderVerifyReport } = await import('./verify.js'); const r = await verifyClis({ builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, target, smoke: opts.smoke }); console.log(renderVerifyReport(r)); process.exitCode = r.ok ? 0 : 1; });
56
56
  program.command('explore').description('Explore a website').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
57
57
  .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
58
+ program.command('probe').description('Probe a website: discover APIs, stores, and recommend strategies').argument('<url>').option('--site <name>').option('--goal <text>').option('--wait <s>', '', '3')
59
+ .action(async (url, opts) => { const { exploreUrl, renderExploreSummary } = await import('./explore.js'); console.log(renderExploreSummary(await exploreUrl(url, { BrowserFactory: PlaywrightMCP, site: opts.site, goal: opts.goal, waitSeconds: parseFloat(opts.wait) }))); });
58
60
  program.command('synthesize').description('Synthesize CLIs from explore').argument('<target>').option('--top <n>', '', '3')
59
61
  .action(async (target, opts) => { const { synthesizeFromExplore, renderSynthesizeSummary } = await import('./synthesize.js'); console.log(renderSynthesizeSummary(synthesizeFromExplore(target, { top: parseInt(opts.top) }))); });
60
62
  program.command('generate').description('One-shot: explore → synthesize → register').argument('<url>').option('--goal <text>').option('--site <name>')
61
63
  .action(async (url, opts) => { const { generateCliFromUrl, renderGenerateSummary } = await import('./generate.js'); const r = await generateCliFromUrl({ url, BrowserFactory: PlaywrightMCP, builtinClis: BUILTIN_CLIS, userClis: USER_CLIS, goal: opts.goal, site: opts.site }); console.log(renderGenerateSummary(r)); process.exitCode = r.ok ? 0 : 1; });
64
+ program.command('cascade').description('Strategy cascade: find simplest working strategy').argument('<url>').option('--site <name>')
65
+ .action(async (url, opts) => {
66
+ const { cascadeProbe, renderCascadeResult } = await import('./cascade.js');
67
+ const result = await browserSession(PlaywrightMCP, async (page) => {
68
+ // Navigate to the site first for cookie context
69
+ try {
70
+ const siteUrl = new URL(url);
71
+ await page.goto(`${siteUrl.protocol}//${siteUrl.host}`);
72
+ await page.wait(2);
73
+ }
74
+ catch { }
75
+ return cascadeProbe(page, url);
76
+ });
77
+ console.log(renderCascadeResult(result));
78
+ });
62
79
  // ── Dynamic site commands ──────────────────────────────────────────────────
63
80
  const registry = getRegistry();
64
81
  const siteGroups = new Map();
package/dist/pipeline.js CHANGED
@@ -16,6 +16,12 @@ export async function executePipeline(page, pipeline, ctx = {}) {
16
16
  if (debug)
17
17
  debugStepStart(i + 1, total, op, params);
18
18
  data = await executeStep(page, op, params, data, args);
19
+ // Detect error objects returned by steps (e.g. tap store not found)
20
+ if (data && typeof data === 'object' && !Array.isArray(data) && data.error) {
21
+ process.stderr.write(` ${chalk.yellow('⚠')} ${chalk.yellow(op)}: ${data.error}\n`);
22
+ if (data.hint)
23
+ process.stderr.write(` ${chalk.dim('💡')} ${chalk.dim(data.hint)}\n`);
24
+ }
19
25
  if (debug)
20
26
  debugStepResult(op, data);
21
27
  }
@@ -136,7 +142,18 @@ async function executeStep(page, op, params, data, args) {
136
142
  }
137
143
  case 'evaluate': {
138
144
  const js = String(render(params, { args, data }));
139
- return page.evaluate(normalizeEvaluateSource(js));
145
+ let result = await page.evaluate(normalizeEvaluateSource(js));
146
+ // MCP may return JSON as a string — auto-parse it
147
+ if (typeof result === 'string') {
148
+ const trimmed = result.trim();
149
+ if ((trimmed.startsWith('[') && trimmed.endsWith(']')) || (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
150
+ try {
151
+ result = JSON.parse(trimmed);
152
+ }
153
+ catch { }
154
+ }
155
+ }
156
+ return result;
140
157
  }
141
158
  case 'snapshot': {
142
159
  const opts = (typeof params === 'object' && params) ? params : {};
@@ -214,7 +231,226 @@ async function executeStep(page, op, params, data, args) {
214
231
  return data;
215
232
  return data.slice(0, Number(render(params, { args, data })));
216
233
  }
217
- case 'intercept': return data;
234
+ case 'intercept': {
235
+ // Declarative XHR interception step
236
+ // Usage:
237
+ // intercept:
238
+ // trigger: "navigate:https://..." | "evaluate:store.note.fetch()" | "click:ref"
239
+ // capture: "api/pattern" # URL substring to match
240
+ // timeout: 5 # seconds to wait for matching request
241
+ // select: "data.items" # optional: extract sub-path from response
242
+ const cfg = typeof params === 'object' ? params : {};
243
+ const trigger = cfg.trigger ?? '';
244
+ const capturePattern = cfg.capture ?? '';
245
+ const timeout = cfg.timeout ?? 8;
246
+ const selectPath = cfg.select ?? null;
247
+ if (!capturePattern)
248
+ return data;
249
+ // Step 1: Execute the trigger action
250
+ if (trigger.startsWith('navigate:')) {
251
+ const url = render(trigger.slice('navigate:'.length), { args, data });
252
+ await page.goto(String(url));
253
+ }
254
+ else if (trigger.startsWith('evaluate:')) {
255
+ const js = trigger.slice('evaluate:'.length);
256
+ await page.evaluate(normalizeEvaluateSource(render(js, { args, data })));
257
+ }
258
+ else if (trigger.startsWith('click:')) {
259
+ const ref = render(trigger.slice('click:'.length), { args, data });
260
+ await page.click(String(ref).replace(/^@/, ''));
261
+ }
262
+ else if (trigger === 'scroll') {
263
+ await page.scroll('down');
264
+ }
265
+ // Step 2: Wait a bit for network requests to fire
266
+ await page.wait(Math.min(timeout, 3));
267
+ // Step 3: Get network requests and find matching ones
268
+ const rawNetwork = await page.networkRequests(false);
269
+ const matchingResponses = [];
270
+ if (typeof rawNetwork === 'string') {
271
+ // Parse the network output to find matching URLs
272
+ const lines = rawNetwork.split('\n');
273
+ for (const line of lines) {
274
+ const match = line.match(/\[?(GET|POST)\]?\s+(\S+)\s*(?:=>|→)\s*\[?(\d+)\]?/i);
275
+ if (match) {
276
+ const [, method, url, status] = match;
277
+ if (url.includes(capturePattern) && status === '200') {
278
+ // Re-fetch the matching URL to get the response body
279
+ try {
280
+ const body = await page.evaluate(`
281
+ async () => {
282
+ try {
283
+ const resp = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
284
+ if (!resp.ok) return null;
285
+ return await resp.json();
286
+ } catch { return null; }
287
+ }
288
+ `);
289
+ if (body)
290
+ matchingResponses.push(body);
291
+ }
292
+ catch { }
293
+ }
294
+ }
295
+ }
296
+ }
297
+ // Step 4: Select from response if specified
298
+ let result = matchingResponses.length === 1 ? matchingResponses[0] :
299
+ matchingResponses.length > 1 ? matchingResponses : data;
300
+ if (selectPath && result) {
301
+ let current = result;
302
+ for (const part of String(selectPath).split('.')) {
303
+ if (current && typeof current === 'object' && !Array.isArray(current)) {
304
+ current = current[part];
305
+ }
306
+ else
307
+ break;
308
+ }
309
+ result = current ?? result;
310
+ }
311
+ return result;
312
+ }
313
+ case 'tap': {
314
+ // ── Declarative Store Action Bridge ──────────────────────────────────
315
+ // Usage:
316
+ // tap:
317
+ // store: feed # Pinia/Vuex store name
318
+ // action: fetchFeeds # Store action to call
319
+ // args: [] # Optional args to pass to action
320
+ // capture: homefeed # URL pattern to capture response
321
+ // timeout: 5 # Seconds to wait for network (default: 5)
322
+ // select: data.items # Optional: extract sub-path from response
323
+ // framework: pinia # Optional: pinia | vuex (auto-detected if omitted)
324
+ //
325
+ // Generates a self-contained IIFE that:
326
+ // 1. Injects fetch + XHR dual interception proxy
327
+ // 2. Finds the Pinia/Vuex store and calls the action
328
+ // 3. Captures the response matching the URL pattern
329
+ // 4. Auto-cleans up interception in finally block
330
+ // 5. Returns the captured data (optionally sub-selected)
331
+ const cfg = typeof params === 'object' ? params : {};
332
+ const storeName = String(render(cfg.store ?? '', { args, data }));
333
+ const actionName = String(render(cfg.action ?? '', { args, data }));
334
+ const capturePattern = String(render(cfg.capture ?? '', { args, data }));
335
+ const timeout = cfg.timeout ?? 5;
336
+ const selectPath = cfg.select ? String(render(cfg.select, { args, data })) : null;
337
+ const framework = cfg.framework ?? null; // auto-detect if null
338
+ const actionArgs = cfg.args ?? [];
339
+ if (!storeName || !actionName)
340
+ throw new Error('tap: store and action are required');
341
+ // Build select chain for the captured response
342
+ const selectChain = selectPath
343
+ ? selectPath.split('.').map((p) => `?.[${JSON.stringify(p)}]`).join('')
344
+ : '';
345
+ // Serialize action arguments
346
+ const actionArgsRendered = actionArgs.map((a) => {
347
+ const rendered = render(a, { args, data });
348
+ return JSON.stringify(rendered);
349
+ });
350
+ const actionCall = actionArgsRendered.length
351
+ ? `store[${JSON.stringify(actionName)}](${actionArgsRendered.join(', ')})`
352
+ : `store[${JSON.stringify(actionName)}]()`;
353
+ const js = `
354
+ async () => {
355
+ // ── 1. Setup capture proxy (fetch + XHR dual interception) ──
356
+ let captured = null;
357
+ const capturePattern = ${JSON.stringify(capturePattern)};
358
+
359
+ // Intercept fetch API
360
+ const origFetch = window.fetch;
361
+ window.fetch = async function(...fetchArgs) {
362
+ const resp = await origFetch.apply(this, fetchArgs);
363
+ try {
364
+ const url = typeof fetchArgs[0] === 'string' ? fetchArgs[0]
365
+ : fetchArgs[0] instanceof Request ? fetchArgs[0].url : String(fetchArgs[0]);
366
+ if (capturePattern && url.includes(capturePattern) && !captured) {
367
+ try { captured = await resp.clone().json(); } catch {}
368
+ }
369
+ } catch {}
370
+ return resp;
371
+ };
372
+
373
+ // Intercept XMLHttpRequest
374
+ const origXhrOpen = XMLHttpRequest.prototype.open;
375
+ const origXhrSend = XMLHttpRequest.prototype.send;
376
+ XMLHttpRequest.prototype.open = function(method, url) {
377
+ this.__tapUrl = String(url);
378
+ return origXhrOpen.apply(this, arguments);
379
+ };
380
+ XMLHttpRequest.prototype.send = function(body) {
381
+ if (capturePattern && this.__tapUrl?.includes(capturePattern)) {
382
+ const xhr = this;
383
+ const origHandler = xhr.onreadystatechange;
384
+ xhr.onreadystatechange = function() {
385
+ if (xhr.readyState === 4 && !captured) {
386
+ try { captured = JSON.parse(xhr.responseText); } catch {}
387
+ }
388
+ if (origHandler) origHandler.apply(this, arguments);
389
+ };
390
+ // Also handle onload
391
+ const origOnload = xhr.onload;
392
+ xhr.onload = function() {
393
+ if (!captured) { try { captured = JSON.parse(xhr.responseText); } catch {} }
394
+ if (origOnload) origOnload.apply(this, arguments);
395
+ };
396
+ }
397
+ return origXhrSend.apply(this, arguments);
398
+ };
399
+
400
+ try {
401
+ // ── 2. Find store ──
402
+ let store = null;
403
+ const storeName = ${JSON.stringify(storeName)};
404
+ const fw = ${JSON.stringify(framework)};
405
+
406
+ // Auto-detect framework if not specified
407
+ const app = document.querySelector('#app');
408
+ if (!fw || fw === 'pinia') {
409
+ // Try Pinia (Vue 3)
410
+ try {
411
+ const pinia = app?.__vue_app__?.config?.globalProperties?.$pinia;
412
+ if (pinia?._s) store = pinia._s.get(storeName);
413
+ } catch {}
414
+ }
415
+ if (!store && (!fw || fw === 'vuex')) {
416
+ // Try Vuex (Vue 2/3)
417
+ try {
418
+ const vuexStore = app?.__vue_app__?.config?.globalProperties?.$store
419
+ ?? app?.__vue__?.$store;
420
+ if (vuexStore) {
421
+ // Vuex doesn't have named stores like Pinia, dispatch action
422
+ store = { [${JSON.stringify(actionName)}]: (...a) => vuexStore.dispatch(storeName + '/' + ${JSON.stringify(actionName)}, ...a) };
423
+ }
424
+ } catch {}
425
+ }
426
+
427
+ if (!store) return { error: 'Store not found: ' + storeName, hint: 'Page may not be fully loaded or store name may be incorrect' };
428
+ if (typeof store[${JSON.stringify(actionName)}] !== 'function') {
429
+ return { error: 'Action not found: ' + ${JSON.stringify(actionName)} + ' on store ' + storeName,
430
+ hint: 'Available: ' + Object.keys(store).filter(k => typeof store[k] === 'function' && !k.startsWith('$') && !k.startsWith('_')).join(', ') };
431
+ }
432
+
433
+ // ── 3. Call store action ──
434
+ await ${actionCall};
435
+
436
+ // ── 4. Wait for network response ──
437
+ const deadline = Date.now() + ${timeout} * 1000;
438
+ while (!captured && Date.now() < deadline) {
439
+ await new Promise(r => setTimeout(r, 200));
440
+ }
441
+ } finally {
442
+ // ── 5. Always restore originals ──
443
+ window.fetch = origFetch;
444
+ XMLHttpRequest.prototype.open = origXhrOpen;
445
+ XMLHttpRequest.prototype.send = origXhrSend;
446
+ }
447
+
448
+ if (!captured) return { error: 'No matching response captured for pattern: ' + capturePattern };
449
+ return captured${selectChain} ?? captured;
450
+ }
451
+ `;
452
+ return page.evaluate(js);
453
+ }
218
454
  default: return data;
219
455
  }
220
456
  }
package/dist/synthesize.d.ts CHANGED
@@ -1,10 +1,13 @@
1
1
  /**
2
- * Synthesize: turn explore capabilities into ready-to-use CLI definitions.
3
- *
4
- * Takes the structured capabilities from Deep Explore and generates
5
- * YAML pipeline files that can be directly registered as CLI commands.
6
- *
7
- * This is the bridge between discovery (explore) and usability (CLI).
2
+ * Synthesize candidate CLIs from explore artifacts.
3
+ * Generates evaluate-based YAML pipelines (matching hand-written adapter patterns).
8
4
  */
9
- export declare function synthesizeFromExplore(target: string, opts?: any): any;
10
- export declare function renderSynthesizeSummary(r: any): string;
5
+ export declare function synthesizeFromExplore(target: string, opts?: {
6
+ outDir?: string;
7
+ top?: number;
8
+ }): Record<string, any>;
9
+ export declare function renderSynthesizeSummary(result: Record<string, any>): string;
10
+ export declare function resolveExploreDir(target: string): string;
11
+ export declare function loadExploreBundle(exploreDir: string): Record<string, any>;
12
+ /** Backward-compatible export for scaffold.ts */
13
+ export declare function buildCandidate(site: string, targetUrl: string, cap: any, endpoint: any): any;
package/dist/synthesize.js CHANGED
@@ -1,147 +1,181 @@
1
1
  /**
2
- * Synthesize: turn explore capabilities into ready-to-use CLI definitions.
3
- *
4
- * Takes the structured capabilities from Deep Explore and generates
5
- * YAML pipeline files that can be directly registered as CLI commands.
6
- *
7
- * This is the bridge between discovery (explore) and usability (CLI).
2
+ * Synthesize candidate CLIs from explore artifacts.
3
+ * Generates evaluate-based YAML pipelines (matching hand-written adapter patterns).
8
4
  */
9
5
  import * as fs from 'node:fs';
10
6
  import * as path from 'node:path';
11
7
  import yaml from 'js-yaml';
8
+ /** Volatile params to strip from generated URLs */
9
+ const VOLATILE_PARAMS = new Set(['w_rid', 'wts', 'callback', '_', 'timestamp', 't', 'nonce', 'sign']);
10
+ const SEARCH_PARAM_NAMES = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'w', 'search_query']);
11
+ const LIMIT_PARAM_NAMES = new Set(['ps', 'page_size', 'limit', 'count', 'per_page', 'size', 'num']);
12
+ const PAGE_PARAM_NAMES = new Set(['pn', 'page', 'page_num', 'offset', 'cursor']);
12
13
  export function synthesizeFromExplore(target, opts = {}) {
13
- const exploreDir = fs.existsSync(target) ? target : path.join('.opencli', 'explore', target);
14
- if (!fs.existsSync(exploreDir))
15
- throw new Error(`Explore dir not found: ${target}`);
16
- const manifest = JSON.parse(fs.readFileSync(path.join(exploreDir, 'manifest.json'), 'utf-8'));
17
- const capabilities = JSON.parse(fs.readFileSync(path.join(exploreDir, 'capabilities.json'), 'utf-8'));
18
- const endpoints = JSON.parse(fs.readFileSync(path.join(exploreDir, 'endpoints.json'), 'utf-8'));
19
- const auth = JSON.parse(fs.readFileSync(path.join(exploreDir, 'auth.json'), 'utf-8'));
14
+ const exploreDir = resolveExploreDir(target);
15
+ const bundle = loadExploreBundle(exploreDir);
20
16
  const targetDir = opts.outDir ?? path.join(exploreDir, 'candidates');
21
17
  fs.mkdirSync(targetDir, { recursive: true });
22
- const site = manifest.site;
23
- const topN = opts.top ?? 5;
24
- const candidates = [];
25
- // Sort capabilities by confidence
26
- const sortedCaps = [...capabilities]
18
+ const site = bundle.manifest.site;
19
+ const capabilities = (bundle.capabilities ?? [])
27
20
  .sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0))
28
- .slice(0, topN);
29
- for (const cap of sortedCaps) {
30
- // Find the matching endpoint for more detail
31
- const endpoint = endpoints.find((ep) => ep.pattern === cap.endpoint) ??
32
- endpoints[0];
33
- const candidate = buildCandidateYaml(site, manifest, cap, endpoint);
34
- const fileName = `${cap.name}.yaml`;
35
- const filePath = path.join(targetDir, fileName);
21
+ .slice(0, opts.top ?? 3);
22
+ const candidates = [];
23
+ for (const cap of capabilities) {
24
+ const endpoint = chooseEndpoint(cap, bundle.endpoints);
25
+ if (!endpoint)
26
+ continue;
27
+ const candidate = buildCandidateYaml(site, bundle.manifest, cap, endpoint);
28
+ const filePath = path.join(targetDir, `${candidate.name}.yaml`);
36
29
  fs.writeFileSync(filePath, yaml.dump(candidate.yaml, { sortKeys: false, lineWidth: 120 }));
37
- candidates.push({
38
- name: cap.name,
39
- path: filePath,
40
- strategy: cap.strategy,
41
- endpoint: cap.endpoint,
42
- confidence: cap.confidence,
43
- columns: candidate.yaml.columns,
44
- });
30
+ candidates.push({ name: candidate.name, path: filePath, strategy: cap.strategy, confidence: cap.confidence });
45
31
  }
46
- const index = {
47
- site,
48
- target_url: manifest.target_url,
49
- generated_from: exploreDir,
50
- candidate_count: candidates.length,
51
- candidates,
52
- };
32
+ const index = { site, target_url: bundle.manifest.target_url, generated_from: exploreDir, candidate_count: candidates.length, candidates };
53
33
  fs.writeFileSync(path.join(targetDir, 'candidates.json'), JSON.stringify(index, null, 2));
34
+ return { site, explore_dir: exploreDir, out_dir: targetDir, candidate_count: candidates.length, candidates };
35
+ }
36
+ export function renderSynthesizeSummary(result) {
37
+ const lines = ['opencli synthesize: OK', `Site: ${result.site}`, `Source: ${result.explore_dir}`, `Candidates: ${result.candidate_count}`];
38
+ for (const c of result.candidates ?? [])
39
+ lines.push(` • ${c.name} (${c.strategy}, ${((c.confidence ?? 0) * 100).toFixed(0)}% confidence) → ${c.path}`);
40
+ return lines.join('\n');
41
+ }
42
+ export function resolveExploreDir(target) {
43
+ if (fs.existsSync(target))
44
+ return target;
45
+ const candidate = path.join('.opencli', 'explore', target);
46
+ if (fs.existsSync(candidate))
47
+ return candidate;
48
+ throw new Error(`Explore directory not found: ${target}`);
49
+ }
50
+ export function loadExploreBundle(exploreDir) {
54
51
  return {
55
- site,
56
- explore_dir: exploreDir,
57
- out_dir: targetDir,
58
- candidate_count: candidates.length,
59
- candidates,
52
+ manifest: JSON.parse(fs.readFileSync(path.join(exploreDir, 'manifest.json'), 'utf-8')),
53
+ endpoints: JSON.parse(fs.readFileSync(path.join(exploreDir, 'endpoints.json'), 'utf-8')),
54
+ capabilities: JSON.parse(fs.readFileSync(path.join(exploreDir, 'capabilities.json'), 'utf-8')),
55
+ auth: JSON.parse(fs.readFileSync(path.join(exploreDir, 'auth.json'), 'utf-8')),
60
56
  };
61
57
  }
62
- /** Volatile params to strip from generated URLs */
63
- const VOLATILE_PARAMS = new Set(['w_rid', 'wts', 'callback', '_', 'timestamp', 't', 'nonce', 'sign']);
64
- const SEARCH_PARAM_NAMES = new Set(['q', 'query', 'keyword', 'search', 'wd', 'kw', 'w', 'search_query']);
65
- const LIMIT_PARAM_NAMES = new Set(['ps', 'page_size', 'limit', 'count', 'per_page', 'size', 'num']);
66
- const PAGE_PARAM_NAMES = new Set(['pn', 'page', 'page_num', 'offset', 'cursor']);
67
- /**
68
- * Build a clean templated URL from a raw API URL.
69
- * - Strips volatile params (w_rid, wts, etc.)
70
- * - Templates search, limit, and pagination params
71
- * - Builds URL string manually to avoid URL encoding of ${{ }} expressions
72
- */
73
- function buildTemplatedUrl(rawUrl, cap, endpoint) {
58
+ function chooseEndpoint(cap, endpoints) {
59
+ if (!endpoints.length)
60
+ return null;
61
+ // Match by endpoint pattern from capability
62
+ if (cap.endpoint) {
63
+ const match = endpoints.find((e) => e.pattern === cap.endpoint || e.url?.includes(cap.endpoint));
64
+ if (match)
65
+ return match;
66
+ }
67
+ return endpoints.sort((a, b) => (b.score ?? 0) - (a.score ?? 0))[0];
68
+ }
69
+ // ── URL templating ─────────────────────────────────────────────────────────
70
+ function buildTemplatedUrl(rawUrl, cap, _endpoint) {
74
71
  try {
75
72
  const u = new URL(rawUrl);
76
73
  const base = `${u.protocol}//${u.host}${u.pathname}`;
77
74
  const params = [];
78
75
  const hasKeyword = cap.recommendedArgs?.some((a) => a.name === 'keyword');
79
76
  u.searchParams.forEach((v, k) => {
80
- // Skip volatile params
81
77
  if (VOLATILE_PARAMS.has(k))
82
78
  return;
83
- // Template known param types
84
- if (hasKeyword && SEARCH_PARAM_NAMES.has(k)) {
79
+ if (hasKeyword && SEARCH_PARAM_NAMES.has(k))
85
80
  params.push([k, '${{ args.keyword }}']);
86
- }
87
- else if (LIMIT_PARAM_NAMES.has(k)) {
81
+ else if (LIMIT_PARAM_NAMES.has(k))
88
82
  params.push([k, '${{ args.limit | default(20) }}']);
89
- }
90
- else if (PAGE_PARAM_NAMES.has(k)) {
83
+ else if (PAGE_PARAM_NAMES.has(k))
91
84
  params.push([k, '${{ args.page | default(1) }}']);
92
- }
93
- else {
85
+ else
94
86
  params.push([k, v]);
95
- }
96
87
  });
97
- if (params.length === 0)
98
- return base;
99
- return base + '?' + params.map(([k, v]) => `${k}=${v}`).join('&');
88
+ return params.length ? base + '?' + params.map(([k, v]) => `${k}=${v}`).join('&') : base;
100
89
  }
101
90
  catch {
102
91
  return rawUrl;
103
92
  }
104
93
  }
105
94
  /**
106
- * Build a YAML pipeline definition from a capability + endpoint.
95
+ * Build inline evaluate script for browser-based fetch+parse.
96
+ * Follows patterns from bilibili/hot.yaml and twitter/trending.yaml.
107
97
  */
98
+ function buildEvaluateScript(url, itemPath, endpoint) {
99
+ const pathChain = itemPath.split('.').map((p) => `?.${p}`).join('');
100
+ const detectedFields = endpoint?.detectedFields ?? {};
101
+ const hasFields = Object.keys(detectedFields).length > 0;
102
+ let mapCode = '';
103
+ if (hasFields) {
104
+ const mappings = Object.entries(detectedFields)
105
+ .map(([role, field]) => ` ${role}: item${String(field).split('.').map(p => `?.${p}`).join('')}`)
106
+ .join(',\n');
107
+ mapCode = `.map((item) => ({\n${mappings}\n }))`;
108
+ }
109
+ return [
110
+ '(async () => {',
111
+ ` const res = await fetch('${url}', {`,
112
+ ` credentials: 'include'`,
113
+ ' });',
114
+ ' const data = await res.json();',
115
+ ` return (data${pathChain} || [])${mapCode};`,
116
+ '})()\n',
117
+ ].join('\n');
118
+ }
119
+ // ── YAML pipeline generation ───────────────────────────────────────────────
108
120
  function buildCandidateYaml(site, manifest, cap, endpoint) {
109
121
  const needsBrowser = cap.strategy !== 'public';
110
122
  const pipeline = [];
111
- // Step 1: Navigate (if browser-based)
112
- if (needsBrowser) {
123
+ const templatedUrl = buildTemplatedUrl(endpoint?.url ?? manifest.target_url, cap, endpoint);
124
+ let domain = '';
125
+ try {
126
+ domain = new URL(manifest.target_url).hostname;
127
+ }
128
+ catch { }
129
+ if (cap.strategy === 'store-action' && cap.storeHint) {
130
+ // Store Action: navigate + wait + tap (declarative, clean)
113
131
  pipeline.push({ navigate: manifest.target_url });
132
+ pipeline.push({ wait: 3 });
133
+ const tapStep = {
134
+ store: cap.storeHint.store,
135
+ action: cap.storeHint.action,
136
+ timeout: 8,
137
+ };
138
+ // Infer capture pattern from endpoint URL
139
+ if (endpoint?.url) {
140
+ try {
141
+ const epUrl = new URL(endpoint.url);
142
+ const pathParts = epUrl.pathname.split('/').filter((p) => p);
143
+ // Use last meaningful path segment as capture pattern
144
+ const capturePart = pathParts.filter((p) => !p.match(/^v\d+$/)).pop();
145
+ if (capturePart)
146
+ tapStep.capture = capturePart;
147
+ }
148
+ catch { }
149
+ }
150
+ if (cap.itemPath)
151
+ tapStep.select = cap.itemPath;
152
+ pipeline.push({ tap: tapStep });
153
+ }
154
+ else if (needsBrowser) {
155
+ // Browser-based: navigate + evaluate (like bilibili/hot.yaml, twitter/trending.yaml)
156
+ pipeline.push({ navigate: manifest.target_url });
157
+ const itemPath = cap.itemPath ?? 'data.data.list';
158
+ pipeline.push({ evaluate: buildEvaluateScript(templatedUrl, itemPath, endpoint) });
114
159
  }
115
- // Step 2: Fetch the API — build a clean URL with templates
116
- const rawUrl = endpoint?.url ?? manifest.target_url;
117
- const fetchStep = { url: buildTemplatedUrl(rawUrl, cap, endpoint) };
118
- pipeline.push({ fetch: fetchStep });
119
- // Step 3: Select the item path
120
- if (cap.itemPath) {
121
- pipeline.push({ select: cap.itemPath });
160
+ else {
161
+ // Public API: direct fetch (like hackernews/top.yaml)
162
+ pipeline.push({ fetch: { url: templatedUrl } });
163
+ if (cap.itemPath)
164
+ pipeline.push({ select: cap.itemPath });
122
165
  }
123
- // Step 4: Map fields to columns
166
+ // Map fields
124
167
  const mapStep = {};
125
168
  const columns = cap.recommendedColumns ?? ['title', 'url'];
126
- // Add a rank column if not doing search
127
- if (!cap.recommendedArgs?.some((a) => a.name === 'keyword')) {
169
+ if (!cap.recommendedArgs?.some((a) => a.name === 'keyword'))
128
170
  mapStep['rank'] = '${{ index + 1 }}';
129
- }
130
- // Build field mappings from the endpoint's detected fields
131
171
  const detectedFields = endpoint?.detectedFields ?? {};
132
172
  for (const col of columns) {
133
173
  const fieldPath = detectedFields[col];
134
- if (fieldPath) {
135
- mapStep[col] = `\${{ item.${fieldPath} }}`;
136
- }
137
- else {
138
- mapStep[col] = `\${{ item.${col} }}`;
139
- }
174
+ mapStep[col] = fieldPath ? `\${{ item.${fieldPath} }}` : `\${{ item.${col} }}`;
140
175
  }
141
176
  pipeline.push({ map: mapStep });
142
- // Step 5: Limit
143
177
  pipeline.push({ limit: '${{ args.limit | default(20) }}' });
144
- // Build args definition
178
+ // Args
145
179
  const argsDef = {};
146
180
  for (const arg of cap.recommendedArgs ?? []) {
147
181
  const def = { type: arg.type ?? 'str' };
@@ -157,35 +191,25 @@ function buildCandidateYaml(site, manifest, cap, endpoint) {
157
191
  def.description = 'Page number';
158
192
  argsDef[arg.name] = def;
159
193
  }
160
- // Ensure limit arg always exists
161
- if (!argsDef['limit']) {
194
+ if (!argsDef['limit'])
162
195
  argsDef['limit'] = { type: 'int', default: 20, description: 'Number of items to return' };
163
- }
164
- const allColumns = Object.keys(mapStep);
165
196
  return {
166
197
  name: cap.name,
167
198
  yaml: {
168
- site,
169
- name: cap.name,
170
- description: `${site} ${cap.name} (auto-generated)`,
171
- domain: manifest.final_url ? new URL(manifest.final_url).hostname : undefined,
172
- strategy: cap.strategy,
173
- browser: needsBrowser,
174
- args: argsDef,
175
- pipeline,
176
- columns: allColumns,
199
+ site, name: cap.name, description: `${cap.description || site + ' ' + cap.name} (auto-generated)`,
200
+ domain, strategy: cap.strategy, browser: needsBrowser,
201
+ args: argsDef, pipeline, columns: Object.keys(mapStep),
177
202
  },
178
203
  };
179
204
  }
180
- export function renderSynthesizeSummary(r) {
181
- const lines = [
182
- 'opencli synthesize: OK',
183
- `Site: ${r.site}`,
184
- `Source: ${r.explore_dir}`,
185
- `Candidates: ${r.candidate_count}`,
186
- ];
187
- for (const c of r.candidates ?? []) {
188
- lines.push(` • ${c.name} (${c.strategy}, ${(c.confidence * 100).toFixed(0)}% confidence) ${c.path}`);
189
- }
190
- return lines.join('\n');
205
+ /** Backward-compatible export for scaffold.ts */
206
+ export function buildCandidate(site, targetUrl, cap, endpoint) {
207
+ // Map old-style field names to new ones
208
+ const normalizedCap = {
209
+ ...cap,
210
+ recommendedArgs: cap.recommendedArgs ?? cap.recommended_args,
211
+ recommendedColumns: cap.recommendedColumns ?? cap.recommended_columns,
212
+ };
213
+ const manifest = { target_url: targetUrl, final_url: targetUrl };
214
+ return buildCandidateYaml(site, manifest, normalizedCap, endpoint);
191
215
  }