@jackwener/opencli 1.6.6 → 1.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +3 -1
  2. package/README.zh-CN.md +6 -2
  3. package/dist/clis/1688/assets.d.ts +42 -0
  4. package/dist/clis/1688/assets.js +204 -0
  5. package/dist/clis/1688/assets.test.d.ts +1 -0
  6. package/dist/clis/1688/assets.test.js +39 -0
  7. package/dist/clis/1688/download.d.ts +9 -0
  8. package/dist/clis/1688/download.js +76 -0
  9. package/dist/clis/1688/download.test.d.ts +1 -0
  10. package/dist/clis/1688/download.test.js +31 -0
  11. package/dist/clis/1688/shared.d.ts +10 -0
  12. package/dist/clis/1688/shared.js +43 -0
  13. package/dist/clis/linux-do/topic-content.d.ts +35 -0
  14. package/dist/clis/linux-do/topic-content.js +154 -0
  15. package/dist/clis/linux-do/topic-content.test.d.ts +1 -0
  16. package/dist/clis/linux-do/topic-content.test.js +59 -0
  17. package/dist/clis/linux-do/topic.yaml +1 -16
  18. package/dist/clis/xueqiu/groups.yaml +23 -0
  19. package/dist/clis/xueqiu/kline.yaml +65 -0
  20. package/dist/clis/xueqiu/watchlist.yaml +9 -9
  21. package/dist/src/analysis.d.ts +2 -0
  22. package/dist/src/analysis.js +6 -0
  23. package/dist/src/browser/cdp.js +96 -0
  24. package/dist/src/build-manifest.d.ts +3 -1
  25. package/dist/src/build-manifest.js +10 -7
  26. package/dist/src/build-manifest.test.js +8 -4
  27. package/dist/src/cli.d.ts +2 -1
  28. package/dist/src/cli.js +48 -46
  29. package/dist/src/commands/daemon.js +2 -10
  30. package/dist/src/diagnostic.d.ts +63 -0
  31. package/dist/src/diagnostic.js +247 -0
  32. package/dist/src/diagnostic.test.d.ts +1 -0
  33. package/dist/src/diagnostic.test.js +213 -0
  34. package/dist/src/discovery.js +7 -17
  35. package/dist/src/download/progress.js +7 -2
  36. package/dist/src/execution.js +25 -4
  37. package/dist/src/explore.d.ts +0 -2
  38. package/dist/src/explore.js +61 -38
  39. package/dist/src/extension-manifest-regression.test.js +0 -1
  40. package/dist/src/generate.d.ts +1 -1
  41. package/dist/src/generate.js +2 -3
  42. package/dist/src/package-paths.d.ts +8 -0
  43. package/dist/src/package-paths.js +41 -0
  44. package/dist/src/plugin-scaffold.js +1 -3
  45. package/dist/src/record.d.ts +1 -2
  46. package/dist/src/record.js +14 -52
  47. package/dist/src/synthesize.d.ts +0 -2
  48. package/dist/src/synthesize.js +8 -4
  49. package/package.json +1 -1
  50. package/scripts/postinstall.js +18 -71
  51. package/dist/cli-manifest.json +0 -17250
@@ -0,0 +1,213 @@
1
+ import { describe, it, expect, vi, afterEach } from 'vitest';
2
+ import { buildRepairContext, isDiagnosticEnabled, emitDiagnostic, truncate, redactUrl, redactText, resolveAdapterSourcePath, MAX_DIAGNOSTIC_BYTES, } from './diagnostic.js';
3
+ import { SelectorError, CommandExecutionError } from './errors.js';
4
+ function makeCmd(overrides = {}) {
5
+ return {
6
+ site: 'test-site',
7
+ name: 'test-cmd',
8
+ description: 'test',
9
+ args: [],
10
+ ...overrides,
11
+ };
12
+ }
13
// isDiagnosticEnabled is expected to be true only for the exact value "1".
describe('isDiagnosticEnabled', () => {
    // Captured once when the suite is registered so each test can be undone afterwards.
    const savedValue = process.env.OPENCLI_DIAGNOSTIC;
    // `undefined` means "variable absent"; assigning undefined would store the string "undefined".
    const applyEnv = (value) => {
        if (value === undefined) {
            delete process.env.OPENCLI_DIAGNOSTIC;
            return;
        }
        process.env.OPENCLI_DIAGNOSTIC = value;
    };
    afterEach(() => {
        applyEnv(savedValue);
    });
    it('returns false when env not set', () => {
        applyEnv(undefined);
        const enabled = isDiagnosticEnabled();
        expect(enabled).toBe(false);
    });
    it('returns true when env is "1"', () => {
        applyEnv('1');
        const enabled = isDiagnosticEnabled();
        expect(enabled).toBe(true);
    });
    it('returns false for other values', () => {
        applyEnv('true');
        const enabled = isDiagnosticEnabled();
        expect(enabled).toBe(false);
    });
});
34
// truncate(text, limit): text within the limit passes through; longer text is cut and annotated.
describe('truncate', () => {
    it('returns short strings unchanged', () => {
        const input = 'hello';
        const output = truncate(input, 100);
        expect(output).toBe('hello');
    });
    it('truncates long strings with marker', () => {
        const originalLength = 200;
        const shortened = truncate('a'.repeat(originalLength), 50);
        expect(shortened.length).toBeLessThan(200);
        // 200 input chars with a limit of 50 leaves 150 chars reported as omitted.
        expect(shortened).toContain('...[truncated,');
        expect(shortened).toContain('150 chars omitted]');
    });
});
46
// redactUrl masks the values of sensitive query parameters and leaves everything else intact.
describe('redactUrl', () => {
    it('redacts sensitive query parameters', () => {
        const redacted = redactUrl('https://api.com/v1?token=abc123&q=test');
        expect(redacted).toBe('https://api.com/v1?token=[REDACTED]&q=test');
    });
    it('redacts multiple sensitive params', () => {
        const redacted = redactUrl('https://api.com?api_key=xxx&secret=yyy&page=1');
        expect(redacted).toContain('api_key=[REDACTED]');
        expect(redacted).toContain('secret=[REDACTED]');
        // Non-sensitive parameters must survive untouched.
        expect(redacted).toContain('page=1');
    });
    it('leaves clean URLs unchanged', () => {
        const clean = 'https://example.com/page?q=test';
        expect(redactUrl('https://example.com/page?q=test')).toBe(clean);
    });
});
62
// redactText scrubs credentials embedded in free-form text (messages, stacks, headers).
describe('redactText', () => {
    it('redacts Bearer tokens', () => {
        const scrubbed = redactText('Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.test');
        expect(scrubbed).toContain('Bearer [REDACTED]');
    });
    it('redacts JWT tokens', () => {
        const jwt = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U';
        // Redaction is invoked separately for each expectation.
        const scrub = () => redactText(`token is ${jwt}`);
        expect(scrub()).toContain('[REDACTED_JWT]');
        expect(scrub()).not.toContain('eyJhbGci');
    });
    it('redacts inline token=value patterns', () => {
        const scrubbed = redactText('failed with token=abc123def456');
        expect(scrubbed).toContain('token=[REDACTED]');
    });
    it('redacts cookie values', () => {
        const scrubbed = redactText('cookie: session=abc123; user=xyz789; path=/');
        expect(scrubbed).toContain('[REDACTED]');
        expect(scrubbed).not.toContain('session=abc123');
    });
    it('leaves normal text unchanged', () => {
        const harmless = 'Error: element not found';
        expect(redactText('Error: element not found')).toBe(harmless);
    });
});
84
// resolveAdapterSourcePath: a real `source` path wins; `manifest:` pseudo-paths are never returned.
describe('resolveAdapterSourcePath', () => {
    it('returns source when it is a real file path (not manifest:)', () => {
        const command = makeCmd({ source: '/home/user/.opencli/clis/arxiv/search.yaml' });
        const resolved = resolveAdapterSourcePath(command);
        expect(resolved).toBe('/home/user/.opencli/clis/arxiv/search.yaml');
    });
    it('skips manifest: pseudo-paths and falls back to _modulePath', () => {
        const command = makeCmd({ source: 'manifest:arxiv/search', _modulePath: '/pkg/dist/clis/arxiv/search.js' });
        // The dist→source mapping is attempted, but none of these files exist on disk,
        // so the expected outcome is the _modulePath itself.
        const resolved = resolveAdapterSourcePath(command);
        expect(resolved).toBeDefined();
        expect(resolved).not.toContain('manifest:');
    });
    it('returns undefined when only manifest: pseudo-path and no _modulePath', () => {
        const command = makeCmd({ source: 'manifest:test/cmd' });
        const resolved = resolveAdapterSourcePath(command);
        expect(resolved).toBeUndefined();
    });
    it('prefers _modulePath mapped to .ts over dist .js', () => {
        // Exercises the mapping logic without requiring any files on disk.
        const modulePath = '/project/dist/clis/site/cmd.js';
        const resolved = resolveAdapterSourcePath(makeCmd({ _modulePath: modulePath }));
        // Neither the .ts nor the .js file exists, so _modulePath comes back as the best guess.
        expect(resolved).toBe('/project/dist/clis/site/cmd.js');
    });
});
108
// buildRepairContext(error, cmd, pageState?) assembles the diagnostic payload for a failed command.
describe('buildRepairContext', () => {
    it('captures CliError fields', () => {
        const selectorFailure = new SelectorError('.missing-element', 'Element removed');
        const { error, adapter, timestamp } = buildRepairContext(selectorFailure, makeCmd());
        expect(error.code).toBe('SELECTOR');
        expect(error.message).toContain('.missing-element');
        expect(error.hint).toBe('Element removed');
        expect(error.stack).toBeDefined();
        expect(adapter.site).toBe('test-site');
        expect(adapter.command).toBe('test-site/test-cmd');
        // ISO-8601 style prefix, e.g. 2024-01-31T...
        expect(timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
    });
    it('handles non-CliError errors', () => {
        const plainError = new TypeError('Cannot read property "x" of undefined');
        const { error } = buildRepairContext(plainError, makeCmd());
        expect(error.code).toBe('UNKNOWN');
        expect(error.message).toContain('Cannot read property');
        expect(error.hint).toBeUndefined();
    });
    it('includes page state when provided', () => {
        const pageState = {
            url: 'https://example.com/page',
            snapshot: '<div>...</div>',
            networkRequests: [{ url: '/api/data', status: 200 }],
            consoleErrors: ['Uncaught TypeError'],
        };
        const failure = new CommandExecutionError('boom');
        const context = buildRepairContext(failure, makeCmd(), pageState);
        expect(context.page).toEqual(pageState);
    });
    it('omits page when not provided', () => {
        const context = buildRepairContext(new Error('boom'), makeCmd());
        expect(context.page).toBeUndefined();
    });
    it('truncates long stack traces', () => {
        const oversized = new Error('boom');
        oversized.stack = 'x'.repeat(10_000);
        const { error } = buildRepairContext(oversized, makeCmd());
        expect(error.stack.length).toBeLessThan(10_000);
        expect(error.stack).toContain('truncated');
    });
    it('redacts sensitive data in error message and stack', () => {
        const leaky = new Error('Request failed with Bearer eyJhbGciOiJIUzI1NiJ9.test.sig');
        const { error } = buildRepairContext(leaky, makeCmd());
        expect(error.message).toContain('Bearer [REDACTED]');
        expect(error.message).not.toContain('eyJhbGci');
        // The stack embeds the message, so it must be scrubbed as well.
        expect(error.stack).toContain('Bearer [REDACTED]');
    });
});
157
// emitDiagnostic writes the repair context to stderr as one JSON line framed by marker lines.
describe('emitDiagnostic', () => {
    // Captures the single-line JSON payload between the two marker lines.
    const FRAME = /___OPENCLI_DIAGNOSTIC___\n(.*)\n___OPENCLI_DIAGNOSTIC___/;
    // Restore stderr after every test, including failed ones; otherwise a failing
    // assertion would leave process.stderr.write mocked for the rest of the run.
    afterEach(() => {
        vi.restoreAllMocks();
    });
    /** Silence stderr and return a getter for everything written to it so far. */
    function captureStderr() {
        const spy = vi.spyOn(process.stderr, 'write').mockReturnValue(true);
        return () => spy.mock.calls.map(call => call[0]).join('');
    }
    /** Extract the framed payload and return both its raw text and parsed form. */
    function readFrame(output) {
        const match = output.match(FRAME);
        expect(match).toBeTruthy();
        return { raw: match[1], parsed: JSON.parse(match[1]) };
    }
    it('writes delimited JSON to stderr', () => {
        const readOutput = captureStderr();
        const ctx = buildRepairContext(new CommandExecutionError('test error'), makeCmd());
        emitDiagnostic(ctx);
        const output = readOutput();
        expect(output).toContain('___OPENCLI_DIAGNOSTIC___');
        expect(output).toContain('"code":"COMMAND_EXEC"');
        expect(output).toContain('"message":"test error"');
        // The payload between the markers must be parseable JSON.
        const { parsed } = readFrame(output);
        expect(parsed.error.code).toBe('COMMAND_EXEC');
    });
    it('drops page snapshot when over size budget', () => {
        const readOutput = captureStderr();
        const ctx = {
            error: { code: 'COMMAND_EXEC', message: 'boom' },
            adapter: { site: 'test', command: 'test/cmd' },
            page: {
                url: 'https://example.com',
                snapshot: 'x'.repeat(MAX_DIAGNOSTIC_BYTES + 1000),
                networkRequests: [],
                consoleErrors: [],
            },
            timestamp: new Date().toISOString(),
        };
        emitDiagnostic(ctx);
        const { raw, parsed } = readFrame(readOutput());
        // Either the snapshot was replaced or the whole page section was dropped;
        // in both cases the emitted snapshot differs from the oversized input.
        expect(parsed.page?.snapshot).not.toBe(ctx.page.snapshot);
        // The budget is expressed in bytes, so measure encoded size, not UTF-16 length.
        expect(Buffer.byteLength(raw, 'utf8')).toBeLessThanOrEqual(MAX_DIAGNOSTIC_BYTES);
    });
    it('redacts sensitive query params in network request URLs', () => {
        // Per the original test notes, network-request redaction happens in the private
        // collectPageState step, which cannot be reached from a unit test. Header
        // redaction is therefore left to integration coverage; this test only pins
        // the URL redaction that step relies on.
        expect(redactUrl('https://api.com/data?token=secret123')).toContain('[REDACTED]');
    });
});
@@ -15,6 +15,7 @@ import yaml from 'js-yaml';
15
15
  import { Strategy, registerCommand } from './registry.js';
16
16
  import { getErrorMessage } from './errors.js';
17
17
  import { log } from './logger.js';
18
+ import { findPackageRoot, getCliManifestPath, getFetchAdaptersScriptPath } from './package-paths.js';
18
19
  /** User runtime directory: ~/.opencli */
19
20
  export const USER_OPENCLI_DIR = path.join(os.homedir(), '.opencli');
20
21
  /** User CLIs directory: ~/.opencli/clis */
@@ -31,18 +32,7 @@ function parseStrategy(rawStrategy, fallback = Strategy.COOKIE) {
31
32
  return Strategy[key] ?? fallback;
32
33
  }
33
34
  import { isRecord } from './utils.js';
34
- /**
35
- * Find the package root (directory containing package.json).
36
- * Dev: import.meta.url is in src/ → one level up.
37
- * Prod: import.meta.url is in dist/src/ → two levels up.
38
- */
39
- function findPackageRoot() {
40
- let dir = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
41
- if (!fs.existsSync(path.join(dir, 'package.json'))) {
42
- dir = path.resolve(dir, '..');
43
- }
44
- return dir;
45
- }
35
+ const PACKAGE_ROOT = findPackageRoot(fileURLToPath(import.meta.url));
46
36
  /**
47
37
  * Ensure ~/.opencli/node_modules/@jackwener/opencli symlink exists so that
48
38
  * user CLIs in ~/.opencli/clis/ can `import { cli } from '@jackwener/opencli/registry'`.
@@ -65,7 +55,7 @@ export async function ensureUserCliCompatShims(baseDir = USER_OPENCLI_DIR) {
65
55
  await fs.promises.writeFile(pkgJsonPath, pkgJsonContent, 'utf-8');
66
56
  }
67
57
  // Create node_modules/@jackwener/opencli symlink pointing to the installed package root.
68
- const opencliRoot = findPackageRoot();
58
+ const opencliRoot = PACKAGE_ROOT;
69
59
  const symlinkDir = path.join(baseDir, 'node_modules', '@jackwener');
70
60
  const symlinkPath = path.join(symlinkDir, 'opencli');
71
61
  try {
@@ -116,7 +106,7 @@ export async function ensureUserAdapters() {
116
106
  log.info('First run detected — copying adapters (one-time setup)...');
117
107
  try {
118
108
  const { execFileSync } = await import('node:child_process');
119
- const scriptPath = path.join(findPackageRoot(), 'scripts', 'fetch-adapters.js');
109
+ const scriptPath = getFetchAdaptersScriptPath(PACKAGE_ROOT);
120
110
  execFileSync(process.execPath, [scriptPath], {
121
111
  stdio: 'inherit',
122
112
  env: { ...process.env, _OPENCLI_FIRST_RUN: '1' },
@@ -135,7 +125,7 @@ export async function ensureUserAdapters() {
135
125
  export async function discoverClis(...dirs) {
136
126
  // Fast path: try manifest first (production / post-build)
137
127
  for (const dir of dirs) {
138
- const manifestPath = path.resolve(dir, '..', 'cli-manifest.json');
128
+ const manifestPath = getCliManifestPath(dir);
139
129
  try {
140
130
  await fs.promises.access(manifestPath);
141
131
  const loaded = await loadFromManifest(manifestPath, dir);
@@ -173,7 +163,7 @@ async function loadFromManifest(manifestPath, clisDir) {
173
163
  columns: entry.columns,
174
164
  pipeline: entry.pipeline,
175
165
  timeoutSeconds: entry.timeout,
176
- source: `manifest:${entry.site}/${entry.name}`,
166
+ source: entry.sourceFile ? path.resolve(clisDir, entry.sourceFile) : `manifest:${entry.site}/${entry.name}`,
177
167
  deprecated: entry.deprecated,
178
168
  replacedBy: entry.replacedBy,
179
169
  navigateBefore: entry.navigateBefore,
@@ -196,7 +186,7 @@ async function loadFromManifest(manifestPath, clisDir) {
196
186
  args: entry.args ?? [],
197
187
  columns: entry.columns,
198
188
  timeoutSeconds: entry.timeout,
199
- source: modulePath,
189
+ source: entry.sourceFile ? path.resolve(clisDir, entry.sourceFile) : modulePath,
200
190
  deprecated: entry.deprecated,
201
191
  replacedBy: entry.replacedBy,
202
192
  navigateBefore: entry.navigateBefore,
@@ -23,8 +23,13 @@ export function formatDuration(ms) {
23
23
  if (seconds < 60)
24
24
  return `${seconds}s`;
25
25
  const minutes = Math.floor(seconds / 60);
26
- const remainingSeconds = seconds % 60;
27
- return `${minutes}m ${remainingSeconds}s`;
26
+ if (minutes < 60) {
27
+ const remainingSeconds = seconds % 60;
28
+ return remainingSeconds > 0 ? `${minutes}m ${remainingSeconds}s` : `${minutes}m`;
29
+ }
30
+ const hours = Math.floor(minutes / 60);
31
+ const remainingMinutes = minutes % 60;
32
+ return remainingMinutes > 0 ? `${hours}h ${remainingMinutes}m` : `${hours}h`;
28
33
  }
29
34
  /**
30
35
  * Create a simple progress bar for terminal display.
@@ -13,6 +13,7 @@ import { Strategy, getRegistry, fullName } from './registry.js';
13
13
  import { pathToFileURL } from 'node:url';
14
14
  import { executePipeline } from './pipeline/index.js';
15
15
  import { AdapterLoadError, ArgumentError, BrowserConnectError, CommandExecutionError, getErrorMessage } from './errors.js';
16
+ import { isDiagnosticEnabled, collectDiagnostic, emitDiagnostic } from './diagnostic.js';
16
17
  import { shouldUseBrowserSession } from './capabilityRouting.js';
17
18
  import { getBrowserFactory, browserSession, runWithTimeout, DEFAULT_BROWSER_COMMAND_TIMEOUT } from './runtime.js';
18
19
  import { emitHook } from './hooks.js';
@@ -129,6 +130,7 @@ export async function executeCommand(cmd, rawKwargs, debug = false) {
129
130
  };
130
131
  await emitHook('onBeforeExecute', hookCtx);
131
132
  let result;
133
+ let diagnosticEmitted = false;
132
134
  try {
133
135
  if (shouldUseBrowserSession(cmd)) {
134
136
  const electron = isElectronApp(cmd.site);
@@ -176,10 +178,22 @@ export async function executeCommand(cmd, rawKwargs, debug = false) {
176
178
  log.debug(`[pre-nav] Failed to navigate to ${preNavUrl}: ${err instanceof Error ? err.message : err}`);
177
179
  }
178
180
  }
179
- return runWithTimeout(runCommand(cmd, page, kwargs, debug), {
180
- timeout: cmd.timeoutSeconds ?? DEFAULT_BROWSER_COMMAND_TIMEOUT,
181
- label: fullName(cmd),
182
- });
181
+ try {
182
+ return await runWithTimeout(runCommand(cmd, page, kwargs, debug), {
183
+ timeout: cmd.timeoutSeconds ?? DEFAULT_BROWSER_COMMAND_TIMEOUT,
184
+ label: fullName(cmd),
185
+ });
186
+ }
187
+ catch (err) {
188
+ // Collect diagnostic while page is still alive (before browserSession closes it).
189
+ if (isDiagnosticEnabled()) {
190
+ const internal = cmd;
191
+ const ctx = await collectDiagnostic(err, internal, page);
192
+ emitDiagnostic(ctx);
193
+ diagnosticEmitted = true;
194
+ }
195
+ throw err;
196
+ }
183
197
  }, { workspace: `site:${cmd.site}`, cdpEndpoint });
184
198
  }
185
199
  else {
@@ -198,6 +212,13 @@ export async function executeCommand(cmd, rawKwargs, debug = false) {
198
212
  }
199
213
  }
200
214
  catch (err) {
215
+ // Emit diagnostic if not already emitted (browser session emits with page state;
216
+ // this fallback covers non-browser commands and pre-session failures like BrowserConnectError).
217
+ if (isDiagnosticEnabled() && !diagnosticEmitted) {
218
+ const internal = cmd;
219
+ const ctx = await collectDiagnostic(err, internal, null);
220
+ emitDiagnostic(ctx);
221
+ }
201
222
  hookCtx.error = err;
202
223
  hookCtx.finishedAt = Date.now();
203
224
  await emitHook('onAfterExecute', hookCtx);
@@ -12,7 +12,6 @@ interface InferredCapability {
12
12
  name: string;
13
13
  description: string;
14
14
  strategy: string;
15
- confidence: number;
16
15
  endpoint: string;
17
16
  itemPath: string | null;
18
17
  recommendedColumns: string[];
@@ -52,7 +51,6 @@ export interface ExploreEndpointArtifact {
52
51
  url: string;
53
52
  status: number | null;
54
53
  contentType: string;
55
- score: number;
56
54
  queryParams: string[];
57
55
  itemPath: string | null;
58
56
  itemCount: number;
@@ -13,7 +13,7 @@ import { detectFramework } from './scripts/framework.js';
13
13
  import { discoverStores } from './scripts/store.js';
14
14
  import { interactFuzz } from './scripts/interact.js';
15
15
  import { log } from './logger.js';
16
- import { urlToPattern, findArrayPath, flattenFields, detectFieldRoles, inferCapabilityName, inferStrategy, detectAuthFromHeaders, classifyQueryParams, } from './analysis.js';
16
+ import { urlToPattern, findArrayPath, flattenFields, detectFieldRoles, inferCapabilityName, inferStrategy, detectAuthFromHeaders, classifyQueryParams, isNoiseUrl, } from './analysis.js';
17
17
  // ── Site name detection ────────────────────────────────────────────────────
18
18
  const KNOWN_SITE_ALIASES = {
19
19
  'x.com': 'twitter', 'twitter.com': 'twitter',
@@ -66,13 +66,29 @@ function parseNetworkRequests(raw) {
66
66
  return entries;
67
67
  }
68
68
  if (Array.isArray(raw)) {
69
- return raw.filter(e => e && typeof e === 'object').map(e => ({
70
- method: (e.method ?? 'GET').toUpperCase(),
71
- url: String(e.url ?? e.request?.url ?? e.requestUrl ?? ''),
72
- status: e.status ?? e.statusCode ?? null,
73
- contentType: e.contentType ?? e.response?.contentType ?? '',
74
- responseBody: e.responseBody, requestHeaders: e.requestHeaders,
75
- }));
69
+ return raw.filter(e => e && typeof e === 'object').map(e => {
70
+ // Handle both legacy shape (status/contentType/responseBody) and
71
+ // extension/CDP capture shape (responseStatus/responseContentType/responsePreview)
72
+ let body = e.responseBody;
73
+ if (body === undefined && e.responsePreview !== undefined) {
74
+ const preview = e.responsePreview;
75
+ if (typeof preview === 'string') {
76
+ try {
77
+ body = JSON.parse(preview);
78
+ }
79
+ catch {
80
+ body = preview;
81
+ }
82
+ }
83
+ }
84
+ return {
85
+ method: (e.method ?? 'GET').toUpperCase(),
86
+ url: String(e.url ?? e.request?.url ?? e.requestUrl ?? ''),
87
+ status: e.status ?? e.responseStatus ?? e.statusCode ?? null,
88
+ contentType: e.contentType ?? e.responseContentType ?? e.response?.contentType ?? '',
89
+ responseBody: body, requestHeaders: e.requestHeaders,
90
+ };
91
+ });
76
92
  }
77
93
  return [];
78
94
  }
@@ -91,29 +107,32 @@ function isBooleanRecord(value) {
91
107
  return typeof value === 'object' && value !== null && !Array.isArray(value)
92
108
  && Object.values(value).every(v => typeof v === 'boolean');
93
109
  }
94
- function scoreEndpoint(ep) {
95
- let s = 0;
96
- if (ep.contentType.includes('json'))
97
- s += 10;
98
- if (ep.responseAnalysis) {
99
- s += 5;
100
- s += Math.min(ep.responseAnalysis.itemCount, 10);
101
- s += Object.keys(ep.responseAnalysis.detectedFields).length * 2;
102
- }
110
+ /**
111
+ * Deterministic sort key for endpoint ordering — transparent, observable signals only.
112
+ * Used by generate/synthesize to pick a stable default candidate.
113
+ * Not exposed externally; AI agents see the raw metadata and decide for themselves.
114
+ */
115
+ function endpointSortKey(ep) {
116
+ let k = 0;
117
+ // Prefer endpoints with array data (list APIs are more useful for automation)
118
+ const items = ep.responseAnalysis?.itemCount ?? 0;
119
+ if (items > 0)
120
+ k += 100 + Math.min(items, 50);
121
+ // Prefer endpoints with detected semantic fields
122
+ k += Object.keys(ep.responseAnalysis?.detectedFields ?? {}).length * 10;
123
+ // Prefer API-style paths
103
124
  if (ep.pattern.includes('/api/') || ep.pattern.includes('/x/'))
104
- s += 3;
105
- if (ep.hasSearchParam)
106
- s += 3;
107
- if (ep.hasPaginationParam)
108
- s += 2;
109
- if (ep.hasLimitParam)
110
- s += 2;
111
- if (ep.status === 200)
112
- s += 2;
113
- // Anti-Bot Empty Value Detection: penalize JSON endpoints returning empty data
114
- if (ep.responseAnalysis && ep.responseAnalysis.itemCount === 0 && ep.contentType.includes('json'))
115
- s -= 3;
116
- return s;
125
+ k += 5;
126
+ // Prefer endpoints with query params (more likely to be parameterized APIs)
127
+ if (ep.hasSearchParam || ep.hasPaginationParam || ep.hasLimitParam)
128
+ k += 5;
129
+ return k;
130
+ }
131
+ /** Check whether an endpoint carries useful structured data (any JSON response, not noise). */
132
+ function isUsefulEndpoint(ep) {
133
+ if (isNoiseUrl(ep.url))
134
+ return false;
135
+ return ep.contentType.includes('json');
117
136
  }
118
137
  // ── Framework detection ────────────────────────────────────────────────────
119
138
  const FRAMEWORK_DETECT_JS = detectFramework.toString();
@@ -122,7 +141,7 @@ const STORE_DISCOVER_JS = discoverStores.toString();
122
141
  // ── Auto-Interaction (Fuzzing) ─────────────────────────────────────────────
123
142
  const INTERACT_FUZZ_JS = interactFuzz.toString();
124
143
  // ── Analysis helpers (extracted from exploreUrl) ───────────────────────────
125
- /** Filter, deduplicate, and score network endpoints. */
144
+ /** Filter and deduplicate network endpoints, keeping only useful structured-data APIs. */
126
145
  function analyzeEndpoints(networkEntries) {
127
146
  const seen = new Map();
128
147
  for (const entry of networkEntries) {
@@ -145,12 +164,13 @@ function analyzeEndpoints(networkEntries) {
145
164
  hasLimitParam: hasLimit || qp.some(p => LIMIT_PARAMS.has(p)),
146
165
  authIndicators: detectAuthFromHeaders(entry.requestHeaders),
147
166
  responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
148
- score: 0,
149
167
  };
150
- ep.score = scoreEndpoint(ep);
151
168
  seen.set(key, ep);
152
169
  }
153
- const analyzed = [...seen.values()].filter(ep => ep.score >= 5).sort((a, b) => b.score - a.score);
170
+ // Filter to useful endpoints; deterministic ordering by observable metadata signals
171
+ const analyzed = [...seen.values()]
172
+ .filter(isUsefulEndpoint)
173
+ .sort((a, b) => endpointSortKey(b) - endpointSortKey(a));
154
174
  return { analyzed, totalCount: seen.size };
155
175
  }
156
176
  /** Infer CLI capabilities from analyzed endpoints. */
@@ -192,7 +212,7 @@ function inferCapabilitiesFromEndpoints(endpoints, stores, opts) {
192
212
  capabilities.push({
193
213
  name: capName, description: `${opts.site ?? detectSiteName(opts.url)} ${capName}`,
194
214
  strategy: storeHint ? 'store-action' : epStrategy,
195
- confidence: Math.min(ep.score / 20, 1.0), endpoint: ep.pattern,
215
+ endpoint: ep.pattern,
196
216
  itemPath: ep.responseAnalysis?.itemPath ?? null,
197
217
  recommendedColumns: cols.length ? cols : ['title', 'url'],
198
218
  recommendedArgs: args,
@@ -216,7 +236,7 @@ async function writeExploreArtifacts(targetDir, result, analyzedEndpoints, store
216
236
  }, null, 2)),
217
237
  fs.promises.writeFile(path.join(targetDir, 'endpoints.json'), JSON.stringify(analyzedEndpoints.map(ep => ({
218
238
  pattern: ep.pattern, method: ep.method, url: ep.url, status: ep.status,
219
- contentType: ep.contentType, score: ep.score, queryParams: ep.queryParams,
239
+ contentType: ep.contentType, queryParams: ep.queryParams,
220
240
  itemPath: ep.responseAnalysis?.itemPath ?? null, itemCount: ep.responseAnalysis?.itemCount ?? 0,
221
241
  detectedFields: ep.responseAnalysis?.detectedFields ?? {}, authIndicators: ep.authIndicators,
222
242
  })), null, 2)),
@@ -237,6 +257,7 @@ export async function exploreUrl(url, opts) {
237
257
  return browserSession(opts.BrowserFactory, async (page) => {
238
258
  return runWithTimeout((async () => {
239
259
  // Step 1: Navigate
260
+ await page.startNetworkCapture?.();
240
261
  await page.goto(url);
241
262
  await page.wait(waitSeconds);
242
263
  // Step 2: Auto-scroll to trigger lazy loading intelligently
@@ -269,7 +290,9 @@ export async function exploreUrl(url, opts) {
269
290
  // Step 3: Read page metadata
270
291
  const metadata = await readPageMetadata(page);
271
292
  // Step 4: Capture network traffic
272
- const rawNetwork = await page.networkRequests(false);
293
+ const rawNetwork = page.readNetworkCapture
294
+ ? await page.readNetworkCapture()
295
+ : await page.networkRequests(false);
273
296
  const networkEntries = parseNetworkRequests(rawNetwork);
274
297
  // Step 5: For JSON endpoints missing a body, carefully re-fetch in-browser via a pristine iframe
275
298
  const jsonEndpoints = networkEntries.filter(e => e.contentType.includes('json') && e.method === 'GET' && e.status === 200 && !e.responseBody);
@@ -348,7 +371,7 @@ export function renderExploreSummary(result) {
348
371
  ];
349
372
  for (const cap of (result.capabilities ?? []).slice(0, 5)) {
350
373
  const storeInfo = cap.storeHint ? ` → ${cap.storeHint.store}.${cap.storeHint.action}()` : '';
351
- lines.push(` • ${cap.name} (${cap.strategy}, ${(cap.confidence * 100).toFixed(0)}%)${storeInfo}`);
374
+ lines.push(` • ${cap.name} (${cap.strategy})${storeInfo}`);
352
375
  }
353
376
  const fw = result.framework ?? {};
354
377
  const fwNames = Object.entries(fw).filter(([, v]) => v).map(([k]) => k);
@@ -7,7 +7,6 @@ describe('extension manifest regression', () => {
7
7
  const raw = await fs.readFile(manifestPath, 'utf8');
8
8
  const manifest = JSON.parse(raw);
9
9
  expect(manifest.permissions).toContain('cookies');
10
- expect(manifest.permissions).toContain('scripting');
11
10
  expect(manifest.host_permissions).toContain('<all_urls>');
12
11
  });
13
12
  });
@@ -34,7 +34,7 @@ export interface GenerateCliResult {
34
34
  };
35
35
  synthesize: {
36
36
  candidate_count: number;
37
- candidates: Array<Pick<SynthesizeCandidateSummary, 'name' | 'strategy' | 'confidence'>>;
37
+ candidates: Array<Pick<SynthesizeCandidateSummary, 'name' | 'strategy'>>;
38
38
  };
39
39
  }
40
40
  export declare function generateCliFromUrl(opts: GenerateCliOptions): Promise<GenerateCliResult>;
@@ -40,7 +40,7 @@ function selectCandidate(candidates, goal) {
40
40
  if (!candidates.length)
41
41
  return null;
42
42
  if (!goal)
43
- return candidates[0]; // highest confidence first
43
+ return candidates[0];
44
44
  const normalized = normalizeGoal(goal);
45
45
  if (normalized) {
46
46
  const exact = candidates.find(c => c.name === normalized);
@@ -90,7 +90,6 @@ export async function generateCliFromUrl(opts) {
90
90
  candidates: (synthesizeResult.candidates ?? []).map((c) => ({
91
91
  name: c.name,
92
92
  strategy: c.strategy,
93
- confidence: c.confidence,
94
93
  })),
95
94
  },
96
95
  };
@@ -111,7 +110,7 @@ export function renderGenerateSummary(r) {
111
110
  ` Candidates: ${r.synthesize?.candidate_count ?? 0}`,
112
111
  ];
113
112
  for (const c of r.synthesize?.candidates ?? []) {
114
- lines.push(` • ${c.name} (${c.strategy}, ${((c.confidence ?? 0) * 100).toFixed(0)}%)`);
113
+ lines.push(` • ${c.name} (${c.strategy})`);
115
114
  }
116
115
  const fw = r.explore?.framework ?? {};
117
116
  const fwNames = Object.entries(fw).filter(([, v]) => v).map(([k]) => k);
@@ -0,0 +1,8 @@
1
/** The subset of `package.json` fields read when locating built entry points. */
export interface PackageJsonLike {
    /** Either a single executable path or a map of command name to path. */
    bin?: string | Record<string, string>;
    /** Module entry path. */
    main?: string;
}
/**
 * Walk upwards from the directory of `startFile` until a `package.json` is found.
 *
 * @param startFile Path of a file inside the package.
 * @param fileExists Existence probe, injectable for tests.
 * @returns The first ancestor directory that contains `package.json`.
 * @throws Error when the filesystem root is reached without a match.
 */
export declare function findPackageRoot(
    startFile: string,
    fileExists?: (candidate: string) => boolean,
): string;
/**
 * List candidate paths for the built CLI entry file, without duplicates.
 *
 * @param packageRoot Directory containing `package.json`.
 * @param readFile File reader, injectable for tests.
 */
export declare function getBuiltEntryCandidates(
    packageRoot: string,
    readFile?: (filePath: string) => string,
): string[];
/** Path of `cli-manifest.json`, located one level above `clisDir`. */
export declare function getCliManifestPath(clisDir: string): string;
/** Path of `scripts/fetch-adapters.js` under `packageRoot`. */
export declare function getFetchAdaptersScriptPath(packageRoot: string): string;
@@ -0,0 +1,41 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ export function findPackageRoot(startFile, fileExists = fs.existsSync) {
4
+ let dir = path.dirname(startFile);
5
+ while (true) {
6
+ if (fileExists(path.join(dir, 'package.json')))
7
+ return dir;
8
+ const parent = path.dirname(dir);
9
+ if (parent === dir) {
10
+ throw new Error(`Could not find package.json above ${startFile}`);
11
+ }
12
+ dir = parent;
13
+ }
14
+ }
15
/**
 * List the paths where the built CLI entry file may live, in priority order.
 *
 * Order: the package.json `bin` entry (a string, or the `opencli` key of a bin map),
 * then `main`, then two hard-coded compatibility locations. Duplicates are removed
 * while keeping the first occurrence.
 *
 * @param packageRoot Directory containing package.json.
 * @param readFile Reader returning file contents as text; injectable for tests.
 * @returns Candidate absolute-or-relative paths rooted at `packageRoot`; never empty.
 */
export function getBuiltEntryCandidates(packageRoot, readFile = (filePath) => fs.readFileSync(filePath, 'utf-8')) {
    // An empty string would make path.join() return packageRoot itself, i.e. a
    // directory that exists but is not an entry file, so such values are ignored.
    const isUsablePath = (value) => typeof value === 'string' && value.length > 0;
    const candidates = [];
    try {
        const pkg = JSON.parse(readFile(path.join(packageRoot, 'package.json')));
        const binEntry = typeof pkg.bin === 'string' ? pkg.bin : pkg.bin?.opencli;
        if (isUsablePath(binEntry)) {
            candidates.push(path.join(packageRoot, binEntry));
        }
        if (isUsablePath(pkg.main)) {
            candidates.push(path.join(packageRoot, pkg.main));
        }
    }
    catch {
        // Unreadable or malformed package.json: deliberately best-effort, the
        // compatibility candidates below still apply.
    }
    // Compatibility fallback for partially-built trees or older layouts.
    candidates.push(path.join(packageRoot, 'dist', 'src', 'main.js'), path.join(packageRoot, 'dist', 'main.js'));
    return [...new Set(candidates)];
}
36
/**
 * Resolve the location of cli-manifest.json for a CLIs directory.
 *
 * @param clisDir Directory holding the CLI adapters.
 * @returns Absolute path of `cli-manifest.json` in the parent of `clisDir`.
 */
export function getCliManifestPath(clisDir) {
    const parentDir = path.resolve(clisDir, '..');
    return path.join(parentDir, 'cli-manifest.json');
}
39
/**
 * Build the path of the adapter-fetching script shipped with the package.
 *
 * @param packageRoot Directory containing package.json.
 * @returns `<packageRoot>/scripts/fetch-adapters.js` (joined, not resolved to absolute).
 */
export function getFetchAdaptersScriptPath(packageRoot) {
    const scriptSegments = ['scripts', 'fetch-adapters.js'];
    return path.join(packageRoot, ...scriptSegments);
}
@@ -68,9 +68,7 @@ pipeline:
68
68
  - fetch:
69
69
  url: "https://httpbin.org/get?greeting=hello"
70
70
  method: GET
71
- - extract:
72
- type: json
73
- selector: "$.args"
71
+ - select: "args"
74
72
  `;
75
73
  writeFile(targetDir, 'hello.yaml', yamlContent);
76
74
  files.push('hello.yaml');