@jackwener/opencli 1.6.7 → 1.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +3 -1
  2. package/README.zh-CN.md +6 -2
  3. package/dist/clis/1688/assets.d.ts +42 -0
  4. package/dist/clis/1688/assets.js +204 -0
  5. package/dist/clis/1688/assets.test.d.ts +1 -0
  6. package/dist/clis/1688/assets.test.js +39 -0
  7. package/dist/clis/1688/download.d.ts +9 -0
  8. package/dist/clis/1688/download.js +76 -0
  9. package/dist/clis/1688/download.test.d.ts +1 -0
  10. package/dist/clis/1688/download.test.js +31 -0
  11. package/dist/clis/1688/shared.d.ts +10 -0
  12. package/dist/clis/1688/shared.js +43 -0
  13. package/dist/clis/linux-do/topic-content.d.ts +35 -0
  14. package/dist/clis/linux-do/topic-content.js +154 -0
  15. package/dist/clis/linux-do/topic-content.test.d.ts +1 -0
  16. package/dist/clis/linux-do/topic-content.test.js +59 -0
  17. package/dist/clis/linux-do/topic.yaml +1 -16
  18. package/dist/clis/xueqiu/groups.yaml +23 -0
  19. package/dist/clis/xueqiu/kline.yaml +65 -0
  20. package/dist/clis/xueqiu/watchlist.yaml +9 -9
  21. package/dist/src/analysis.d.ts +2 -0
  22. package/dist/src/analysis.js +6 -0
  23. package/dist/src/browser/cdp.js +96 -0
  24. package/dist/src/build-manifest.d.ts +3 -1
  25. package/dist/src/build-manifest.js +10 -7
  26. package/dist/src/build-manifest.test.js +8 -4
  27. package/dist/src/cli.d.ts +2 -1
  28. package/dist/src/cli.js +48 -46
  29. package/dist/src/commands/daemon.js +2 -10
  30. package/dist/src/diagnostic.d.ts +27 -2
  31. package/dist/src/diagnostic.js +201 -25
  32. package/dist/src/diagnostic.test.js +130 -1
  33. package/dist/src/discovery.js +7 -17
  34. package/dist/src/download/progress.js +7 -2
  35. package/dist/src/explore.d.ts +0 -2
  36. package/dist/src/explore.js +61 -38
  37. package/dist/src/extension-manifest-regression.test.js +0 -1
  38. package/dist/src/generate.d.ts +1 -1
  39. package/dist/src/generate.js +2 -3
  40. package/dist/src/package-paths.d.ts +8 -0
  41. package/dist/src/package-paths.js +41 -0
  42. package/dist/src/plugin-scaffold.js +1 -3
  43. package/dist/src/record.d.ts +1 -2
  44. package/dist/src/record.js +14 -52
  45. package/dist/src/synthesize.d.ts +0 -2
  46. package/dist/src/synthesize.js +8 -4
  47. package/package.json +1 -1
  48. package/dist/cli-manifest.json +0 -17250
@@ -15,6 +15,7 @@ import yaml from 'js-yaml';
15
15
  import { Strategy, registerCommand } from './registry.js';
16
16
  import { getErrorMessage } from './errors.js';
17
17
  import { log } from './logger.js';
18
+ import { findPackageRoot, getCliManifestPath, getFetchAdaptersScriptPath } from './package-paths.js';
18
19
  /** User runtime directory: ~/.opencli */
19
20
  export const USER_OPENCLI_DIR = path.join(os.homedir(), '.opencli');
20
21
  /** User CLIs directory: ~/.opencli/clis */
@@ -31,18 +32,7 @@ function parseStrategy(rawStrategy, fallback = Strategy.COOKIE) {
31
32
  return Strategy[key] ?? fallback;
32
33
  }
33
34
  import { isRecord } from './utils.js';
34
- /**
35
- * Find the package root (directory containing package.json).
36
- * Dev: import.meta.url is in src/ → one level up.
37
- * Prod: import.meta.url is in dist/src/ → two levels up.
38
- */
39
- function findPackageRoot() {
40
- let dir = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
41
- if (!fs.existsSync(path.join(dir, 'package.json'))) {
42
- dir = path.resolve(dir, '..');
43
- }
44
- return dir;
45
- }
35
+ const PACKAGE_ROOT = findPackageRoot(fileURLToPath(import.meta.url));
46
36
  /**
47
37
  * Ensure ~/.opencli/node_modules/@jackwener/opencli symlink exists so that
48
38
  * user CLIs in ~/.opencli/clis/ can `import { cli } from '@jackwener/opencli/registry'`.
@@ -65,7 +55,7 @@ export async function ensureUserCliCompatShims(baseDir = USER_OPENCLI_DIR) {
65
55
  await fs.promises.writeFile(pkgJsonPath, pkgJsonContent, 'utf-8');
66
56
  }
67
57
  // Create node_modules/@jackwener/opencli symlink pointing to the installed package root.
68
- const opencliRoot = findPackageRoot();
58
+ const opencliRoot = PACKAGE_ROOT;
69
59
  const symlinkDir = path.join(baseDir, 'node_modules', '@jackwener');
70
60
  const symlinkPath = path.join(symlinkDir, 'opencli');
71
61
  try {
@@ -116,7 +106,7 @@ export async function ensureUserAdapters() {
116
106
  log.info('First run detected — copying adapters (one-time setup)...');
117
107
  try {
118
108
  const { execFileSync } = await import('node:child_process');
119
- const scriptPath = path.join(findPackageRoot(), 'scripts', 'fetch-adapters.js');
109
+ const scriptPath = getFetchAdaptersScriptPath(PACKAGE_ROOT);
120
110
  execFileSync(process.execPath, [scriptPath], {
121
111
  stdio: 'inherit',
122
112
  env: { ...process.env, _OPENCLI_FIRST_RUN: '1' },
@@ -135,7 +125,7 @@ export async function ensureUserAdapters() {
135
125
  export async function discoverClis(...dirs) {
136
126
  // Fast path: try manifest first (production / post-build)
137
127
  for (const dir of dirs) {
138
- const manifestPath = path.resolve(dir, '..', 'cli-manifest.json');
128
+ const manifestPath = getCliManifestPath(dir);
139
129
  try {
140
130
  await fs.promises.access(manifestPath);
141
131
  const loaded = await loadFromManifest(manifestPath, dir);
@@ -173,7 +163,7 @@ async function loadFromManifest(manifestPath, clisDir) {
173
163
  columns: entry.columns,
174
164
  pipeline: entry.pipeline,
175
165
  timeoutSeconds: entry.timeout,
176
- source: `manifest:${entry.site}/${entry.name}`,
166
+ source: entry.sourceFile ? path.resolve(clisDir, entry.sourceFile) : `manifest:${entry.site}/${entry.name}`,
177
167
  deprecated: entry.deprecated,
178
168
  replacedBy: entry.replacedBy,
179
169
  navigateBefore: entry.navigateBefore,
@@ -196,7 +186,7 @@ async function loadFromManifest(manifestPath, clisDir) {
196
186
  args: entry.args ?? [],
197
187
  columns: entry.columns,
198
188
  timeoutSeconds: entry.timeout,
199
- source: modulePath,
189
+ source: entry.sourceFile ? path.resolve(clisDir, entry.sourceFile) : modulePath,
200
190
  deprecated: entry.deprecated,
201
191
  replacedBy: entry.replacedBy,
202
192
  navigateBefore: entry.navigateBefore,
@@ -23,8 +23,13 @@ export function formatDuration(ms) {
23
23
  if (seconds < 60)
24
24
  return `${seconds}s`;
25
25
  const minutes = Math.floor(seconds / 60);
26
- const remainingSeconds = seconds % 60;
27
- return `${minutes}m ${remainingSeconds}s`;
26
+ if (minutes < 60) {
27
+ const remainingSeconds = seconds % 60;
28
+ return remainingSeconds > 0 ? `${minutes}m ${remainingSeconds}s` : `${minutes}m`;
29
+ }
30
+ const hours = Math.floor(minutes / 60);
31
+ const remainingMinutes = minutes % 60;
32
+ return remainingMinutes > 0 ? `${hours}h ${remainingMinutes}m` : `${hours}h`;
28
33
  }
29
34
  /**
30
35
  * Create a simple progress bar for terminal display.
@@ -12,7 +12,6 @@ interface InferredCapability {
12
12
  name: string;
13
13
  description: string;
14
14
  strategy: string;
15
- confidence: number;
16
15
  endpoint: string;
17
16
  itemPath: string | null;
18
17
  recommendedColumns: string[];
@@ -52,7 +51,6 @@ export interface ExploreEndpointArtifact {
52
51
  url: string;
53
52
  status: number | null;
54
53
  contentType: string;
55
- score: number;
56
54
  queryParams: string[];
57
55
  itemPath: string | null;
58
56
  itemCount: number;
@@ -13,7 +13,7 @@ import { detectFramework } from './scripts/framework.js';
13
13
  import { discoverStores } from './scripts/store.js';
14
14
  import { interactFuzz } from './scripts/interact.js';
15
15
  import { log } from './logger.js';
16
- import { urlToPattern, findArrayPath, flattenFields, detectFieldRoles, inferCapabilityName, inferStrategy, detectAuthFromHeaders, classifyQueryParams, } from './analysis.js';
16
+ import { urlToPattern, findArrayPath, flattenFields, detectFieldRoles, inferCapabilityName, inferStrategy, detectAuthFromHeaders, classifyQueryParams, isNoiseUrl, } from './analysis.js';
17
17
  // ── Site name detection ────────────────────────────────────────────────────
18
18
  const KNOWN_SITE_ALIASES = {
19
19
  'x.com': 'twitter', 'twitter.com': 'twitter',
@@ -66,13 +66,29 @@ function parseNetworkRequests(raw) {
66
66
  return entries;
67
67
  }
68
68
  if (Array.isArray(raw)) {
69
- return raw.filter(e => e && typeof e === 'object').map(e => ({
70
- method: (e.method ?? 'GET').toUpperCase(),
71
- url: String(e.url ?? e.request?.url ?? e.requestUrl ?? ''),
72
- status: e.status ?? e.statusCode ?? null,
73
- contentType: e.contentType ?? e.response?.contentType ?? '',
74
- responseBody: e.responseBody, requestHeaders: e.requestHeaders,
75
- }));
69
+ return raw.filter(e => e && typeof e === 'object').map(e => {
70
+ // Handle both legacy shape (status/contentType/responseBody) and
71
+ // extension/CDP capture shape (responseStatus/responseContentType/responsePreview)
72
+ let body = e.responseBody;
73
+ if (body === undefined && e.responsePreview !== undefined) {
74
+ const preview = e.responsePreview;
75
+ if (typeof preview === 'string') {
76
+ try {
77
+ body = JSON.parse(preview);
78
+ }
79
+ catch {
80
+ body = preview;
81
+ }
82
+ }
83
+ }
84
+ return {
85
+ method: (e.method ?? 'GET').toUpperCase(),
86
+ url: String(e.url ?? e.request?.url ?? e.requestUrl ?? ''),
87
+ status: e.status ?? e.responseStatus ?? e.statusCode ?? null,
88
+ contentType: e.contentType ?? e.responseContentType ?? e.response?.contentType ?? '',
89
+ responseBody: body, requestHeaders: e.requestHeaders,
90
+ };
91
+ });
76
92
  }
77
93
  return [];
78
94
  }
@@ -91,29 +107,32 @@ function isBooleanRecord(value) {
91
107
  return typeof value === 'object' && value !== null && !Array.isArray(value)
92
108
  && Object.values(value).every(v => typeof v === 'boolean');
93
109
  }
94
- function scoreEndpoint(ep) {
95
- let s = 0;
96
- if (ep.contentType.includes('json'))
97
- s += 10;
98
- if (ep.responseAnalysis) {
99
- s += 5;
100
- s += Math.min(ep.responseAnalysis.itemCount, 10);
101
- s += Object.keys(ep.responseAnalysis.detectedFields).length * 2;
102
- }
110
+ /**
111
+ * Deterministic sort key for endpoint ordering — transparent, observable signals only.
112
+ * Used by generate/synthesize to pick a stable default candidate.
113
+ * Not exposed externally; AI agents see the raw metadata and decide for themselves.
114
+ */
115
+ function endpointSortKey(ep) {
116
+ let k = 0;
117
+ // Prefer endpoints with array data (list APIs are more useful for automation)
118
+ const items = ep.responseAnalysis?.itemCount ?? 0;
119
+ if (items > 0)
120
+ k += 100 + Math.min(items, 50);
121
+ // Prefer endpoints with detected semantic fields
122
+ k += Object.keys(ep.responseAnalysis?.detectedFields ?? {}).length * 10;
123
+ // Prefer API-style paths
103
124
  if (ep.pattern.includes('/api/') || ep.pattern.includes('/x/'))
104
- s += 3;
105
- if (ep.hasSearchParam)
106
- s += 3;
107
- if (ep.hasPaginationParam)
108
- s += 2;
109
- if (ep.hasLimitParam)
110
- s += 2;
111
- if (ep.status === 200)
112
- s += 2;
113
- // Anti-Bot Empty Value Detection: penalize JSON endpoints returning empty data
114
- if (ep.responseAnalysis && ep.responseAnalysis.itemCount === 0 && ep.contentType.includes('json'))
115
- s -= 3;
116
- return s;
125
+ k += 5;
126
+ // Prefer endpoints with query params (more likely to be parameterized APIs)
127
+ if (ep.hasSearchParam || ep.hasPaginationParam || ep.hasLimitParam)
128
+ k += 5;
129
+ return k;
130
+ }
131
+ /** Check whether an endpoint carries useful structured data (any JSON response, not noise). */
132
+ function isUsefulEndpoint(ep) {
133
+ if (isNoiseUrl(ep.url))
134
+ return false;
135
+ return ep.contentType.includes('json');
117
136
  }
118
137
  // ── Framework detection ────────────────────────────────────────────────────
119
138
  const FRAMEWORK_DETECT_JS = detectFramework.toString();
@@ -122,7 +141,7 @@ const STORE_DISCOVER_JS = discoverStores.toString();
122
141
  // ── Auto-Interaction (Fuzzing) ─────────────────────────────────────────────
123
142
  const INTERACT_FUZZ_JS = interactFuzz.toString();
124
143
  // ── Analysis helpers (extracted from exploreUrl) ───────────────────────────
125
- /** Filter, deduplicate, and score network endpoints. */
144
+ /** Filter and deduplicate network endpoints, keeping only useful structured-data APIs. */
126
145
  function analyzeEndpoints(networkEntries) {
127
146
  const seen = new Map();
128
147
  for (const entry of networkEntries) {
@@ -145,12 +164,13 @@ function analyzeEndpoints(networkEntries) {
145
164
  hasLimitParam: hasLimit || qp.some(p => LIMIT_PARAMS.has(p)),
146
165
  authIndicators: detectAuthFromHeaders(entry.requestHeaders),
147
166
  responseAnalysis: entry.responseBody ? analyzeResponseBody(entry.responseBody) : null,
148
- score: 0,
149
167
  };
150
- ep.score = scoreEndpoint(ep);
151
168
  seen.set(key, ep);
152
169
  }
153
- const analyzed = [...seen.values()].filter(ep => ep.score >= 5).sort((a, b) => b.score - a.score);
170
+ // Filter to useful endpoints; deterministic ordering by observable metadata signals
171
+ const analyzed = [...seen.values()]
172
+ .filter(isUsefulEndpoint)
173
+ .sort((a, b) => endpointSortKey(b) - endpointSortKey(a));
154
174
  return { analyzed, totalCount: seen.size };
155
175
  }
156
176
  /** Infer CLI capabilities from analyzed endpoints. */
@@ -192,7 +212,7 @@ function inferCapabilitiesFromEndpoints(endpoints, stores, opts) {
192
212
  capabilities.push({
193
213
  name: capName, description: `${opts.site ?? detectSiteName(opts.url)} ${capName}`,
194
214
  strategy: storeHint ? 'store-action' : epStrategy,
195
- confidence: Math.min(ep.score / 20, 1.0), endpoint: ep.pattern,
215
+ endpoint: ep.pattern,
196
216
  itemPath: ep.responseAnalysis?.itemPath ?? null,
197
217
  recommendedColumns: cols.length ? cols : ['title', 'url'],
198
218
  recommendedArgs: args,
@@ -216,7 +236,7 @@ async function writeExploreArtifacts(targetDir, result, analyzedEndpoints, store
216
236
  }, null, 2)),
217
237
  fs.promises.writeFile(path.join(targetDir, 'endpoints.json'), JSON.stringify(analyzedEndpoints.map(ep => ({
218
238
  pattern: ep.pattern, method: ep.method, url: ep.url, status: ep.status,
219
- contentType: ep.contentType, score: ep.score, queryParams: ep.queryParams,
239
+ contentType: ep.contentType, queryParams: ep.queryParams,
220
240
  itemPath: ep.responseAnalysis?.itemPath ?? null, itemCount: ep.responseAnalysis?.itemCount ?? 0,
221
241
  detectedFields: ep.responseAnalysis?.detectedFields ?? {}, authIndicators: ep.authIndicators,
222
242
  })), null, 2)),
@@ -237,6 +257,7 @@ export async function exploreUrl(url, opts) {
237
257
  return browserSession(opts.BrowserFactory, async (page) => {
238
258
  return runWithTimeout((async () => {
239
259
  // Step 1: Navigate
260
+ await page.startNetworkCapture?.();
240
261
  await page.goto(url);
241
262
  await page.wait(waitSeconds);
242
263
  // Step 2: Auto-scroll to trigger lazy loading intelligently
@@ -269,7 +290,9 @@ export async function exploreUrl(url, opts) {
269
290
  // Step 3: Read page metadata
270
291
  const metadata = await readPageMetadata(page);
271
292
  // Step 4: Capture network traffic
272
- const rawNetwork = await page.networkRequests(false);
293
+ const rawNetwork = page.readNetworkCapture
294
+ ? await page.readNetworkCapture()
295
+ : await page.networkRequests(false);
273
296
  const networkEntries = parseNetworkRequests(rawNetwork);
274
297
  // Step 5: For JSON endpoints missing a body, carefully re-fetch in-browser via a pristine iframe
275
298
  const jsonEndpoints = networkEntries.filter(e => e.contentType.includes('json') && e.method === 'GET' && e.status === 200 && !e.responseBody);
@@ -348,7 +371,7 @@ export function renderExploreSummary(result) {
348
371
  ];
349
372
  for (const cap of (result.capabilities ?? []).slice(0, 5)) {
350
373
  const storeInfo = cap.storeHint ? ` → ${cap.storeHint.store}.${cap.storeHint.action}()` : '';
351
- lines.push(` • ${cap.name} (${cap.strategy}, ${(cap.confidence * 100).toFixed(0)}%)${storeInfo}`);
374
+ lines.push(` • ${cap.name} (${cap.strategy})${storeInfo}`);
352
375
  }
353
376
  const fw = result.framework ?? {};
354
377
  const fwNames = Object.entries(fw).filter(([, v]) => v).map(([k]) => k);
@@ -7,7 +7,6 @@ describe('extension manifest regression', () => {
7
7
  const raw = await fs.readFile(manifestPath, 'utf8');
8
8
  const manifest = JSON.parse(raw);
9
9
  expect(manifest.permissions).toContain('cookies');
10
- expect(manifest.permissions).toContain('scripting');
11
10
  expect(manifest.host_permissions).toContain('<all_urls>');
12
11
  });
13
12
  });
@@ -34,7 +34,7 @@ export interface GenerateCliResult {
34
34
  };
35
35
  synthesize: {
36
36
  candidate_count: number;
37
- candidates: Array<Pick<SynthesizeCandidateSummary, 'name' | 'strategy' | 'confidence'>>;
37
+ candidates: Array<Pick<SynthesizeCandidateSummary, 'name' | 'strategy'>>;
38
38
  };
39
39
  }
40
40
  export declare function generateCliFromUrl(opts: GenerateCliOptions): Promise<GenerateCliResult>;
@@ -40,7 +40,7 @@ function selectCandidate(candidates, goal) {
40
40
  if (!candidates.length)
41
41
  return null;
42
42
  if (!goal)
43
- return candidates[0]; // highest confidence first
43
+ return candidates[0];
44
44
  const normalized = normalizeGoal(goal);
45
45
  if (normalized) {
46
46
  const exact = candidates.find(c => c.name === normalized);
@@ -90,7 +90,6 @@ export async function generateCliFromUrl(opts) {
90
90
  candidates: (synthesizeResult.candidates ?? []).map((c) => ({
91
91
  name: c.name,
92
92
  strategy: c.strategy,
93
- confidence: c.confidence,
94
93
  })),
95
94
  },
96
95
  };
@@ -111,7 +110,7 @@ export function renderGenerateSummary(r) {
111
110
  ` Candidates: ${r.synthesize?.candidate_count ?? 0}`,
112
111
  ];
113
112
  for (const c of r.synthesize?.candidates ?? []) {
114
- lines.push(` • ${c.name} (${c.strategy}, ${((c.confidence ?? 0) * 100).toFixed(0)}%)`);
113
+ lines.push(` • ${c.name} (${c.strategy})`);
115
114
  }
116
115
  const fw = r.explore?.framework ?? {};
117
116
  const fwNames = Object.entries(fw).filter(([, v]) => v).map(([k]) => k);
@@ -0,0 +1,8 @@
1
+ export interface PackageJsonLike {
2
+ bin?: string | Record<string, string>;
3
+ main?: string;
4
+ }
5
+ export declare function findPackageRoot(startFile: string, fileExists?: (candidate: string) => boolean): string;
6
+ export declare function getBuiltEntryCandidates(packageRoot: string, readFile?: (filePath: string) => string): string[];
7
+ export declare function getCliManifestPath(clisDir: string): string;
8
+ export declare function getFetchAdaptersScriptPath(packageRoot: string): string;
@@ -0,0 +1,41 @@
1
+ import * as fs from 'node:fs';
2
+ import * as path from 'node:path';
3
+ export function findPackageRoot(startFile, fileExists = fs.existsSync) {
4
+ let dir = path.dirname(startFile);
5
+ while (true) {
6
+ if (fileExists(path.join(dir, 'package.json')))
7
+ return dir;
8
+ const parent = path.dirname(dir);
9
+ if (parent === dir) {
10
+ throw new Error(`Could not find package.json above ${startFile}`);
11
+ }
12
+ dir = parent;
13
+ }
14
+ }
15
+ export function getBuiltEntryCandidates(packageRoot, readFile = (filePath) => fs.readFileSync(filePath, 'utf-8')) {
16
+ const candidates = [];
17
+ try {
18
+ const pkg = JSON.parse(readFile(path.join(packageRoot, 'package.json')));
19
+ if (typeof pkg.bin === 'string') {
20
+ candidates.push(path.join(packageRoot, pkg.bin));
21
+ }
22
+ else if (pkg.bin && typeof pkg.bin === 'object' && typeof pkg.bin.opencli === 'string') {
23
+ candidates.push(path.join(packageRoot, pkg.bin.opencli));
24
+ }
25
+ if (typeof pkg.main === 'string') {
26
+ candidates.push(path.join(packageRoot, pkg.main));
27
+ }
28
+ }
29
+ catch {
30
+ // Fall through to compatibility candidates below.
31
+ }
32
+ // Compatibility fallback for partially-built trees or older layouts.
33
+ candidates.push(path.join(packageRoot, 'dist', 'src', 'main.js'), path.join(packageRoot, 'dist', 'main.js'));
34
+ return [...new Set(candidates)];
35
+ }
36
+ export function getCliManifestPath(clisDir) {
37
+ return path.resolve(clisDir, '..', 'cli-manifest.json');
38
+ }
39
+ export function getFetchAdaptersScriptPath(packageRoot) {
40
+ return path.join(packageRoot, 'scripts', 'fetch-adapters.js');
41
+ }
@@ -68,9 +68,7 @@ pipeline:
68
68
  - fetch:
69
69
  url: "https://httpbin.org/get?greeting=hello"
70
70
  method: GET
71
- - extract:
72
- type: json
73
- selector: "$.args"
71
+ - select: "args"
74
72
  `;
75
73
  writeFile(targetDir, 'hello.yaml', yamlContent);
76
74
  files.push('hello.yaml');
@@ -45,7 +45,6 @@ type RecordedCandidateKind = 'read' | 'write';
45
45
  export interface RecordedCandidate {
46
46
  kind: RecordedCandidateKind;
47
47
  req: RecordedRequest;
48
- score: number;
49
48
  arrayResult: ReturnType<typeof findArrayPath> | null;
50
49
  }
51
50
  interface GeneratedRecordedCandidate {
@@ -70,7 +69,7 @@ export declare function createRecordedEntry(input: {
70
69
  * for every JSON response. No URL pattern filter — captures everything.
71
70
  */
72
71
  export declare function generateFullCaptureInterceptorJs(): string;
73
- /** Analyze recorded requests into read and write candidates. */
72
+ /** Analyze recorded requests into read and write candidates, filtering out noise. */
74
73
  export declare function analyzeRecordedRequests(requests: RecordedRequest[]): {
75
74
  candidates: RecordedCandidate[];
76
75
  };
@@ -18,26 +18,11 @@ import chalk from 'chalk';
18
18
  import yaml from 'js-yaml';
19
19
  import { sendCommand } from './browser/daemon-client.js';
20
20
  import { SEARCH_PARAMS, PAGINATION_PARAMS, FIELD_ROLES } from './constants.js';
21
- import { urlToPattern, findArrayPath, inferCapabilityName, inferStrategy, detectAuthFromContent, classifyQueryParams, } from './analysis.js';
22
- /** Keep the stronger candidate when multiple recordings share one bucket. */
23
- function preferRecordedCandidate(current, next) {
24
- if (next.score > current.score)
25
- return next;
26
- if (next.score < current.score)
27
- return current;
21
+ import { urlToPattern, findArrayPath, inferCapabilityName, inferStrategy, detectAuthFromContent, classifyQueryParams, isNoiseUrl, } from './analysis.js';
22
+ /** Keep the later candidate when multiple recordings share one bucket (prefer fresher data). */
23
+ function preferRecordedCandidate(_current, next) {
28
24
  return next;
29
25
  }
30
- /** Apply shared endpoint score tweaks. */
31
- function applyCommonEndpointScoreAdjustments(req, score) {
32
- let adjusted = score;
33
- if (req.url.includes('/api/'))
34
- adjusted += 3;
35
- if (req.url.match(/\/(track|log|analytics|beacon|pixel|stats|metric)/i))
36
- adjusted -= 10;
37
- if (req.url.match(/\/(ping|heartbeat|keep.?alive)/i))
38
- adjusted -= 10;
39
- return adjusted;
40
- }
41
26
  /** Build a candidate-level dedupe key. */
42
27
  function getRecordedCandidateKey(candidate) {
43
28
  return `${candidate.kind} ${getRecordedRequestKey(candidate.req)}`;
@@ -262,23 +247,6 @@ function generateReadRecordedJs() {
262
247
  `;
263
248
  }
264
249
  // ── Analysis helpers ───────────────────────────────────────────────────────
265
- function scoreRequest(req, arrayResult) {
266
- let s = 0;
267
- if (arrayResult) {
268
- s += 10;
269
- s += Math.min(arrayResult.items.length, 10);
270
- // Bonus for detected semantic fields
271
- const sample = arrayResult.items[0];
272
- if (sample && typeof sample === 'object') {
273
- const keys = Object.keys(sample).map(k => k.toLowerCase());
274
- for (const aliases of Object.values(FIELD_ROLES)) {
275
- if (aliases.some(a => keys.includes(a)))
276
- s += 2;
277
- }
278
- }
279
- }
280
- return applyCommonEndpointScoreAdjustments(req, s);
281
- }
282
250
  /** Check whether one recorded request is safe to treat as a write candidate. */
283
251
  function isWriteCandidate(req) {
284
252
  return ['POST', 'PUT', 'PATCH'].includes(req.method)
@@ -290,25 +258,19 @@ function isWriteCandidate(req) {
290
258
  && typeof req.responseBody === 'object'
291
259
  && !Array.isArray(req.responseBody);
292
260
  }
293
- /** Score replayable write requests while keeping tracking and heartbeat traffic suppressed. */
294
- function scoreWriteRequest(req) {
295
- return applyCommonEndpointScoreAdjustments(req, 6);
296
- }
297
- /** Analyze recorded requests into read and write candidates. */
261
+ /** Analyze recorded requests into read and write candidates, filtering out noise. */
298
262
  export function analyzeRecordedRequests(requests) {
299
263
  const candidates = [];
300
264
  for (const req of requests) {
265
+ if (isNoiseUrl(req.url))
266
+ continue;
301
267
  const arrayResult = findArrayPath(req.responseBody);
302
268
  if (isWriteCandidate(req)) {
303
- const score = scoreWriteRequest(req);
304
- if (score > 0)
305
- candidates.push({ kind: 'write', req, score, arrayResult: null });
269
+ candidates.push({ kind: 'write', req, arrayResult: null });
306
270
  continue;
307
271
  }
308
272
  if (arrayResult) {
309
- const score = scoreRequest(req, arrayResult);
310
- if (score > 0)
311
- candidates.push({ kind: 'read', req, score, arrayResult });
273
+ candidates.push({ kind: 'read', req, arrayResult });
312
274
  }
313
275
  }
314
276
  return { candidates };
@@ -465,9 +427,9 @@ export function generateRecordedCandidates(site, pageUrl, requests) {
465
427
  const current = deduped.get(key);
466
428
  deduped.set(key, current ? preferRecordedCandidate(current, candidate) : candidate);
467
429
  }
430
+ // Sort reads by array item count (richer data first), then take top 5
468
431
  const selected = [...deduped.values()]
469
- .filter((candidate) => candidate.kind === 'read' ? candidate.score >= 8 : candidate.score >= 6)
470
- .sort((a, b) => b.score - a.score)
432
+ .sort((a, b) => (b.arrayResult?.items.length ?? 0) - (a.arrayResult?.items.length ?? 0))
471
433
  .slice(0, 5);
472
434
  const usedNames = new Set();
473
435
  return selected.map((candidate) => {
@@ -636,13 +598,13 @@ function analyzeAndWrite(site, pageUrl, requests, outDir) {
636
598
  // Generate candidate YAMLs (top 5)
637
599
  const candidates = [];
638
600
  const usedNames = new Set();
639
- console.log(chalk.bold('\n Captured endpoints (scored):\n'));
640
- for (const entry of analysis.candidates.sort((a, b) => b.score - a.score).slice(0, 8)) {
601
+ console.log(chalk.bold('\n Captured endpoints:\n'));
602
+ for (const entry of analysis.candidates.sort((a, b) => (b.arrayResult?.items.length ?? 0) - (a.arrayResult?.items.length ?? 0)).slice(0, 8)) {
641
603
  const itemCount = entry.arrayResult?.items.length ?? 0;
642
604
  const strategy = entry.kind === 'write'
643
605
  ? 'cookie'
644
606
  : inferStrategy(detectAuthFromContent(entry.req.url, entry.req.responseBody));
645
- const marker = entry.score >= 15 ? chalk.green('') : entry.score >= 8 ? chalk.yellow('') : chalk.dim('·');
607
+ const marker = entry.kind === 'write' ? chalk.magenta('') : itemCount > 5 ? chalk.green('') : chalk.dim('·');
646
608
  console.log(` ${marker} ${chalk.white(urlToPattern(entry.req.url))}` +
647
609
  chalk.dim(` [${strategy}]`) +
648
610
  (entry.kind === 'write'
@@ -664,7 +626,7 @@ function analyzeAndWrite(site, pageUrl, requests, outDir) {
664
626
  console.log(chalk.dim(` → ${filePath}`));
665
627
  }
666
628
  if (candidates.length === 0) {
667
- console.log(chalk.yellow(' No high-confidence candidates found.'));
629
+ console.log(chalk.yellow(' No candidates found.'));
668
630
  console.log(chalk.dim(' Tip: make sure you triggered JSON API calls (open lists, search, scroll).'));
669
631
  }
670
632
  return {
@@ -17,7 +17,6 @@ export interface SynthesizeCapability {
17
17
  name: string;
18
18
  description: string;
19
19
  strategy: string;
20
- confidence?: number;
21
20
  endpoint?: string;
22
21
  itemPath?: string | null;
23
22
  recommendedColumns?: string[];
@@ -70,7 +69,6 @@ export interface SynthesizeCandidateSummary {
70
69
  name: string;
71
70
  path: string;
72
71
  strategy: string;
73
- confidence?: number;
74
72
  }
75
73
  export interface SynthesizeResult {
76
74
  site: string;
@@ -13,7 +13,6 @@ export function synthesizeFromExplore(target, opts = {}) {
13
13
  fs.mkdirSync(targetDir, { recursive: true });
14
14
  const site = bundle.manifest.site;
15
15
  const capabilities = (bundle.capabilities ?? [])
16
- .sort((a, b) => (b.confidence ?? 0) - (a.confidence ?? 0))
17
16
  .slice(0, opts.top ?? 3);
18
17
  const candidates = [];
19
18
  for (const cap of capabilities) {
@@ -23,7 +22,7 @@ export function synthesizeFromExplore(target, opts = {}) {
23
22
  const candidate = buildCandidateYaml(site, bundle.manifest, cap, endpoint);
24
23
  const filePath = path.join(targetDir, `${candidate.name}.yaml`);
25
24
  fs.writeFileSync(filePath, yaml.dump(candidate.yaml, { sortKeys: false, lineWidth: 120 }));
26
- candidates.push({ name: candidate.name, path: filePath, strategy: cap.strategy, confidence: cap.confidence });
25
+ candidates.push({ name: candidate.name, path: filePath, strategy: cap.strategy });
27
26
  }
28
27
  const index = { site, target_url: bundle.manifest.target_url, generated_from: exploreDir, candidate_count: candidates.length, candidates };
29
28
  fs.writeFileSync(path.join(targetDir, 'candidates.json'), JSON.stringify(index, null, 2));
@@ -32,7 +31,7 @@ export function synthesizeFromExplore(target, opts = {}) {
32
31
  export function renderSynthesizeSummary(result) {
33
32
  const lines = ['opencli synthesize: OK', `Site: ${result.site}`, `Source: ${result.explore_dir}`, `Candidates: ${result.candidate_count}`];
34
33
  for (const c of result.candidates ?? [])
35
- lines.push(` • ${c.name} (${c.strategy}, ${((c.confidence ?? 0) * 100).toFixed(0)}% confidence) → ${c.path}`);
34
+ lines.push(` • ${c.name} (${c.strategy}) → ${c.path}`);
36
35
  return lines.join('\n');
37
36
  }
38
37
  export function resolveExploreDir(target) {
@@ -61,7 +60,12 @@ function chooseEndpoint(cap, endpoints) {
61
60
  if (match)
62
61
  return match;
63
62
  }
64
- return [...endpoints].sort((a, b) => (b.score ?? 0) - (a.score ?? 0))[0];
63
+ // Fallback: prefer endpoint with most data (item count + detected fields)
64
+ return [...endpoints].sort((a, b) => {
65
+ const aKey = (a.itemCount ?? 0) * 10 + Object.keys(a.detectedFields ?? {}).length;
66
+ const bKey = (b.itemCount ?? 0) * 10 + Object.keys(b.detectedFields ?? {}).length;
67
+ return bKey - aKey;
68
+ })[0];
65
69
  }
66
70
  // ── URL templating ─────────────────────────────────────────────────────────
67
71
  function buildTemplatedUrl(rawUrl, cap, _endpoint) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jackwener/opencli",
3
- "version": "1.6.7",
3
+ "version": "1.6.8",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },