@jackwener/opencli 1.7.6 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/README.md +17 -8
  2. package/README.zh-CN.md +14 -8
  3. package/cli-manifest.json +469 -11
  4. package/clis/51job/company.js +125 -0
  5. package/clis/51job/detail.js +108 -0
  6. package/clis/51job/hot.js +55 -0
  7. package/clis/51job/search.js +79 -0
  8. package/clis/51job/utils.js +302 -0
  9. package/clis/51job/utils.test.js +69 -0
  10. package/clis/amazon/discussion.js +37 -6
  11. package/clis/amazon/discussion.test.js +147 -32
  12. package/clis/bilibili/video.js +11 -4
  13. package/clis/bilibili/video.test.js +51 -0
  14. package/clis/chatgpt/image.js +1 -1
  15. package/clis/chatgpt-app/ask.js +3 -19
  16. package/clis/chatgpt-app/ax.js +132 -1
  17. package/clis/chatgpt-app/ax.test.js +23 -0
  18. package/clis/chatgpt-app/send.js +2 -21
  19. package/clis/deepseek/ask.js +50 -18
  20. package/clis/deepseek/ask.test.js +195 -2
  21. package/clis/deepseek/utils.js +113 -29
  22. package/clis/deepseek/utils.test.js +109 -1
  23. package/clis/gemini/image.js +1 -1
  24. package/clis/instagram/download.js +1 -1
  25. package/clis/powerchina/search.js +250 -0
  26. package/clis/powerchina/search.test.js +67 -0
  27. package/clis/sinafinance/stock.js +5 -2
  28. package/clis/sinafinance/stock.test.js +59 -0
  29. package/clis/toutiao/articles.js +81 -0
  30. package/clis/toutiao/articles.test.js +23 -0
  31. package/clis/twitter/likes.js +3 -2
  32. package/clis/twitter/search.js +4 -2
  33. package/clis/twitter/search.test.js +4 -0
  34. package/clis/twitter/shared.js +28 -0
  35. package/clis/twitter/shared.test.js +96 -0
  36. package/clis/twitter/thread.js +3 -1
  37. package/clis/twitter/timeline.js +3 -2
  38. package/clis/twitter/tweets.js +3 -2
  39. package/clis/twitter/tweets.test.js +1 -1
  40. package/clis/web/read.js +25 -5
  41. package/clis/web/read.test.js +76 -0
  42. package/clis/weixin/create-draft.js +225 -0
  43. package/clis/weixin/drafts.js +65 -0
  44. package/clis/weixin/drafts.test.js +65 -0
  45. package/clis/weread/ai-outline.js +170 -0
  46. package/clis/weread/ai-outline.test.js +83 -0
  47. package/clis/weread/book.js +57 -44
  48. package/clis/weread/commands.test.js +24 -0
  49. package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
  50. package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
  51. package/dist/src/browser/analyze.d.ts +103 -0
  52. package/dist/src/browser/analyze.js +230 -0
  53. package/dist/src/browser/analyze.test.d.ts +1 -0
  54. package/dist/src/browser/analyze.test.js +164 -0
  55. package/dist/src/browser/article-extract.d.ts +57 -0
  56. package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
  57. package/dist/src/browser/article-extract.e2e.test.js +105 -0
  58. package/dist/src/browser/article-extract.js +169 -0
  59. package/dist/src/browser/article-extract.test.d.ts +1 -0
  60. package/dist/src/browser/article-extract.test.js +94 -0
  61. package/dist/src/browser/cdp.js +11 -2
  62. package/dist/src/browser/verify-fixture.d.ts +59 -0
  63. package/dist/src/browser/verify-fixture.js +213 -0
  64. package/dist/src/browser/verify-fixture.test.d.ts +1 -0
  65. package/dist/src/browser/verify-fixture.test.js +161 -0
  66. package/dist/src/cli.d.ts +32 -0
  67. package/dist/src/cli.js +333 -43
  68. package/dist/src/cli.test.js +257 -1
  69. package/dist/src/commanderAdapter.js +12 -0
  70. package/dist/src/commanderAdapter.test.js +11 -0
  71. package/dist/src/daemon.d.ts +3 -2
  72. package/dist/src/daemon.js +16 -4
  73. package/dist/src/daemon.test.d.ts +1 -0
  74. package/dist/src/daemon.test.js +19 -0
  75. package/dist/src/download/article-download.d.ts +12 -0
  76. package/dist/src/download/article-download.js +141 -17
  77. package/dist/src/download/article-download.test.js +196 -0
  78. package/dist/src/download/index.js +73 -86
  79. package/dist/src/errors.js +4 -2
  80. package/dist/src/errors.test.js +13 -0
  81. package/dist/src/launcher.d.ts +1 -1
  82. package/dist/src/launcher.js +3 -3
  83. package/dist/src/output.js +1 -1
  84. package/dist/src/output.test.js +6 -0
  85. package/package.json +5 -1
@@ -0,0 +1,161 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { deriveFixture, expandFixtureArgs, validateRows } from './verify-fixture.js';
3
+ describe('validateRows', () => {
4
+ it('passes when rows meet all expectations', () => {
5
+ const fixture = {
6
+ expect: {
7
+ rowCount: { min: 1, max: 3 },
8
+ columns: ['id', 'title', 'url'],
9
+ types: { id: 'number', title: 'string', url: 'string' },
10
+ patterns: { url: '^https://' },
11
+ notEmpty: ['title', 'url'],
12
+ },
13
+ };
14
+ const rows = [
15
+ { id: 1, title: 'a', url: 'https://x.com/a' },
16
+ { id: 2, title: 'b', url: 'https://x.com/b' },
17
+ ];
18
+ expect(validateRows(rows, fixture)).toEqual([]);
19
+ });
20
+ it('reports rowCount below min', () => {
21
+ const failures = validateRows([], { expect: { rowCount: { min: 1 } } });
22
+ expect(failures).toHaveLength(1);
23
+ expect(failures[0]).toMatchObject({ rule: 'rowCount' });
24
+ expect(failures[0].detail).toContain('at least 1');
25
+ });
26
+ it('reports rowCount above max', () => {
27
+ const failures = validateRows([{}, {}, {}, {}], { expect: { rowCount: { max: 3 } } });
28
+ expect(failures).toHaveLength(1);
29
+ expect(failures[0].detail).toContain('at most 3');
30
+ });
31
+ it('reports missing columns per row', () => {
32
+ const failures = validateRows([{ a: 1 }, { a: 2, b: 3 }], { expect: { columns: ['a', 'b'] } });
33
+ // row 0 missing 'b', row 1 complete
34
+ expect(failures).toEqual([
35
+ { rule: 'column', detail: 'missing column "b"', rowIndex: 0 },
36
+ ]);
37
+ });
38
+ it('reports type mismatch including null', () => {
39
+ const failures = validateRows([{ a: 'abc' }, { a: null }, { a: 42 }], { expect: { types: { a: 'string' } } });
40
+ // row 0 string ok, row 1 null fail, row 2 number fail
41
+ expect(failures).toHaveLength(2);
42
+ expect(failures[0].rowIndex).toBe(1);
43
+ expect(failures[0].detail).toContain('null');
44
+ expect(failures[1].rowIndex).toBe(2);
45
+ expect(failures[1].detail).toContain('number');
46
+ });
47
+ it('accepts union types like "number|string"', () => {
48
+ const failures = validateRows([{ id: 1 }, { id: 'abc' }], { expect: { types: { id: 'number|string' } } });
49
+ expect(failures).toEqual([]);
50
+ });
51
+ it('accepts "any" as wildcard type', () => {
52
+ const failures = validateRows([{ v: 1 }, { v: 'x' }, { v: null }, { v: [1, 2] }], { expect: { types: { v: 'any' } } });
53
+ expect(failures).toEqual([]);
54
+ });
55
+ it('reports pattern mismatch with row index and truncated value', () => {
56
+ const failures = validateRows([{ url: 'https://ok.com' }, { url: 'not-a-url' }], { expect: { patterns: { url: '^https?://' } } });
57
+ expect(failures).toHaveLength(1);
58
+ expect(failures[0]).toMatchObject({ rule: 'pattern', rowIndex: 1 });
59
+ expect(failures[0].detail).toContain('not-a-url');
60
+ });
61
+ it('skips pattern check for null/undefined values', () => {
62
+ const failures = validateRows([{ url: null }, { url: undefined }], { expect: { patterns: { url: '^x' } } });
63
+ expect(failures).toEqual([]);
64
+ });
65
+ it('reports invalid regex without crashing', () => {
66
+ const failures = validateRows([{ a: 'x' }], { expect: { patterns: { a: '[unclosed' } } });
67
+ expect(failures.some((f) => f.rule === 'pattern' && f.detail.includes('invalid'))).toBe(true);
68
+ });
69
+ it('treats empty/whitespace/null as failing notEmpty', () => {
70
+ const failures = validateRows([{ t: '' }, { t: ' ' }, { t: null }, { t: 'ok' }], { expect: { notEmpty: ['t'] } });
71
+ expect(failures).toHaveLength(3);
72
+ expect(failures.map((f) => f.rowIndex)).toEqual([0, 1, 2]);
73
+ });
74
+ it('no failures when fixture has no expect block', () => {
75
+ expect(validateRows([{ anything: 1 }], {})).toEqual([]);
76
+ });
77
+ it('mustNotContain flags substring bleed in columns', () => {
78
+ const failures = validateRows([
79
+ { description: 'Lead engineer, 5 years exp. address: Shanghai. category: IT' },
80
+ { description: 'Clean text.' },
81
+ ], {
82
+ expect: {
83
+ mustNotContain: { description: ['address:', 'category:'] },
84
+ },
85
+ });
86
+ expect(failures).toHaveLength(2);
87
+ expect(failures.every((f) => f.rule === 'mustNotContain')).toBe(true);
88
+ expect(failures.every((f) => f.rowIndex === 0)).toBe(true);
89
+ });
90
+ it('mustNotContain skips null/undefined values', () => {
91
+ const failures = validateRows([{ description: null }, { description: undefined }], { expect: { mustNotContain: { description: ['x'] } } });
92
+ expect(failures).toEqual([]);
93
+ });
94
+ it('mustBeTruthy catches silent 0 / false / "" fallbacks', () => {
95
+ const failures = validateRows([{ count: 10 }, { count: 0 }, { count: false }, { count: '' }, { count: null }], { expect: { mustBeTruthy: ['count'] } });
96
+ expect(failures).toHaveLength(4);
97
+ expect(failures.every((f) => f.rule === 'mustBeTruthy')).toBe(true);
98
+ expect(failures.map((f) => f.rowIndex)).toEqual([1, 2, 3, 4]);
99
+ });
100
+ });
101
+ describe('deriveFixture', () => {
102
+ it('returns rowCount.min=0 when rows are empty', () => {
103
+ expect(deriveFixture([])).toEqual({ expect: { rowCount: { min: 0 } } });
104
+ });
105
+ it('extracts columns from first row and infers types per column', () => {
106
+ const fixture = deriveFixture([
107
+ { id: 1, title: 'a', url: 'https://x' },
108
+ { id: 2, title: 'b', url: 'https://y' },
109
+ ]);
110
+ expect(fixture.expect?.columns).toEqual(['id', 'title', 'url']);
111
+ expect(fixture.expect?.types).toEqual({
112
+ id: 'number',
113
+ title: 'string',
114
+ url: 'string',
115
+ });
116
+ expect(fixture.expect?.rowCount).toEqual({ min: 1 });
117
+ });
118
+ it('unions mixed types across rows as "a|b"', () => {
119
+ const fixture = deriveFixture([
120
+ { v: 1 },
121
+ { v: 'two' },
122
+ { v: null },
123
+ ]);
124
+ expect(fixture.expect?.types?.v).toBe('null|number|string');
125
+ });
126
+ it('embeds args when provided', () => {
127
+ const fixture = deriveFixture([{ x: 1 }], { limit: 5 });
128
+ expect(fixture.args).toEqual({ limit: 5 });
129
+ });
130
+ it('embeds positional argv array when provided', () => {
131
+ const fixture = deriveFixture([{ x: 1 }], ['123', '--limit', '3']);
132
+ expect(fixture.args).toEqual(['123', '--limit', '3']);
133
+ });
134
+ it('does not add patterns or notEmpty automatically', () => {
135
+ const fixture = deriveFixture([{ a: 'x' }]);
136
+ expect(fixture.expect?.patterns).toBeUndefined();
137
+ expect(fixture.expect?.notEmpty).toBeUndefined();
138
+ });
139
+ });
140
+ describe('expandFixtureArgs', () => {
141
+ it('returns [] for undefined', () => {
142
+ expect(expandFixtureArgs(undefined)).toEqual([]);
143
+ });
144
+ it('expands object form as --key value pairs', () => {
145
+ expect(expandFixtureArgs({ limit: 3, sort: 'hot' })).toEqual(['--limit', '3', '--sort', 'hot']);
146
+ });
147
+ it('passes array form verbatim, stringifying values', () => {
148
+ expect(expandFixtureArgs(['123456', '--limit', 3])).toEqual(['123456', '--limit', '3']);
149
+ });
150
+ it('handles empty object and empty array', () => {
151
+ expect(expandFixtureArgs({})).toEqual([]);
152
+ expect(expandFixtureArgs([])).toEqual([]);
153
+ });
154
+ it('preserves positional + flag mix (e.g. <tid> --limit 3)', () => {
155
+ expect(expandFixtureArgs(['https://example.com/thread-1', '--comments', '5'])).toEqual([
156
+ 'https://example.com/thread-1',
157
+ '--comments',
158
+ '5',
159
+ ]);
160
+ });
161
+ });
package/dist/src/cli.d.ts CHANGED
@@ -6,6 +6,38 @@
6
6
  */
7
7
  import { Command } from 'commander';
8
8
  import { findPackageRoot } from './package-paths.js';
9
+ /**
10
+ * Check whether the site-memory scaffolding exists under
11
+ * ~/.opencli/sites/<site>/. Agents have a strong tendency to forget to write
12
+ * endpoints.json / notes.md after a successful verify, which dooms the next
13
+ * agent to redo recon from scratch. Surfacing the current state as part of
14
+ * verify's final report converts that "silent skip" into a visible nudge;
15
+ * `--strict-memory` escalates it to a failure so agents driving a hardened
16
+ * workflow can't forget.
17
+ */
18
+ export type SiteMemoryReport = {
19
+ ok: boolean;
20
+ siteDir: string;
21
+ endpoints: {
22
+ present: boolean;
23
+ count: number;
24
+ path: string;
25
+ };
26
+ notes: {
27
+ present: boolean;
28
+ path: string;
29
+ };
30
+ };
31
+ export declare function checkSiteMemory(site: string): SiteMemoryReport;
32
+ export declare function printSiteMemoryReport(report: SiteMemoryReport, strict: boolean | undefined): void;
33
+ /** Coerce adapter JSON output into a row array. Accepts `[{...}]`, single `{}`, or `{items:[...]}`-style envelopes. */
34
+ export declare function normalizeVerifyRows(data: unknown): Record<string, unknown>[];
35
+ /** Render up to 10 rows as a compact padded table for eyeball inspection during verify. */
36
+ export declare function renderVerifyPreview(rows: Record<string, unknown>[], opts?: {
37
+ maxRows?: number;
38
+ maxCols?: number;
39
+ cellMax?: number;
40
+ }): string;
9
41
  export declare function createProgram(BUILTIN_CLIS: string, USER_CLIS: string): Command;
10
42
  export declare function runCli(BUILTIN_CLIS: string, USER_CLIS: string): void;
11
43
  export interface BrowserVerifyInvocation {
package/dist/src/cli.js CHANGED
@@ -28,6 +28,7 @@ import { DEFAULT_TTL_MS, findEntry, loadNetworkCache, saveNetworkCache } from '.
28
28
  import { parseFilter, shapeMatchesFilter } from './browser/shape-filter.js';
29
29
  import { buildHtmlTreeJs } from './browser/html-tree.js';
30
30
  import { buildExtractHtmlJs, runExtractFromHtml } from './browser/extract.js';
31
+ import { analyzeSite } from './browser/analyze.js';
31
32
  import { daemonStatus, daemonStop } from './commands/daemon.js';
32
33
  import { log } from './logger.js';
33
34
  const CLI_FILE = fileURLToPath(import.meta.url);
@@ -43,38 +44,42 @@ const BROWSER_TAB_OPTION_DESCRIPTION = 'Target tab/page identity returned by "br
43
44
  async function captureNetworkItems(page) {
44
45
  if (page.readNetworkCapture) {
45
46
  const raw = await page.readNetworkCapture();
46
- return raw.map((e) => {
47
- const preview = e.responsePreview ?? null;
48
- let body = null;
49
- if (preview) {
50
- try {
51
- body = JSON.parse(preview);
52
- }
53
- catch {
54
- body = preview;
47
+ if (Array.isArray(raw) && raw.length > 0) {
48
+ return raw.map((e) => {
49
+ const preview = e.responsePreview ?? null;
50
+ let body = null;
51
+ if (preview) {
52
+ try {
53
+ body = JSON.parse(preview);
54
+ }
55
+ catch {
56
+ body = preview;
57
+ }
55
58
  }
56
- }
57
- const fullSize = typeof e.responseBodyFullSize === 'number'
58
- ? e.responseBodyFullSize
59
- : (preview ? preview.length : 0);
60
- const truncated = e.responseBodyTruncated === true;
61
- return {
62
- url: e.url || '',
63
- method: e.method || 'GET',
64
- status: e.responseStatus || 0,
65
- size: fullSize,
66
- ct: e.responseContentType || '',
67
- body,
68
- bodyFullSize: fullSize,
69
- bodyTruncated: truncated,
70
- };
71
- });
59
+ const fullSize = typeof e.responseBodyFullSize === 'number'
60
+ ? e.responseBodyFullSize
61
+ : (preview ? preview.length : 0);
62
+ const truncated = e.responseBodyTruncated === true;
63
+ return {
64
+ url: e.url || '',
65
+ method: e.method || 'GET',
66
+ status: e.responseStatus || 0,
67
+ size: fullSize,
68
+ ct: e.responseContentType || '',
69
+ body,
70
+ bodyFullSize: fullSize,
71
+ bodyTruncated: truncated,
72
+ };
73
+ });
74
+ }
72
75
  }
73
- const raw = await page.evaluate(`(function(){ return JSON.stringify(window.__opencli_net || []); })()`);
76
+ const raw = await page.evaluate(`(function(){ var out = window.__opencli_net || []; window.__opencli_net = []; return JSON.stringify(out); })()`);
74
77
  try {
75
78
  return JSON.parse(raw);
76
79
  }
77
80
  catch {
81
+ if (process.env.OPENCLI_VERBOSE)
82
+ log.warn(`[network] Failed to parse interceptor buffer: ${typeof raw === 'string' ? raw.slice(0, 200) : String(raw)}`);
78
83
  return [];
79
84
  }
80
85
  }
@@ -84,10 +89,108 @@ function filterNetworkItems(items) {
84
89
  !/\.(js|css|png|jpg|gif|svg|woff|ico|map)(\?|$)/i.test(r.url) &&
85
90
  !/analytics|tracking|telemetry|beacon|pixel|gtag|fbevents/i.test(r.url));
86
91
  }
92
+ /** Exit codes by network error code — usage errors vs runtime failures. */
93
+ const NETWORK_ERROR_EXIT = {
94
+ invalid_args: EXIT_CODES.USAGE_ERROR,
95
+ invalid_filter: EXIT_CODES.USAGE_ERROR,
96
+ invalid_max_body: EXIT_CODES.USAGE_ERROR,
97
+ };
87
98
  /** Emit a structured error JSON so agents can branch on `error.code` without regex. */
88
99
  function emitNetworkError(code, message, extra = {}) {
89
100
  console.log(JSON.stringify({ error: { code, message, ...extra } }, null, 2));
90
- process.exitCode = EXIT_CODES.USAGE_ERROR;
101
+ process.exitCode = NETWORK_ERROR_EXIT[code] ?? EXIT_CODES.GENERIC_ERROR;
102
+ }
103
+ export function checkSiteMemory(site) {
104
+ const siteDir = path.join(os.homedir(), '.opencli', 'sites', site);
105
+ const endpointsPath = path.join(siteDir, 'endpoints.json');
106
+ const notesPath = path.join(siteDir, 'notes.md');
107
+ let endpointsCount = 0;
108
+ let endpointsPresent = fs.existsSync(endpointsPath);
109
+ if (endpointsPresent) {
110
+ try {
111
+ const parsed = JSON.parse(fs.readFileSync(endpointsPath, 'utf-8'));
112
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
113
+ endpointsCount = Object.keys(parsed).length;
114
+ }
115
+ else if (Array.isArray(parsed)) {
116
+ endpointsCount = parsed.length;
117
+ }
118
+ }
119
+ catch {
120
+ endpointsPresent = false;
121
+ }
122
+ }
123
+ const notesPresent = fs.existsSync(notesPath);
124
+ return {
125
+ ok: endpointsPresent && endpointsCount > 0 && notesPresent,
126
+ siteDir,
127
+ endpoints: { present: endpointsPresent, count: endpointsCount, path: endpointsPath },
128
+ notes: { present: notesPresent, path: notesPath },
129
+ };
130
+ }
131
+ export function printSiteMemoryReport(report, strict) {
132
+ if (report.ok) {
133
+ console.log(` ✓ Memory: endpoints.json (${report.endpoints.count}), notes.md present at ${report.siteDir}`);
134
+ return;
135
+ }
136
+ const marker = strict ? '✗' : '⚠';
137
+ const missing = [];
138
+ if (!report.endpoints.present)
139
+ missing.push('endpoints.json');
140
+ else if (report.endpoints.count === 0)
141
+ missing.push('endpoints.json (empty)');
142
+ if (!report.notes.present)
143
+ missing.push('notes.md');
144
+ console.log(` ${marker} Memory: missing ${missing.join(', ')} under ${report.siteDir}`);
145
+ console.log(` Write the endpoint you just verified + a 1-line session note so the next agent starts from minute 0, not minute 95.`);
146
+ if (!strict) {
147
+ console.log(` (Re-run with --strict-memory to fail instead of warn.)`);
148
+ }
149
+ }
150
+ /** Coerce adapter JSON output into a row array. Accepts `[{...}]`, single `{}`, or `{items:[...]}`-style envelopes. */
151
+ export function normalizeVerifyRows(data) {
152
+ if (Array.isArray(data)) {
153
+ return data.map((r) => (r && typeof r === 'object' ? r : { value: r }));
154
+ }
155
+ if (data && typeof data === 'object') {
156
+ const obj = data;
157
+ for (const k of ['rows', 'items', 'data', 'results']) {
158
+ if (Array.isArray(obj[k])) {
159
+ return obj[k].map((r) => (r && typeof r === 'object' ? r : { value: r }));
160
+ }
161
+ }
162
+ return [obj];
163
+ }
164
+ return [];
165
+ }
166
+ /** Render up to 10 rows as a compact padded table for eyeball inspection during verify. */
167
+ export function renderVerifyPreview(rows, opts = {}) {
168
+ const maxRows = opts.maxRows ?? 10;
169
+ const maxCols = opts.maxCols ?? 6;
170
+ const cellMax = opts.cellMax ?? 40;
171
+ if (rows.length === 0)
172
+ return ' (no rows)';
173
+ const allCols = Array.from(new Set(rows.flatMap((r) => Object.keys(r))));
174
+ const cols = allCols.slice(0, maxCols);
175
+ const shown = rows.slice(0, maxRows);
176
+ const cellOf = (v) => {
177
+ if (v === null || v === undefined)
178
+ return '';
179
+ const s = typeof v === 'object' ? JSON.stringify(v) : String(v);
180
+ return s.replace(/\s+/g, ' ').slice(0, cellMax);
181
+ };
182
+ const widths = cols.map((c) => Math.max(c.length, ...shown.map((r) => cellOf(r[c]).length)));
183
+ const fmtRow = (vals) => vals.map((v, i) => v.padEnd(widths[i])).join(' ');
184
+ const out = [];
185
+ out.push(` ${fmtRow(cols)}`);
186
+ out.push(` ${widths.map((w) => '-'.repeat(w)).join(' ')}`);
187
+ for (const r of shown)
188
+ out.push(` ${fmtRow(cols.map((c) => cellOf(r[c])))}`);
189
+ if (rows.length > maxRows)
190
+ out.push(` ... and ${rows.length - maxRows} more row(s)`);
191
+ if (allCols.length > maxCols)
192
+ out.push(` (${allCols.length - maxCols} more column(s) hidden)`);
193
+ return out.join('\n');
91
194
  }
92
195
  function getBrowserCacheDir() {
93
196
  return process.env.OPENCLI_CACHE_DIR || path.join(os.homedir(), '.opencli', 'cache');
@@ -521,6 +624,73 @@ export function createProgram(BUILTIN_CLIS, USER_CLIS) {
521
624
  console.log(await page.screenshot({ format: 'png' }));
522
625
  }
523
626
  }));
627
+ // ── Analyze (site recon, agent-native) ──
628
+ //
629
+ // Mechanizes the `site-recon.md` decision tree into one CLI call. The agent
630
+ // calls `browser analyze <url>` and gets back:
631
+ //
632
+ // - pattern: A/B/C/D (mapped from network + SSR-globals signals)
633
+ // - anti_bot: vendor + evidence + the one-liner for "what to do next"
634
+ // - initial_state: which window globals are populated
635
+ // - nearest_adapter: existing commands for the same site, if any
636
+ // - recommended_next_step: a single imperative sentence
637
+ //
638
+ // Intent: replace the "open → eyeball network → curl → WAF → try again"
639
+ // feedback loop with a single deterministic verdict. Without this, agents
640
+ // burn ~20min per WAF-protected site re-discovering anti-bot posture.
641
+ addBrowserTabOption(browser.command('analyze').argument('<url>'))
642
+ .description('Classify site: anti-bot vendor, pattern (A/B/C/D), nearest adapter, recommended next step')
643
+ .action(browserAction(async (page, url) => {
644
+ const hasSessionCapture = await page.startNetworkCapture?.() ?? false;
645
+ await page.goto(url);
646
+ await page.wait(2);
647
+ if (!hasSessionCapture) {
648
+ try {
649
+ await page.evaluate(NETWORK_INTERCEPTOR_JS);
650
+ }
651
+ catch { /* non-fatal */ }
652
+ }
653
+ await captureNetworkItems(page);
654
+ // Best-effort: give the page another beat so XHR after DOMContentLoaded lands.
655
+ await page.wait(1);
656
+ const rawItems = await captureNetworkItems(page);
657
+ const networkEntries = rawItems.map((e) => ({
658
+ url: e.url,
659
+ status: e.status,
660
+ contentType: e.ct,
661
+ bodyPreview: typeof e.body === 'string'
662
+ ? e.body.slice(0, 2000)
663
+ : (e.body ? JSON.stringify(e.body).slice(0, 2000) : null),
664
+ }));
665
+ const probeJs = `(function(){
666
+ return {
667
+ cookieNames: (document.cookie || '').split(';').map(function(c){ return c.trim().split('=')[0]; }).filter(Boolean),
668
+ initialState: {
669
+ __INITIAL_STATE__: typeof window.__INITIAL_STATE__ !== 'undefined',
670
+ __NUXT__: typeof window.__NUXT__ !== 'undefined',
671
+ __NEXT_DATA__: typeof window.__NEXT_DATA__ !== 'undefined',
672
+ __APOLLO_STATE__: typeof window.__APOLLO_STATE__ !== 'undefined',
673
+ },
674
+ title: document.title || '',
675
+ finalUrl: location.href,
676
+ };
677
+ })()`;
678
+ const probe = await page.evaluate(probeJs);
679
+ const browserCookieNames = (await page.getCookies({ url: probe.finalUrl || url }).catch(() => []))
680
+ .map((c) => c.name)
681
+ .filter(Boolean);
682
+ const cookieNames = [...new Set([...probe.cookieNames, ...browserCookieNames])];
683
+ const signals = {
684
+ requestedUrl: url,
685
+ finalUrl: probe.finalUrl,
686
+ cookieNames,
687
+ networkEntries,
688
+ initialState: probe.initialState,
689
+ title: probe.title,
690
+ };
691
+ const report = analyzeSite(signals, getRegistry());
692
+ console.log(JSON.stringify(report, null, 2));
693
+ }));
524
694
  // ── Find (structured CSS query, agent-native) ──
525
695
  //
526
696
  // `browser find --css <sel>` lets agents jump straight from a semantic
@@ -850,10 +1020,10 @@ export function createProgram(BUILTIN_CLIS, USER_CLIS) {
850
1020
  }));
851
1021
  // ── Wait commands ──
852
1022
  addBrowserTabOption(browser.command('wait'))
853
- .argument('<type>', 'selector, text, or time')
854
- .argument('[value]', 'CSS selector, text string, or seconds')
1023
+ .argument('<type>', 'selector, text, time, or xhr')
1024
+ .argument('[value]', 'CSS selector, text string, seconds, or XHR URL regex')
855
1025
  .option('--timeout <ms>', 'Timeout in milliseconds', '10000')
856
- .description('Wait for selector, text, or time (e.g. wait selector ".loaded", wait text "Success", wait time 3)')
1026
+ .description('Wait for selector, text, time, or matching XHR (e.g. wait selector ".loaded", wait text "Success", wait time 3, wait xhr "/api/search")')
857
1027
  .action(browserAction(async (page, type, value, opts) => {
858
1028
  const timeout = parseInt(opts.timeout, 10);
859
1029
  if (type === 'time') {
@@ -879,8 +1049,59 @@ export function createProgram(BUILTIN_CLIS, USER_CLIS) {
879
1049
  await page.wait({ text: value, timeout: timeout / 1000 });
880
1050
  console.log(`Text "${value}" appeared`);
881
1051
  }
1052
+ else if (type === 'xhr') {
1053
+ // Poll the capture ring until an entry matches the URL regex — turns
1054
+ // the common "open page, wait N seconds, hope the data landed" idiom
1055
+ // into a deterministic barrier keyed on the API the agent actually
1056
+ // cares about. Prevents silent "empty DOM" failures on slow SPAs.
1057
+ if (!value) {
1058
+ console.error('Missing XHR URL regex');
1059
+ process.exitCode = EXIT_CODES.USAGE_ERROR;
1060
+ return;
1061
+ }
1062
+ let re;
1063
+ try {
1064
+ re = new RegExp(value);
1065
+ }
1066
+ catch (err) {
1067
+ console.error(`Invalid regex "${value}": ${err instanceof Error ? err.message : String(err)}`);
1068
+ process.exitCode = EXIT_CODES.USAGE_ERROR;
1069
+ return;
1070
+ }
1071
+ const hasSessionCapture = await page.startNetworkCapture?.() ?? false;
1072
+ if (!hasSessionCapture) {
1073
+ try {
1074
+ await page.evaluate(NETWORK_INTERCEPTOR_JS);
1075
+ }
1076
+ catch { /* non-fatal */ }
1077
+ }
1078
+ await captureNetworkItems(page);
1079
+ const deadline = Date.now() + timeout;
1080
+ const pollMs = 400;
1081
+ let matched = null;
1082
+ while (Date.now() < deadline && !matched) {
1083
+ const items = await captureNetworkItems(page);
1084
+ matched = items.find((e) => re.test(e.url)) ?? null;
1085
+ if (!matched)
1086
+ await new Promise((r) => setTimeout(r, pollMs));
1087
+ }
1088
+ if (!matched) {
1089
+ console.log(JSON.stringify({
1090
+ error: {
1091
+ code: 'xhr_not_seen',
1092
+ message: `No captured XHR matched /${value}/ within ${timeout}ms`,
1093
+ hint: 'Check the pattern against `browser network` output; the endpoint may not have fired yet, or capture is disabled.',
1094
+ },
1095
+ }, null, 2));
1096
+ process.exitCode = EXIT_CODES.GENERIC_ERROR;
1097
+ return;
1098
+ }
1099
+ console.log(JSON.stringify({
1100
+ matched: { url: matched.url, status: matched.status, contentType: matched.ct },
1101
+ }, null, 2));
1102
+ }
882
1103
  else {
883
- console.error(`Unknown wait type "${type}". Use: selector, text, or time`);
1104
+ console.error(`Unknown wait type "${type}". Use: selector, text, time, or xhr`);
884
1105
  process.exitCode = EXIT_CODES.USAGE_ERROR;
885
1106
  }
886
1107
  }));
@@ -1212,8 +1433,12 @@ cli({
1212
1433
  // ── Verify (test adapter) ──
1213
1434
  browser.command('verify')
1214
1435
  .argument('<name>', 'Adapter name in site/command format (e.g. hn/top)')
1215
- .description('Execute an adapter and show results')
1216
- .action(async (name) => {
1436
+ .option('--write-fixture', 'Write a starter fixture to ~/.opencli/sites/<site>/verify/<command>.json if none exists')
1437
+ .option('--update-fixture', 'Overwrite an existing fixture with one derived from current output')
1438
+ .option('--no-fixture', 'Ignore any fixture file for this run (no value-level validation)')
1439
+ .option('--strict-memory', 'Fail (not just warn) when ~/.opencli/sites/<site>/endpoints.json or notes.md is missing')
1440
+ .description('Execute an adapter and validate output; uses fixture at ~/.opencli/sites/<site>/verify/<cmd>.json when present')
1441
+ .action(async (name, opts = {}) => {
1217
1442
  try {
1218
1443
  const parts = name.split('/');
1219
1444
  if (parts.length !== 2) {
@@ -1228,7 +1453,7 @@ cli({
1228
1453
  return;
1229
1454
  }
1230
1455
  const { execFileSync } = await import('node:child_process');
1231
- const os = await import('node:os');
1456
+ const { loadFixture, writeFixture, deriveFixture, validateRows, fixturePath, expandFixtureArgs } = await import('./browser/verify-fixture.js');
1232
1457
  const filePath = path.join(os.homedir(), '.opencli', 'clis', site, `${command}.js`);
1233
1458
  if (!fs.existsSync(filePath)) {
1234
1459
  console.error(`Adapter not found: ${filePath}`);
@@ -1238,14 +1463,24 @@ cli({
1238
1463
  }
1239
1464
  console.log(`🔍 Verifying ${name}...\n`);
1240
1465
  console.log(` Loading: ${filePath}`);
1241
- // Read adapter to check if it defines a 'limit' arg
1466
+ const useFixture = opts.fixture !== false;
1467
+ let fixture = useFixture ? loadFixture(site, command) : null;
1468
+ // Build adapter args: fixture.args override the legacy --limit 3 heuristic.
1469
+ // - object form { "limit": 3 } → `--limit 3`
1470
+ // - array form ["123", "--limit", "3"] → verbatim (for positional subjects)
1242
1471
  const adapterSrc = fs.readFileSync(filePath, 'utf-8');
1243
1472
  const hasLimitArg = /['"]limit['"]/.test(adapterSrc);
1244
- const limitFlag = hasLimitArg ? ' --limit 3' : '';
1245
- const limitArgs = hasLimitArg ? ['--limit', '3'] : [];
1473
+ const fixtureArgs = fixture?.args;
1474
+ const cliArgs = expandFixtureArgs(fixtureArgs);
1475
+ if (cliArgs.length === 0 && hasLimitArg)
1476
+ cliArgs.push('--limit', '3');
1477
+ const argDisplay = cliArgs.join(' ');
1246
1478
  const invocation = resolveBrowserVerifyInvocation();
1479
+ // Always request JSON so we can validate structurally.
1480
+ const execArgs = [...invocation.args, site, command, ...cliArgs, '--format', 'json'];
1481
+ let rawJson;
1247
1482
  try {
1248
- const output = execFileSync(invocation.binary, [...invocation.args, site, command, ...limitArgs], {
1483
+ rawJson = execFileSync(invocation.binary, execArgs, {
1249
1484
  cwd: invocation.cwd,
1250
1485
  timeout: 30000,
1251
1486
  encoding: 'utf-8',
@@ -1253,13 +1488,9 @@ cli({
1253
1488
  stdio: ['pipe', 'pipe', 'pipe'],
1254
1489
  ...(invocation.shell ? { shell: true } : {}),
1255
1490
  });
1256
- console.log(` Executing: opencli ${site} ${command}${limitFlag}\n`);
1257
- console.log(output);
1258
- console.log(`\n ✓ Adapter works!`);
1259
1491
  }
1260
1492
  catch (err) {
1261
- console.log(` Executing: opencli ${site} ${command}${limitFlag}\n`);
1262
- // execFileSync attaches captured stdout/stderr on its thrown Error.
1493
+ console.log(` Executing: opencli ${site} ${command} ${argDisplay}\n`);
1263
1494
  const execErr = err;
1264
1495
  if (execErr.stdout)
1265
1496
  console.log(String(execErr.stdout));
@@ -1267,7 +1498,66 @@ cli({
1267
1498
  console.error(String(execErr.stderr).slice(0, 500));
1268
1499
  console.log(`\n ✗ Adapter failed. Fix the code and try again.`);
1269
1500
  process.exitCode = EXIT_CODES.GENERIC_ERROR;
1501
+ return;
1502
+ }
1503
+ console.log(` Executing: opencli ${site} ${command} ${argDisplay}\n`);
1504
+ let rows;
1505
+ try {
1506
+ rows = normalizeVerifyRows(JSON.parse(rawJson));
1507
+ }
1508
+ catch {
1509
+ console.log(rawJson);
1510
+ console.log('\n ✗ Could not parse adapter output as JSON. Is `--format json` broken?');
1511
+ process.exitCode = EXIT_CODES.GENERIC_ERROR;
1512
+ return;
1513
+ }
1514
+ console.log(renderVerifyPreview(rows));
1515
+ console.log(`\n → ${rows.length} row${rows.length === 1 ? '' : 's'}`);
1516
+ // ── Fixture handling ───────────────────────────────────────────
1517
+ if (opts.writeFixture || opts.updateFixture) {
1518
+ if (fixture && !opts.updateFixture) {
1519
+ console.log(`\n Fixture already exists at ${fixturePath(site, command)}.`);
1520
+ console.log(` Use --update-fixture to overwrite.`);
1521
+ }
1522
+ else {
1523
+ const seedArgs = fixtureArgs !== undefined
1524
+ ? fixtureArgs
1525
+ : (hasLimitArg ? { limit: 3 } : undefined);
1526
+ const derived = deriveFixture(rows, seedArgs);
1527
+ const p = writeFixture(site, command, derived);
1528
+ console.log(`\n ${fixture ? '↻ Updated' : '✎ Wrote'} fixture: ${p}`);
1529
+ console.log(` Review and hand-tune the derived expectations (add patterns / notEmpty, tighten rowCount).`);
1530
+ fixture = derived;
1531
+ }
1532
+ }
1533
+ if (!fixture) {
1534
+ console.log(`\n ✓ Adapter runs. (No fixture at ${fixturePath(site, command)} — consider --write-fixture to seed one.)`);
1535
+ const memoryReport = checkSiteMemory(site);
1536
+ printSiteMemoryReport(memoryReport, opts.strictMemory);
1537
+ if (!memoryReport.ok && opts.strictMemory) {
1538
+ process.exitCode = EXIT_CODES.GENERIC_ERROR;
1539
+ }
1540
+ return;
1270
1541
  }
1542
+ const failures = validateRows(rows, fixture);
1543
+ if (failures.length === 0) {
1544
+ console.log(`\n ✓ Adapter matches fixture (${fixturePath(site, command)}).`);
1545
+ const memoryReport = checkSiteMemory(site);
1546
+ printSiteMemoryReport(memoryReport, opts.strictMemory);
1547
+ if (!memoryReport.ok && opts.strictMemory) {
1548
+ process.exitCode = EXIT_CODES.GENERIC_ERROR;
1549
+ }
1550
+ return;
1551
+ }
1552
+ console.log(`\n ✗ Adapter output does not match fixture:`);
1553
+ for (const f of failures.slice(0, 20)) {
1554
+ const where = f.rowIndex !== undefined ? `row[${f.rowIndex}] ` : '';
1555
+ console.log(` - [${f.rule}] ${where}${f.detail}`);
1556
+ }
1557
+ if (failures.length > 20) {
1558
+ console.log(` ... and ${failures.length - 20} more failure(s)`);
1559
+ }
1560
+ process.exitCode = EXIT_CODES.GENERIC_ERROR;
1271
1561
  }
1272
1562
  catch (err) {
1273
1563
  console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);