@jackwener/opencli 1.7.6 → 1.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -8
- package/README.zh-CN.md +14 -8
- package/cli-manifest.json +469 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/amazon/discussion.js +37 -6
- package/clis/amazon/discussion.test.js +147 -32
- package/clis/bilibili/video.js +11 -4
- package/clis/bilibili/video.test.js +51 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/chatgpt-app/ask.js +3 -19
- package/clis/chatgpt-app/ax.js +132 -1
- package/clis/chatgpt-app/ax.test.js +23 -0
- package/clis/chatgpt-app/send.js +2 -21
- package/clis/deepseek/ask.js +50 -18
- package/clis/deepseek/ask.test.js +195 -2
- package/clis/deepseek/utils.js +113 -29
- package/clis/deepseek/utils.test.js +109 -1
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/powerchina/search.js +250 -0
- package/clis/powerchina/search.test.js +67 -0
- package/clis/sinafinance/stock.js +5 -2
- package/clis/sinafinance/stock.test.js +59 -0
- package/clis/toutiao/articles.js +81 -0
- package/clis/toutiao/articles.test.js +23 -0
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +28 -0
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +3 -2
- package/clis/twitter/tweets.test.js +1 -1
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weixin/create-draft.js +225 -0
- package/clis/weixin/drafts.js +65 -0
- package/clis/weixin/drafts.test.js +65 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/cdp.js +11 -2
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +333 -43
- package/dist/src/cli.test.js +257 -1
- package/dist/src/commanderAdapter.js +12 -0
- package/dist/src/commanderAdapter.test.js +11 -0
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/package.json +5 -1
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { deriveFixture, expandFixtureArgs, validateRows } from './verify-fixture.js';
|
|
3
|
+
describe('validateRows', () => {
    // Most cases only care about the `expect` block, so wrap it once here.
    const check = (rows, expectation) => validateRows(rows, { expect: expectation });

    it('passes when rows meet all expectations', () => {
        const rows = [
            { id: 1, title: 'a', url: 'https://x.com/a' },
            { id: 2, title: 'b', url: 'https://x.com/b' },
        ];
        const fixture = {
            expect: {
                rowCount: { min: 1, max: 3 },
                columns: ['id', 'title', 'url'],
                types: { id: 'number', title: 'string', url: 'string' },
                patterns: { url: '^https://' },
                notEmpty: ['title', 'url'],
            },
        };
        expect(validateRows(rows, fixture)).toEqual([]);
    });

    it('reports rowCount below min', () => {
        const failures = check([], { rowCount: { min: 1 } });
        expect(failures).toHaveLength(1);
        expect(failures[0]).toMatchObject({ rule: 'rowCount' });
        expect(failures[0].detail).toContain('at least 1');
    });

    it('reports rowCount above max', () => {
        const fourEmptyRows = [{}, {}, {}, {}];
        const failures = check(fourEmptyRows, { rowCount: { max: 3 } });
        expect(failures).toHaveLength(1);
        expect(failures[0].detail).toContain('at most 3');
    });

    it('reports missing columns per row', () => {
        // Only the first row lacks "b"; the second row is complete.
        const rows = [{ a: 1 }, { a: 2, b: 3 }];
        const failures = check(rows, { columns: ['a', 'b'] });
        expect(failures).toEqual([
            { rule: 'column', detail: 'missing column "b"', rowIndex: 0 },
        ]);
    });

    it('reports type mismatch including null', () => {
        // Row 0 is a valid string; row 1 (null) and row 2 (number) must both fail.
        const rows = [{ a: 'abc' }, { a: null }, { a: 42 }];
        const failures = check(rows, { types: { a: 'string' } });
        expect(failures).toHaveLength(2);
        const [nullFailure, numberFailure] = failures;
        expect(nullFailure.rowIndex).toBe(1);
        expect(nullFailure.detail).toContain('null');
        expect(numberFailure.rowIndex).toBe(2);
        expect(numberFailure.detail).toContain('number');
    });

    it('accepts union types like "number|string"', () => {
        const rows = [{ id: 1 }, { id: 'abc' }];
        expect(check(rows, { types: { id: 'number|string' } })).toEqual([]);
    });

    it('accepts "any" as wildcard type', () => {
        const rows = [{ v: 1 }, { v: 'x' }, { v: null }, { v: [1, 2] }];
        expect(check(rows, { types: { v: 'any' } })).toEqual([]);
    });

    it('reports pattern mismatch with row index and truncated value', () => {
        const rows = [{ url: 'https://ok.com' }, { url: 'not-a-url' }];
        const failures = check(rows, { patterns: { url: '^https?://' } });
        expect(failures).toHaveLength(1);
        expect(failures[0]).toMatchObject({ rule: 'pattern', rowIndex: 1 });
        expect(failures[0].detail).toContain('not-a-url');
    });

    it('skips pattern check for null/undefined values', () => {
        const rows = [{ url: null }, { url: undefined }];
        expect(check(rows, { patterns: { url: '^x' } })).toEqual([]);
    });

    it('reports invalid regex without crashing', () => {
        const failures = check([{ a: 'x' }], { patterns: { a: '[unclosed' } });
        const flaggedInvalid = failures.some((f) => f.rule === 'pattern' && f.detail.includes('invalid'));
        expect(flaggedInvalid).toBe(true);
    });

    it('treats empty/whitespace/null as failing notEmpty', () => {
        const rows = [{ t: '' }, { t: ' ' }, { t: null }, { t: 'ok' }];
        const failures = check(rows, { notEmpty: ['t'] });
        expect(failures).toHaveLength(3);
        expect(failures.map((f) => f.rowIndex)).toEqual([0, 1, 2]);
    });

    it('no failures when fixture has no expect block', () => {
        const fixtureWithoutExpect = {};
        expect(validateRows([{ anything: 1 }], fixtureWithoutExpect)).toEqual([]);
    });

    it('mustNotContain flags substring bleed in columns', () => {
        const rows = [
            { description: 'Lead engineer, 5 years exp. address: Shanghai. category: IT' },
            { description: 'Clean text.' },
        ];
        const failures = check(rows, {
            mustNotContain: { description: ['address:', 'category:'] },
        });
        // Both forbidden substrings occur in row 0 only.
        expect(failures).toHaveLength(2);
        expect(failures.every((f) => f.rule === 'mustNotContain')).toBe(true);
        expect(failures.every((f) => f.rowIndex === 0)).toBe(true);
    });

    it('mustNotContain skips null/undefined values', () => {
        const rows = [{ description: null }, { description: undefined }];
        expect(check(rows, { mustNotContain: { description: ['x'] } })).toEqual([]);
    });

    it('mustBeTruthy catches silent 0 / false / "" fallbacks', () => {
        const rows = [{ count: 10 }, { count: 0 }, { count: false }, { count: '' }, { count: null }];
        const failures = check(rows, { mustBeTruthy: ['count'] });
        expect(failures).toHaveLength(4);
        expect(failures.every((f) => f.rule === 'mustBeTruthy')).toBe(true);
        expect(failures.map((f) => f.rowIndex)).toEqual([1, 2, 3, 4]);
    });
});
|
|
101
|
+
describe('deriveFixture', () => {
    it('returns rowCount.min=0 when rows are empty', () => {
        const derived = deriveFixture([]);
        expect(derived).toEqual({ expect: { rowCount: { min: 0 } } });
    });

    it('extracts columns from first row and infers types per column', () => {
        const rows = [
            { id: 1, title: 'a', url: 'https://x' },
            { id: 2, title: 'b', url: 'https://y' },
        ];
        const { expect: expectation } = deriveFixture(rows);
        expect(expectation?.columns).toEqual(['id', 'title', 'url']);
        expect(expectation?.types).toEqual({
            id: 'number',
            title: 'string',
            url: 'string',
        });
        expect(expectation?.rowCount).toEqual({ min: 1 });
    });

    it('unions mixed types across rows as "a|b"', () => {
        // One column seen as number, string and null across three rows.
        const rows = [{ v: 1 }, { v: 'two' }, { v: null }];
        const { expect: expectation } = deriveFixture(rows);
        expect(expectation?.types?.v).toBe('null|number|string');
    });

    it('embeds args when provided', () => {
        const { args } = deriveFixture([{ x: 1 }], { limit: 5 });
        expect(args).toEqual({ limit: 5 });
    });

    it('embeds positional argv array when provided', () => {
        const argv = ['123', '--limit', '3'];
        const { args } = deriveFixture([{ x: 1 }], argv);
        expect(args).toEqual(['123', '--limit', '3']);
    });

    it('does not add patterns or notEmpty automatically', () => {
        const { expect: expectation } = deriveFixture([{ a: 'x' }]);
        expect(expectation?.patterns).toBeUndefined();
        expect(expectation?.notEmpty).toBeUndefined();
    });
});
|
|
140
|
+
describe('expandFixtureArgs', () => {
    it('returns [] for undefined', () => {
        const argv = expandFixtureArgs(undefined);
        expect(argv).toEqual([]);
    });

    it('expands object form as --key value pairs', () => {
        const argv = expandFixtureArgs({ limit: 3, sort: 'hot' });
        expect(argv).toEqual(['--limit', '3', '--sort', 'hot']);
    });

    it('passes array form verbatim, stringifying values', () => {
        // The numeric 3 must come back as the string '3'.
        const argv = expandFixtureArgs(['123456', '--limit', 3]);
        expect(argv).toEqual(['123456', '--limit', '3']);
    });

    it('handles empty object and empty array', () => {
        for (const emptyInput of [{}, []]) {
            expect(expandFixtureArgs(emptyInput)).toEqual([]);
        }
    });

    it('preserves positional + flag mix (e.g. <tid> --limit 3)', () => {
        const mixed = ['https://example.com/thread-1', '--comments', '5'];
        expect(expandFixtureArgs(mixed)).toEqual([
            'https://example.com/thread-1',
            '--comments',
            '5',
        ]);
    });
});
|
package/dist/src/cli.d.ts
CHANGED
|
@@ -6,6 +6,38 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { Command } from 'commander';
|
|
8
8
|
import { findPackageRoot } from './package-paths.js';
|
|
9
|
+
/**
 * Check whether the site-memory scaffolding exists under
 * ~/.opencli/sites/<site>/. Agents have a strong tendency to forget to write
 * endpoints.json / notes.md after a successful verify, which dooms the next
 * agent to redo recon from scratch. Surfacing the current state as part of
 * verify's final report converts that "silent skip" into a visible nudge;
 * `--strict-memory` escalates it to a failure so agents driving a hardened
 * workflow can't forget.
 */
export type SiteMemoryReport = {
    /** True only when endpoints.json is present with at least one entry AND notes.md is present. */
    ok: boolean;
    /** The `~/.opencli/sites/<site>` directory that was inspected. */
    siteDir: string;
    endpoints: {
        /** False when endpoints.json is missing, unreadable, or not valid JSON. */
        present: boolean;
        /** Number of top-level keys (object) or elements (array); 0 for any other JSON value. */
        count: number;
        /** Path of the endpoints.json file that was checked. */
        path: string;
    };
    notes: {
        /** Whether notes.md exists (its content is not inspected). */
        present: boolean;
        /** Path of the notes.md file that was checked. */
        path: string;
    };
};
/** Inspect `~/.opencli/sites/<site>/` and report whether endpoints.json and notes.md are in place. */
export declare function checkSiteMemory(site: string): SiteMemoryReport;
/**
 * Print a one-line confirmation when `report.ok`, otherwise list what is missing.
 * `strict` only changes the marker and suppresses the "re-run with --strict-memory" hint;
 * this function itself does not set an exit code.
 */
export declare function printSiteMemoryReport(report: SiteMemoryReport, strict: boolean | undefined): void;
/** Coerce adapter JSON output into a row array. Accepts `[{...}]`, single `{}`, or `{items:[...]}`-style envelopes. */
export declare function normalizeVerifyRows(data: unknown): Record<string, unknown>[];
/** Render up to 10 rows as a compact padded table for eyeball inspection during verify. */
export declare function renderVerifyPreview(rows: Record<string, unknown>[], opts?: {
    /** Maximum number of rows rendered (implementation default: 10). */
    maxRows?: number;
    /** Maximum number of columns rendered (implementation default: 6). */
    maxCols?: number;
    /** Maximum characters kept per cell (implementation default: 40). */
    cellMax?: number;
}): string;
|
|
9
41
|
export declare function createProgram(BUILTIN_CLIS: string, USER_CLIS: string): Command;
|
|
10
42
|
export declare function runCli(BUILTIN_CLIS: string, USER_CLIS: string): void;
|
|
11
43
|
export interface BrowserVerifyInvocation {
|
package/dist/src/cli.js
CHANGED
|
@@ -28,6 +28,7 @@ import { DEFAULT_TTL_MS, findEntry, loadNetworkCache, saveNetworkCache } from '.
|
|
|
28
28
|
import { parseFilter, shapeMatchesFilter } from './browser/shape-filter.js';
|
|
29
29
|
import { buildHtmlTreeJs } from './browser/html-tree.js';
|
|
30
30
|
import { buildExtractHtmlJs, runExtractFromHtml } from './browser/extract.js';
|
|
31
|
+
import { analyzeSite } from './browser/analyze.js';
|
|
31
32
|
import { daemonStatus, daemonStop } from './commands/daemon.js';
|
|
32
33
|
import { log } from './logger.js';
|
|
33
34
|
const CLI_FILE = fileURLToPath(import.meta.url);
|
|
@@ -43,38 +44,42 @@ const BROWSER_TAB_OPTION_DESCRIPTION = 'Target tab/page identity returned by "br
|
|
|
43
44
|
/**
 * Collect the network entries captured for `page`.
 *
 * Preferred path: when the page exposes `readNetworkCapture()` and it yields a
 * non-empty array, each raw entry is normalized to
 * `{ url, method, status, size, ct, body, bodyFullSize, bodyTruncated }`.
 * `body` is the JSON-parsed `responsePreview` when it parses, otherwise the
 * raw preview string, or `null` when there is no preview.
 *
 * Fallback path: read (and reset) the in-page `window.__opencli_net` buffer
 * through `page.evaluate`.
 *
 * @param {object} page - Page handle with `evaluate` and, optionally, `readNetworkCapture`.
 * @returns {Promise<Array<object>>} Always an array; empty when nothing usable was captured.
 */
async function captureNetworkItems(page) {
    if (page.readNetworkCapture) {
        const captured = await page.readNetworkCapture();
        if (Array.isArray(captured) && captured.length > 0) {
            return captured.map((e) => {
                const preview = e.responsePreview ?? null;
                let body = null;
                if (preview) {
                    try {
                        body = JSON.parse(preview);
                    }
                    catch {
                        // Not JSON: keep the raw preview text as the body.
                        body = preview;
                    }
                }
                // Prefer the reported full size; otherwise fall back to the preview length.
                const fullSize = typeof e.responseBodyFullSize === 'number'
                    ? e.responseBodyFullSize
                    : (preview ? preview.length : 0);
                const truncated = e.responseBodyTruncated === true;
                return {
                    url: e.url || '',
                    method: e.method || 'GET',
                    status: e.responseStatus || 0,
                    size: fullSize,
                    ct: e.responseContentType || '',
                    body,
                    bodyFullSize: fullSize,
                    bodyTruncated: truncated,
                };
            });
        }
    }
    // Drain the interceptor buffer: the script hands back its content and resets it.
    const raw = await page.evaluate(`(function(){ var out = window.__opencli_net || []; window.__opencli_net = []; return JSON.stringify(out); })()`);
    try {
        const parsed = JSON.parse(raw);
        // Callers immediately call .map/.find on the result, so only an array is acceptable.
        if (Array.isArray(parsed))
            return parsed;
    }
    catch {
        // Malformed buffer: handled by the shared warning below.
    }
    if (process.env.OPENCLI_VERBOSE)
        log.warn(`[network] Failed to parse interceptor buffer: ${typeof raw === 'string' ? raw.slice(0, 200) : String(raw)}`);
    return [];
}
|
|
@@ -84,10 +89,108 @@ function filterNetworkItems(items) {
|
|
|
84
89
|
!/\.(js|css|png|jpg|gif|svg|woff|ico|map)(\?|$)/i.test(r.url) &&
|
|
85
90
|
!/analytics|tracking|telemetry|beacon|pixel|gtag|fbevents/i.test(r.url));
|
|
86
91
|
}
|
|
92
|
+
/** Exit codes by network error code — usage errors vs runtime failures. */
const NETWORK_ERROR_EXIT = {
    invalid_args: EXIT_CODES.USAGE_ERROR,
    invalid_filter: EXIT_CODES.USAGE_ERROR,
    invalid_max_body: EXIT_CODES.USAGE_ERROR,
};
/**
 * Emit a structured error JSON so agents can branch on `error.code` without regex.
 *
 * Prints `{ error: { code, message, ...extra } }` to stdout and sets
 * `process.exitCode`: the mapped usage-error code for codes listed in
 * `NETWORK_ERROR_EXIT`, `EXIT_CODES.GENERIC_ERROR` for everything else.
 *
 * @param {string} code - Machine-readable error code.
 * @param {string} message - Human-readable description.
 * @param {object} [extra] - Additional fields merged into the `error` object.
 */
function emitNetworkError(code, message, extra = {}) {
    console.log(JSON.stringify({ error: { code, message, ...extra } }, null, 2));
    // Own-property lookup only: a code such as "toString" must not resolve to an
    // inherited Object.prototype member and end up assigned as the exit code.
    process.exitCode = Object.hasOwn(NETWORK_ERROR_EXIT, code)
        ? NETWORK_ERROR_EXIT[code]
        : EXIT_CODES.GENERIC_ERROR;
}
|
|
103
|
+
/**
 * Inspect `~/.opencli/sites/<site>/` for the two site-memory files.
 *
 * endpoints.json counts as present only when it exists and parses as JSON;
 * its entry count is the number of top-level keys (object) or elements
 * (array), and stays 0 for any other JSON value. notes.md only needs to exist.
 *
 * @param {string} site - Site directory name under `~/.opencli/sites`.
 * @returns {{ok: boolean, siteDir: string, endpoints: {present: boolean, count: number, path: string}, notes: {present: boolean, path: string}}}
 */
export function checkSiteMemory(site) {
    const siteDir = path.join(os.homedir(), '.opencli', 'sites', site);
    const endpoints = { present: false, count: 0, path: path.join(siteDir, 'endpoints.json') };
    const notes = { present: false, path: path.join(siteDir, 'notes.md') };

    if (fs.existsSync(endpoints.path)) {
        try {
            const parsed = JSON.parse(fs.readFileSync(endpoints.path, 'utf-8'));
            endpoints.present = true;
            if (Array.isArray(parsed)) {
                endpoints.count = parsed.length;
            }
            else if (parsed && typeof parsed === 'object') {
                endpoints.count = Object.keys(parsed).length;
            }
        }
        catch {
            // Unreadable or malformed file: reported the same as a missing one.
        }
    }
    notes.present = fs.existsSync(notes.path);

    const ok = endpoints.present && endpoints.count > 0 && notes.present;
    return { ok, siteDir, endpoints, notes };
}
|
|
131
|
+
/**
 * Print the site-memory status produced by `checkSiteMemory`.
 *
 * A healthy report prints a single confirmation line. Otherwise the missing
 * pieces are listed, followed by a reminder of what to write; when `strict` is
 * falsy an extra hint about `--strict-memory` is printed. Only the marker and
 * that hint depend on `strict` — no exit code is set here.
 *
 * @param {object} report - Result of `checkSiteMemory`.
 * @param {boolean|undefined} strict - Whether missing memory is treated as a failure.
 */
export function printSiteMemoryReport(report, strict) {
    const { endpoints, notes, siteDir } = report;
    if (report.ok) {
        console.log(` ✓ Memory: endpoints.json (${endpoints.count}), notes.md present at ${siteDir}`);
        return;
    }

    const missing = [];
    if (!endpoints.present) {
        missing.push('endpoints.json');
    }
    else if (endpoints.count === 0) {
        missing.push('endpoints.json (empty)');
    }
    if (!notes.present) {
        missing.push('notes.md');
    }

    const marker = strict ? '✗' : '⚠';
    console.log(` ${marker} Memory: missing ${missing.join(', ')} under ${siteDir}`);
    console.log(` Write the endpoint you just verified + a 1-line session note so the next agent starts from minute 0, not minute 95.`);
    if (strict) {
        return;
    }
    console.log(` (Re-run with --strict-memory to fail instead of warn.)`);
}
|
|
150
|
+
/**
 * Coerce adapter JSON output into a row array.
 *
 * - An array is used as-is, with non-object entries wrapped as `{ value }`.
 * - An object is searched for the first array-valued envelope key among
 *   `rows`, `items`, `data`, `results` (in that order); without one, the
 *   object itself becomes the single row.
 * - Anything else (null, primitives) yields an empty array.
 *
 * @param {unknown} data - Parsed adapter output.
 * @returns {Array<object>} Normalized rows.
 */
export function normalizeVerifyRows(data) {
    const toRow = (entry) => (entry && typeof entry === 'object' ? entry : { value: entry });

    if (Array.isArray(data)) {
        return data.map(toRow);
    }
    if (!data || typeof data !== 'object') {
        return [];
    }
    const envelopeKey = ['rows', 'items', 'data', 'results'].find((key) => Array.isArray(data[key]));
    return envelopeKey === undefined ? [data] : data[envelopeKey].map(toRow);
}
|
|
166
|
+
/**
 * Render rows as a compact padded table for eyeball inspection during verify.
 *
 * Columns are the union of keys across all rows, in first-seen order. Cells
 * have whitespace runs collapsed and are cut to `cellMax` characters; object
 * values are shown as JSON. Trailer lines report how many rows and columns
 * were left out.
 *
 * @param {Array<object>} rows - Rows to preview.
 * @param {{maxRows?: number, maxCols?: number, cellMax?: number}} [opts]
 *   Limits; defaults are 10 rows, 6 columns, 40 characters per cell.
 * @returns {string} Multi-line table, or ' (no rows)' for an empty input.
 */
export function renderVerifyPreview(rows, opts = {}) {
    const maxRows = opts.maxRows ?? 10;
    const maxCols = opts.maxCols ?? 6;
    const cellMax = opts.cellMax ?? 40;
    if (rows.length === 0)
        return ' (no rows)';
    const allCols = Array.from(new Set(rows.flatMap((r) => Object.keys(r))));
    const cols = allCols.slice(0, maxCols);
    const shown = rows.slice(0, maxRows);
    // JSON.stringify throws on cycles/BigInt and returns undefined when toJSON()
    // yields undefined; a preview must never crash on such a value.
    const stringifyObject = (v) => {
        try {
            return JSON.stringify(v) ?? Object.prototype.toString.call(v);
        }
        catch {
            return Object.prototype.toString.call(v);
        }
    };
    const cellOf = (v) => {
        if (v === null || v === undefined)
            return '';
        const s = typeof v === 'object' ? stringifyObject(v) : String(v);
        return s.replace(/\s+/g, ' ').slice(0, cellMax);
    };
    // Each column is as wide as its header or its widest shown cell.
    const widths = cols.map((c) => Math.max(c.length, ...shown.map((r) => cellOf(r[c]).length)));
    const fmtRow = (vals) => vals.map((v, i) => v.padEnd(widths[i])).join(' ');
    const out = [];
    out.push(` ${fmtRow(cols)}`);
    out.push(` ${widths.map((w) => '-'.repeat(w)).join(' ')}`);
    for (const r of shown)
        out.push(` ${fmtRow(cols.map((c) => cellOf(r[c])))}`);
    if (rows.length > maxRows)
        out.push(` ... and ${rows.length - maxRows} more row(s)`);
    if (allCols.length > maxCols)
        out.push(` (${allCols.length - maxCols} more column(s) hidden)`);
    return out.join('\n');
}
|
|
92
195
|
function getBrowserCacheDir() {
|
|
93
196
|
return process.env.OPENCLI_CACHE_DIR || path.join(os.homedir(), '.opencli', 'cache');
|
|
@@ -521,6 +624,73 @@ export function createProgram(BUILTIN_CLIS, USER_CLIS) {
|
|
|
521
624
|
console.log(await page.screenshot({ format: 'png' }));
|
|
522
625
|
}
|
|
523
626
|
}));
|
|
627
|
+
// ── Analyze (site recon, agent-native) ──
|
|
628
|
+
//
|
|
629
|
+
// Mechanizes the `site-recon.md` decision tree into one CLI call. The agent
|
|
630
|
+
// calls `browser analyze <url>` and gets back:
|
|
631
|
+
//
|
|
632
|
+
// - pattern: A/B/C/D (mapped from network + SSR-globals signals)
|
|
633
|
+
// - anti_bot: vendor + evidence + the one-liner for "what to do next"
|
|
634
|
+
// - initial_state: which window globals are populated
|
|
635
|
+
// - nearest_adapter: existing commands for the same site, if any
|
|
636
|
+
// - recommended_next_step: a single imperative sentence
|
|
637
|
+
//
|
|
638
|
+
// Intent: replace the "open → eyeball network → curl → WAF → try again"
|
|
639
|
+
// feedback loop with a single deterministic verdict. Without this, agents
|
|
640
|
+
// burn ~20min per WAF-protected site re-discovering anti-bot posture.
|
|
641
|
+
addBrowserTabOption(browser.command('analyze').argument('<url>'))
|
|
642
|
+
.description('Classify site: anti-bot vendor, pattern (A/B/C/D), nearest adapter, recommended next step')
|
|
643
|
+
.action(browserAction(async (page, url) => {
|
|
644
|
+
const hasSessionCapture = await page.startNetworkCapture?.() ?? false;
|
|
645
|
+
await page.goto(url);
|
|
646
|
+
await page.wait(2);
|
|
647
|
+
if (!hasSessionCapture) {
|
|
648
|
+
try {
|
|
649
|
+
await page.evaluate(NETWORK_INTERCEPTOR_JS);
|
|
650
|
+
}
|
|
651
|
+
catch { /* non-fatal */ }
|
|
652
|
+
}
|
|
653
|
+
await captureNetworkItems(page);
|
|
654
|
+
// Best-effort: give the page another beat so XHR after DOMContentLoaded lands.
|
|
655
|
+
await page.wait(1);
|
|
656
|
+
const rawItems = await captureNetworkItems(page);
|
|
657
|
+
const networkEntries = rawItems.map((e) => ({
|
|
658
|
+
url: e.url,
|
|
659
|
+
status: e.status,
|
|
660
|
+
contentType: e.ct,
|
|
661
|
+
bodyPreview: typeof e.body === 'string'
|
|
662
|
+
? e.body.slice(0, 2000)
|
|
663
|
+
: (e.body ? JSON.stringify(e.body).slice(0, 2000) : null),
|
|
664
|
+
}));
|
|
665
|
+
const probeJs = `(function(){
|
|
666
|
+
return {
|
|
667
|
+
cookieNames: (document.cookie || '').split(';').map(function(c){ return c.trim().split('=')[0]; }).filter(Boolean),
|
|
668
|
+
initialState: {
|
|
669
|
+
__INITIAL_STATE__: typeof window.__INITIAL_STATE__ !== 'undefined',
|
|
670
|
+
__NUXT__: typeof window.__NUXT__ !== 'undefined',
|
|
671
|
+
__NEXT_DATA__: typeof window.__NEXT_DATA__ !== 'undefined',
|
|
672
|
+
__APOLLO_STATE__: typeof window.__APOLLO_STATE__ !== 'undefined',
|
|
673
|
+
},
|
|
674
|
+
title: document.title || '',
|
|
675
|
+
finalUrl: location.href,
|
|
676
|
+
};
|
|
677
|
+
})()`;
|
|
678
|
+
const probe = await page.evaluate(probeJs);
|
|
679
|
+
const browserCookieNames = (await page.getCookies({ url: probe.finalUrl || url }).catch(() => []))
|
|
680
|
+
.map((c) => c.name)
|
|
681
|
+
.filter(Boolean);
|
|
682
|
+
const cookieNames = [...new Set([...probe.cookieNames, ...browserCookieNames])];
|
|
683
|
+
const signals = {
|
|
684
|
+
requestedUrl: url,
|
|
685
|
+
finalUrl: probe.finalUrl,
|
|
686
|
+
cookieNames,
|
|
687
|
+
networkEntries,
|
|
688
|
+
initialState: probe.initialState,
|
|
689
|
+
title: probe.title,
|
|
690
|
+
};
|
|
691
|
+
const report = analyzeSite(signals, getRegistry());
|
|
692
|
+
console.log(JSON.stringify(report, null, 2));
|
|
693
|
+
}));
|
|
524
694
|
// ── Find (structured CSS query, agent-native) ──
|
|
525
695
|
//
|
|
526
696
|
// `browser find --css <sel>` lets agents jump straight from a semantic
|
|
@@ -850,10 +1020,10 @@ export function createProgram(BUILTIN_CLIS, USER_CLIS) {
|
|
|
850
1020
|
}));
|
|
851
1021
|
// ── Wait commands ──
|
|
852
1022
|
addBrowserTabOption(browser.command('wait'))
|
|
853
|
-
.argument('<type>', 'selector, text, or
|
|
854
|
-
.argument('[value]', 'CSS selector, text string, or
|
|
1023
|
+
.argument('<type>', 'selector, text, time, or xhr')
|
|
1024
|
+
.argument('[value]', 'CSS selector, text string, seconds, or XHR URL regex')
|
|
855
1025
|
.option('--timeout <ms>', 'Timeout in milliseconds', '10000')
|
|
856
|
-
.description('Wait for selector, text, or
|
|
1026
|
+
.description('Wait for selector, text, time, or matching XHR (e.g. wait selector ".loaded", wait text "Success", wait time 3, wait xhr "/api/search")')
|
|
857
1027
|
.action(browserAction(async (page, type, value, opts) => {
|
|
858
1028
|
const timeout = parseInt(opts.timeout, 10);
|
|
859
1029
|
if (type === 'time') {
|
|
@@ -879,8 +1049,59 @@ export function createProgram(BUILTIN_CLIS, USER_CLIS) {
|
|
|
879
1049
|
await page.wait({ text: value, timeout: timeout / 1000 });
|
|
880
1050
|
console.log(`Text "${value}" appeared`);
|
|
881
1051
|
}
|
|
1052
|
+
else if (type === 'xhr') {
|
|
1053
|
+
// Poll the capture ring until an entry matches the URL regex — turns
|
|
1054
|
+
// the common "open page, wait N seconds, hope the data landed" idiom
|
|
1055
|
+
// into a deterministic barrier keyed on the API the agent actually
|
|
1056
|
+
// cares about. Prevents silent "empty DOM" failures on slow SPAs.
|
|
1057
|
+
if (!value) {
|
|
1058
|
+
console.error('Missing XHR URL regex');
|
|
1059
|
+
process.exitCode = EXIT_CODES.USAGE_ERROR;
|
|
1060
|
+
return;
|
|
1061
|
+
}
|
|
1062
|
+
let re;
|
|
1063
|
+
try {
|
|
1064
|
+
re = new RegExp(value);
|
|
1065
|
+
}
|
|
1066
|
+
catch (err) {
|
|
1067
|
+
console.error(`Invalid regex "${value}": ${err instanceof Error ? err.message : String(err)}`);
|
|
1068
|
+
process.exitCode = EXIT_CODES.USAGE_ERROR;
|
|
1069
|
+
return;
|
|
1070
|
+
}
|
|
1071
|
+
const hasSessionCapture = await page.startNetworkCapture?.() ?? false;
|
|
1072
|
+
if (!hasSessionCapture) {
|
|
1073
|
+
try {
|
|
1074
|
+
await page.evaluate(NETWORK_INTERCEPTOR_JS);
|
|
1075
|
+
}
|
|
1076
|
+
catch { /* non-fatal */ }
|
|
1077
|
+
}
|
|
1078
|
+
await captureNetworkItems(page);
|
|
1079
|
+
const deadline = Date.now() + timeout;
|
|
1080
|
+
const pollMs = 400;
|
|
1081
|
+
let matched = null;
|
|
1082
|
+
while (Date.now() < deadline && !matched) {
|
|
1083
|
+
const items = await captureNetworkItems(page);
|
|
1084
|
+
matched = items.find((e) => re.test(e.url)) ?? null;
|
|
1085
|
+
if (!matched)
|
|
1086
|
+
await new Promise((r) => setTimeout(r, pollMs));
|
|
1087
|
+
}
|
|
1088
|
+
if (!matched) {
|
|
1089
|
+
console.log(JSON.stringify({
|
|
1090
|
+
error: {
|
|
1091
|
+
code: 'xhr_not_seen',
|
|
1092
|
+
message: `No captured XHR matched /${value}/ within ${timeout}ms`,
|
|
1093
|
+
hint: 'Check the pattern against `browser network` output; the endpoint may not have fired yet, or capture is disabled.',
|
|
1094
|
+
},
|
|
1095
|
+
}, null, 2));
|
|
1096
|
+
process.exitCode = EXIT_CODES.GENERIC_ERROR;
|
|
1097
|
+
return;
|
|
1098
|
+
}
|
|
1099
|
+
console.log(JSON.stringify({
|
|
1100
|
+
matched: { url: matched.url, status: matched.status, contentType: matched.ct },
|
|
1101
|
+
}, null, 2));
|
|
1102
|
+
}
|
|
882
1103
|
else {
|
|
883
|
-
console.error(`Unknown wait type "${type}". Use: selector, text, or
|
|
1104
|
+
console.error(`Unknown wait type "${type}". Use: selector, text, time, or xhr`);
|
|
884
1105
|
process.exitCode = EXIT_CODES.USAGE_ERROR;
|
|
885
1106
|
}
|
|
886
1107
|
}));
|
|
@@ -1212,8 +1433,12 @@ cli({
|
|
|
1212
1433
|
// ── Verify (test adapter) ──
|
|
1213
1434
|
browser.command('verify')
|
|
1214
1435
|
.argument('<name>', 'Adapter name in site/command format (e.g. hn/top)')
|
|
1215
|
-
.
|
|
1216
|
-
.
|
|
1436
|
+
.option('--write-fixture', 'Write a starter fixture to ~/.opencli/sites/<site>/verify/<command>.json if none exists')
|
|
1437
|
+
.option('--update-fixture', 'Overwrite an existing fixture with one derived from current output')
|
|
1438
|
+
.option('--no-fixture', 'Ignore any fixture file for this run (no value-level validation)')
|
|
1439
|
+
.option('--strict-memory', 'Fail (not just warn) when ~/.opencli/sites/<site>/endpoints.json or notes.md is missing')
|
|
1440
|
+
.description('Execute an adapter and validate output; uses fixture at ~/.opencli/sites/<site>/verify/<cmd>.json when present')
|
|
1441
|
+
.action(async (name, opts = {}) => {
|
|
1217
1442
|
try {
|
|
1218
1443
|
const parts = name.split('/');
|
|
1219
1444
|
if (parts.length !== 2) {
|
|
@@ -1228,7 +1453,7 @@ cli({
|
|
|
1228
1453
|
return;
|
|
1229
1454
|
}
|
|
1230
1455
|
const { execFileSync } = await import('node:child_process');
|
|
1231
|
-
const
|
|
1456
|
+
const { loadFixture, writeFixture, deriveFixture, validateRows, fixturePath, expandFixtureArgs } = await import('./browser/verify-fixture.js');
|
|
1232
1457
|
const filePath = path.join(os.homedir(), '.opencli', 'clis', site, `${command}.js`);
|
|
1233
1458
|
if (!fs.existsSync(filePath)) {
|
|
1234
1459
|
console.error(`Adapter not found: ${filePath}`);
|
|
@@ -1238,14 +1463,24 @@ cli({
|
|
|
1238
1463
|
}
|
|
1239
1464
|
console.log(`🔍 Verifying ${name}...\n`);
|
|
1240
1465
|
console.log(` Loading: ${filePath}`);
|
|
1241
|
-
|
|
1466
|
+
const useFixture = opts.fixture !== false;
|
|
1467
|
+
let fixture = useFixture ? loadFixture(site, command) : null;
|
|
1468
|
+
// Build adapter args: fixture.args override the legacy --limit 3 heuristic.
|
|
1469
|
+
// - object form { "limit": 3 } → `--limit 3`
|
|
1470
|
+
// - array form ["123", "--limit", "3"] → verbatim (for positional subjects)
|
|
1242
1471
|
const adapterSrc = fs.readFileSync(filePath, 'utf-8');
|
|
1243
1472
|
const hasLimitArg = /['"]limit['"]/.test(adapterSrc);
|
|
1244
|
-
const
|
|
1245
|
-
const
|
|
1473
|
+
const fixtureArgs = fixture?.args;
|
|
1474
|
+
const cliArgs = expandFixtureArgs(fixtureArgs);
|
|
1475
|
+
if (cliArgs.length === 0 && hasLimitArg)
|
|
1476
|
+
cliArgs.push('--limit', '3');
|
|
1477
|
+
const argDisplay = cliArgs.join(' ');
|
|
1246
1478
|
const invocation = resolveBrowserVerifyInvocation();
|
|
1479
|
+
// Always request JSON so we can validate structurally.
|
|
1480
|
+
const execArgs = [...invocation.args, site, command, ...cliArgs, '--format', 'json'];
|
|
1481
|
+
let rawJson;
|
|
1247
1482
|
try {
|
|
1248
|
-
|
|
1483
|
+
rawJson = execFileSync(invocation.binary, execArgs, {
|
|
1249
1484
|
cwd: invocation.cwd,
|
|
1250
1485
|
timeout: 30000,
|
|
1251
1486
|
encoding: 'utf-8',
|
|
@@ -1253,13 +1488,9 @@ cli({
|
|
|
1253
1488
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
1254
1489
|
...(invocation.shell ? { shell: true } : {}),
|
|
1255
1490
|
});
|
|
1256
|
-
console.log(` Executing: opencli ${site} ${command}${limitFlag}\n`);
|
|
1257
|
-
console.log(output);
|
|
1258
|
-
console.log(`\n ✓ Adapter works!`);
|
|
1259
1491
|
}
|
|
1260
1492
|
catch (err) {
|
|
1261
|
-
console.log(` Executing: opencli ${site} ${command}${
|
|
1262
|
-
// execFileSync attaches captured stdout/stderr on its thrown Error.
|
|
1493
|
+
console.log(` Executing: opencli ${site} ${command} ${argDisplay}\n`);
|
|
1263
1494
|
const execErr = err;
|
|
1264
1495
|
if (execErr.stdout)
|
|
1265
1496
|
console.log(String(execErr.stdout));
|
|
@@ -1267,7 +1498,66 @@ cli({
|
|
|
1267
1498
|
console.error(String(execErr.stderr).slice(0, 500));
|
|
1268
1499
|
console.log(`\n ✗ Adapter failed. Fix the code and try again.`);
|
|
1269
1500
|
process.exitCode = EXIT_CODES.GENERIC_ERROR;
|
|
1501
|
+
return;
|
|
1502
|
+
}
|
|
1503
|
+
console.log(` Executing: opencli ${site} ${command} ${argDisplay}\n`);
|
|
1504
|
+
let rows;
|
|
1505
|
+
try {
|
|
1506
|
+
rows = normalizeVerifyRows(JSON.parse(rawJson));
|
|
1507
|
+
}
|
|
1508
|
+
catch {
|
|
1509
|
+
console.log(rawJson);
|
|
1510
|
+
console.log('\n ✗ Could not parse adapter output as JSON. Is `--format json` broken?');
|
|
1511
|
+
process.exitCode = EXIT_CODES.GENERIC_ERROR;
|
|
1512
|
+
return;
|
|
1513
|
+
}
|
|
1514
|
+
console.log(renderVerifyPreview(rows));
|
|
1515
|
+
console.log(`\n → ${rows.length} row${rows.length === 1 ? '' : 's'}`);
|
|
1516
|
+
// ── Fixture handling ───────────────────────────────────────────
|
|
1517
|
+
if (opts.writeFixture || opts.updateFixture) {
|
|
1518
|
+
if (fixture && !opts.updateFixture) {
|
|
1519
|
+
console.log(`\n Fixture already exists at ${fixturePath(site, command)}.`);
|
|
1520
|
+
console.log(` Use --update-fixture to overwrite.`);
|
|
1521
|
+
}
|
|
1522
|
+
else {
|
|
1523
|
+
const seedArgs = fixtureArgs !== undefined
|
|
1524
|
+
? fixtureArgs
|
|
1525
|
+
: (hasLimitArg ? { limit: 3 } : undefined);
|
|
1526
|
+
const derived = deriveFixture(rows, seedArgs);
|
|
1527
|
+
const p = writeFixture(site, command, derived);
|
|
1528
|
+
console.log(`\n ${fixture ? '↻ Updated' : '✎ Wrote'} fixture: ${p}`);
|
|
1529
|
+
console.log(` Review and hand-tune the derived expectations (add patterns / notEmpty, tighten rowCount).`);
|
|
1530
|
+
fixture = derived;
|
|
1531
|
+
}
|
|
1532
|
+
}
|
|
1533
|
+
if (!fixture) {
|
|
1534
|
+
console.log(`\n ✓ Adapter runs. (No fixture at ${fixturePath(site, command)} — consider --write-fixture to seed one.)`);
|
|
1535
|
+
const memoryReport = checkSiteMemory(site);
|
|
1536
|
+
printSiteMemoryReport(memoryReport, opts.strictMemory);
|
|
1537
|
+
if (!memoryReport.ok && opts.strictMemory) {
|
|
1538
|
+
process.exitCode = EXIT_CODES.GENERIC_ERROR;
|
|
1539
|
+
}
|
|
1540
|
+
return;
|
|
1270
1541
|
}
|
|
1542
|
+
const failures = validateRows(rows, fixture);
|
|
1543
|
+
if (failures.length === 0) {
|
|
1544
|
+
console.log(`\n ✓ Adapter matches fixture (${fixturePath(site, command)}).`);
|
|
1545
|
+
const memoryReport = checkSiteMemory(site);
|
|
1546
|
+
printSiteMemoryReport(memoryReport, opts.strictMemory);
|
|
1547
|
+
if (!memoryReport.ok && opts.strictMemory) {
|
|
1548
|
+
process.exitCode = EXIT_CODES.GENERIC_ERROR;
|
|
1549
|
+
}
|
|
1550
|
+
return;
|
|
1551
|
+
}
|
|
1552
|
+
console.log(`\n ✗ Adapter output does not match fixture:`);
|
|
1553
|
+
for (const f of failures.slice(0, 20)) {
|
|
1554
|
+
const where = f.rowIndex !== undefined ? `row[${f.rowIndex}] ` : '';
|
|
1555
|
+
console.log(` - [${f.rule}] ${where}${f.detail}`);
|
|
1556
|
+
}
|
|
1557
|
+
if (failures.length > 20) {
|
|
1558
|
+
console.log(` ... and ${failures.length - 20} more failure(s)`);
|
|
1559
|
+
}
|
|
1560
|
+
process.exitCode = EXIT_CODES.GENERIC_ERROR;
|
|
1271
1561
|
}
|
|
1272
1562
|
catch (err) {
|
|
1273
1563
|
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
|