@jackwener/opencli 1.7.5 → 1.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -10
- package/README.zh-CN.md +18 -9
- package/cli-manifest.json +401 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/bilibili/video.js +68 -0
- package/clis/bilibili/video.test.js +132 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/deepseek/ask.js +37 -11
- package/clis/deepseek/ask.test.js +165 -0
- package/clis/deepseek/utils.js +192 -24
- package/clis/deepseek/utils.test.js +145 -0
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/jianyu/search.js +139 -3
- package/clis/jianyu/search.test.js +25 -0
- package/clis/jianyu/shared/procurement-detail.js +15 -0
- package/clis/jianyu/shared/procurement-detail.test.js +12 -0
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +35 -2
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +219 -0
- package/clis/twitter/tweets.test.js +125 -0
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/clis/youtube/channel.js +35 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/base-page.d.ts +13 -3
- package/dist/src/browser/base-page.js +35 -25
- package/dist/src/browser/cdp.d.ts +1 -0
- package/dist/src/browser/cdp.js +23 -5
- package/dist/src/browser/compound.d.ts +59 -0
- package/dist/src/browser/compound.js +112 -0
- package/dist/src/browser/compound.test.d.ts +1 -0
- package/dist/src/browser/compound.test.js +175 -0
- package/dist/src/browser/dom-snapshot.d.ts +7 -0
- package/dist/src/browser/dom-snapshot.js +76 -3
- package/dist/src/browser/dom-snapshot.test.js +65 -0
- package/dist/src/browser/extract.d.ts +69 -0
- package/dist/src/browser/extract.js +132 -0
- package/dist/src/browser/extract.test.d.ts +1 -0
- package/dist/src/browser/extract.test.js +129 -0
- package/dist/src/browser/find.d.ts +76 -0
- package/dist/src/browser/find.js +179 -0
- package/dist/src/browser/find.test.d.ts +1 -0
- package/dist/src/browser/find.test.js +120 -0
- package/dist/src/browser/html-tree.d.ts +75 -0
- package/dist/src/browser/html-tree.js +112 -0
- package/dist/src/browser/html-tree.test.d.ts +1 -0
- package/dist/src/browser/html-tree.test.js +181 -0
- package/dist/src/browser/network-cache.d.ts +48 -0
- package/dist/src/browser/network-cache.js +66 -0
- package/dist/src/browser/network-cache.test.d.ts +1 -0
- package/dist/src/browser/network-cache.test.js +58 -0
- package/dist/src/browser/network-key.d.ts +22 -0
- package/dist/src/browser/network-key.js +66 -0
- package/dist/src/browser/network-key.test.d.ts +1 -0
- package/dist/src/browser/network-key.test.js +49 -0
- package/dist/src/browser/shape-filter.d.ts +52 -0
- package/dist/src/browser/shape-filter.js +101 -0
- package/dist/src/browser/shape-filter.test.d.ts +1 -0
- package/dist/src/browser/shape-filter.test.js +101 -0
- package/dist/src/browser/shape.d.ts +23 -0
- package/dist/src/browser/shape.js +95 -0
- package/dist/src/browser/shape.test.d.ts +1 -0
- package/dist/src/browser/shape.test.js +82 -0
- package/dist/src/browser/target-errors.d.ts +14 -1
- package/dist/src/browser/target-errors.js +13 -0
- package/dist/src/browser/target-errors.test.js +39 -6
- package/dist/src/browser/target-resolver.d.ts +57 -10
- package/dist/src/browser/target-resolver.js +195 -75
- package/dist/src/browser/target-resolver.test.js +80 -5
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +936 -141
- package/dist/src/cli.test.js +1051 -1
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/execution.js +7 -2
- package/dist/src/execution.test.js +54 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/main.js +16 -0
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/dist/src/types.d.ts +18 -3
- package/package.json +5 -1
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Compound-component expansion for high-agent-failure form controls.
|
|
3
|
+
*
|
|
4
|
+
* Agents burn turns on three recurring input categories because the raw
|
|
5
|
+
* attribute dump from `browser state` under-specifies them:
|
|
6
|
+
*
|
|
7
|
+
* - date / time / datetime-local / month / week — agents type
|
|
8
|
+
* free-form strings and the browser silently ignores mismatched formats.
|
|
9
|
+
* - select — the snapshot caps visible options at ~6; agents don't know
|
|
10
|
+
* the full option set, can't match by label, and waste turns clicking
|
|
11
|
+
* to open the dropdown just to read options.
|
|
12
|
+
* - file — the snapshot shows current filenames but not `accept` or
|
|
13
|
+
* `multiple`; agents re-upload or pick unsupported MIME types.
|
|
14
|
+
*
|
|
15
|
+
* `compoundInfoOf(el)` returns a structured JSON summary agents can rely
|
|
16
|
+
* on. Included in `browser find --css` envelope so the agent gets the
|
|
17
|
+
* rich view without extra round-trips.
|
|
18
|
+
*
|
|
19
|
+
* Emitted as a JS source string (`COMPOUND_INFO_JS`) so it can be inlined
|
|
20
|
+
* into the generated evaluate scripts under find / snapshot / eval.
|
|
21
|
+
*/
|
|
22
|
+
/** Max options included in a SelectCompound.options[]. Above this, `options_total` still reflects the true count. */
export const COMPOUND_SELECT_OPTIONS_CAP = 50;
/** Max characters per option label / file name. */
export const COMPOUND_LABEL_CAP = 80;
/**
 * JavaScript source declaring `compoundInfoOf(el)`. Inlined into the JS
 * emitted by `buildFindJs` (and any other evaluate script that needs the
 * rich compound view). Returns a `CompoundInfo` object or `null`.
 *
 * Shapes produced by the inlined function:
 *   - date / time / datetime-local / month / week inputs:
 *       { control, format, current, min?, max? }
 *   - file inputs:
 *       { control: 'file', multiple, current: string[], accept? }
 *   - select elements:
 *       { control: 'select', multiple, current, options, options_total }
 *     where `current` is a label (single-select) or an array of labels
 *     (multi-select), and `options` holds at most
 *     COMPOUND_SELECT_OPTIONS_CAP entries.
 *   - anything else: null
 *
 * The two caps above are interpolated into the source when this module is
 * evaluated, so the emitted script is self-contained.
 */
export const COMPOUND_INFO_JS = `
function compoundInfoOf(el) {
    if (!el || !el.tagName) return null;
    // tagName is upper-case only for HTML elements in HTML documents; XHTML /
    // XML-parsed pages keep the authored case, so normalize before comparing.
    const tag = String(el.tagName).toUpperCase();
    const LABEL_CAP = ${COMPOUND_LABEL_CAP};
    const OPTS_CAP = ${COMPOUND_SELECT_OPTIONS_CAP};
    const hasOwn = Object.prototype.hasOwnProperty;
    if (tag === 'INPUT') {
        const type = (el.getAttribute('type') || 'text').toLowerCase();
        const FORMATS = {
            'date': 'YYYY-MM-DD',
            'time': 'HH:MM',
            'datetime-local': 'YYYY-MM-DDTHH:MM',
            'month': 'YYYY-MM',
            'week': 'YYYY-W##',
        };
        // Own-property check: a bare FORMATS[type] lookup is truthy for inherited
        // keys (type="constructor", type="toString", ...) and would misreport
        // such inputs as date-like controls.
        if (hasOwn.call(FORMATS, type)) {
            const info = {
                control: type,
                format: FORMATS[type],
                current: (el.value == null ? '' : String(el.value)),
            };
            const min = el.getAttribute('min');
            if (min) info.min = min;
            const max = el.getAttribute('max');
            if (max) info.max = max;
            return info;
        }
        if (type === 'file') {
            const info = {
                control: 'file',
                multiple: !!el.multiple,
                current: [],
            };
            const accept = el.getAttribute('accept');
            if (accept) info.accept = accept;
            // Best effort: reading the file list must never break the caller,
            // so a failure here just leaves 'current' with what was collected.
            try {
                if (el.files && el.files.length) {
                    for (let i = 0; i < el.files.length; i++) {
                        const name = (el.files[i].name || '').slice(0, LABEL_CAP);
                        info.current.push(name);
                    }
                }
            } catch (_) {}
            return info;
        }
        return null;
    }
    if (tag === 'SELECT') {
        const multiple = !!el.multiple;
        const options = [];
        const selectedLabels = [];
        let total = 0;
        try {
            const opts = el.options || [];
            total = opts.length;
            // Walk ALL options so 'current' reflects selections that sit beyond the
            // serialization cap. Only the first OPTS_CAP entries get pushed into
            // options[]; anything past the cap still contributes to selectedLabels
            // so agents see the true current state of big dropdowns.
            for (let i = 0; i < opts.length; i++) {
                const o = opts[i];
                const labelRaw = (o.label != null && o.label !== '') ? o.label : (o.text || '');
                const label = String(labelRaw).trim().slice(0, LABEL_CAP);
                if (i < OPTS_CAP) {
                    const entry = { label: label, value: o.value, selected: !!o.selected };
                    // option.disabled only mirrors the option's own attribute; an option
                    // nested in a disabled <optgroup> is unselectable as well.
                    const group = o.parentNode;
                    const groupDisabled = !!(group && group.tagName
                        && String(group.tagName).toUpperCase() === 'OPTGROUP'
                        && group.disabled);
                    if (o.disabled || groupDisabled) entry.disabled = true;
                    options.push(entry);
                }
                if (o.selected) selectedLabels.push(label);
            }
        } catch (_) {}
        return {
            control: 'select',
            multiple: multiple,
            current: multiple ? selectedLabels : (selectedLabels[0] || ''),
            options: options,
            options_total: total,
        };
    }
    return null;
}
`;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { COMPOUND_INFO_JS, COMPOUND_LABEL_CAP, COMPOUND_SELECT_OPTIONS_CAP, } from './compound.js';
|
|
3
|
+
/**
|
|
4
|
+
* Tests run the JS source in a sandbox via `new Function`, feeding it
|
|
5
|
+
* minimal mock elements shaped like the DOM elements the real code sees
|
|
6
|
+
* at runtime. Avoids a full jsdom setup while still exercising the logic
|
|
7
|
+
* end-to-end instead of only snapshotting string markers.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Evaluate the inlined `compoundInfoOf` source against a mock element.
 *
 * The source string is compiled with `new Function` so the exact code that
 * ships to the browser is what gets exercised.
 */
function runCompound(mockEl) {
    const body = [COMPOUND_INFO_JS, 'return compoundInfoOf(el);'].join('\n');
    const evaluate = new Function('el', body);
    return evaluate(mockEl);
}
|
|
13
|
+
/**
 * Build a minimal stand-in for an `<input>` element.
 *
 * @param attrs  Attribute map served through `getAttribute`; names missing
 *               from the map resolve to `null`.
 * @param extras Optional property values (`value`, `multiple`, `files`)
 *               copied onto the mock as-is.
 */
function mockInput(attrs, extras = {}) {
    const lookupAttribute = (name) => attrs[name] ?? null;
    const element = {
        tagName: 'INPUT',
        value: extras.value,
        multiple: extras.multiple,
        files: extras.files,
        getAttribute: lookupAttribute,
    };
    return element;
}
|
|
24
|
+
/**
 * Build a minimal stand-in for a `<select>` element.
 *
 * Every option is shallow-copied and its `selected` flag coerced to a
 * boolean. `getAttribute` always answers `null`.
 *
 * @param options  Option-like objects (`value`, `label`, `text`, ...).
 * @param multiple Whether the mock reports itself as a multi-select.
 */
function mockSelect(options, multiple = false) {
    const normalized = [];
    for (const option of options) {
        normalized.push(Object.assign({}, option, { selected: Boolean(option.selected) }));
    }
    return {
        tagName: 'SELECT',
        multiple,
        options: normalized,
        getAttribute() {
            return null;
        },
    };
}
|
|
33
|
+
// Date-like <input> controls: each reports its expected value format, the
// current value, and min/max bounds when the attributes are present.
describe('compoundInfoOf — date-like inputs', () => {
    it('returns { control, format, current } for <input type=date>', () => {
        const el = mockInput({ type: 'date' }, { value: '2026-04-21' });
        expect(runCompound(el)).toEqual({ control: 'date', format: 'YYYY-MM-DD', current: '2026-04-21' });
    });
    it('surfaces min + max when present', () => {
        const bounds = { min: '2026-01-01', max: '2026-12-31' };
        const el = mockInput({ type: 'date', ...bounds }, { value: '2026-04-21' });
        expect(runCompound(el)).toMatchObject({ min: '2026-01-01', max: '2026-12-31' });
    });
    it('handles time / datetime-local / month / week with correct format strings', () => {
        const expectedByType = [
            ['time', 'HH:MM'],
            ['datetime-local', 'YYYY-MM-DDTHH:MM'],
            ['month', 'YYYY-MM'],
            ['week', 'YYYY-W##'],
        ];
        expectedByType.forEach(([type, fmt]) => {
            const result = runCompound(mockInput({ type }, { value: '' }));
            expect(result.format).toBe(fmt);
        });
    });
    it('coerces null value into empty string instead of crashing', () => {
        // No extras at all: the mock's value is undefined.
        const el = mockInput({ type: 'date' });
        expect(runCompound(el)).toMatchObject({ control: 'date', current: '' });
    });
});
|
|
59
|
+
// File inputs: `multiple`, `accept`, and the (length-capped) names of the
// files currently attached.
describe('compoundInfoOf — file inputs', () => {
    it('returns { control: file, multiple, current[] }', () => {
        const attached = [{ name: 'a.png' }, { name: 'b.jpg' }];
        const el = mockInput({ type: 'file' }, { multiple: true, files: attached });
        expect(runCompound(el)).toEqual({ control: 'file', multiple: true, current: ['a.png', 'b.jpg'] });
    });
    it('includes accept when present', () => {
        const el = mockInput({ type: 'file', accept: 'image/*' }, { multiple: false });
        expect(runCompound(el)).toMatchObject({ control: 'file', accept: 'image/*' });
    });
    it('returns empty current[] when nothing uploaded', () => {
        const el = mockInput({ type: 'file' }, { multiple: false });
        expect(runCompound(el)).toEqual({ control: 'file', multiple: false, current: [] });
    });
    it('caps file name at COMPOUND_LABEL_CAP', () => {
        const oversized = 'x'.repeat(COMPOUND_LABEL_CAP + 50);
        const el = mockInput({ type: 'file' }, { multiple: false, files: [{ name: oversized }] });
        const result = runCompound(el);
        expect(result.current[0].length).toBe(COMPOUND_LABEL_CAP);
    });
});
|
|
81
|
+
// <select> controls: option serialization, label fallback, disabled flags,
// the options[] cap, and `current` for single- and multi-select.
describe('compoundInfoOf — select', () => {
    it('returns full options list with labels, values, selected flag', () => {
        const el = mockSelect([
            { value: 'us', label: 'United States', selected: true },
            { value: 'ca', label: 'Canada' },
            { value: 'fr', label: 'France' },
        ]);
        const { options } = runCompound(el);
        expect(options).toHaveLength(3);
        expect(options[0]).toEqual({ label: 'United States', value: 'us', selected: true });
        expect(options[2]).toEqual({ label: 'France', value: 'fr', selected: false });
    });
    it('sets current to the selected label (single-select)', () => {
        const el = mockSelect([
            { value: 'a', label: 'Alpha' },
            { value: 'b', label: 'Bravo', selected: true },
        ]);
        expect(runCompound(el)).toMatchObject({ control: 'select', multiple: false, current: 'Bravo' });
    });
    it('sets current to an array of labels when multiple=true', () => {
        const el = mockSelect([
            { value: 'a', label: 'Alpha', selected: true },
            { value: 'b', label: 'Bravo' },
            { value: 'c', label: 'Charlie', selected: true },
        ], true);
        expect(runCompound(el)).toMatchObject({ control: 'select', multiple: true, current: ['Alpha', 'Charlie'] });
    });
    it('falls back from option.label to option.text', () => {
        const el = mockSelect([
            { value: 'a', text: 'FromText' },
            { value: 'b', label: '', text: 'EmptyLabelFallback' },
        ]);
        const labels = runCompound(el).options.map((entry) => entry.label);
        expect(labels[0]).toBe('FromText');
        expect(labels[1]).toBe('EmptyLabelFallback');
    });
    it('marks disabled options', () => {
        const el = mockSelect([
            { value: 'a', label: 'A' },
            { value: 'b', label: 'B', disabled: true },
        ]);
        const [first, second] = runCompound(el).options;
        expect(first.disabled).toBeUndefined();
        expect(second.disabled).toBe(true);
    });
    it('caps options[] at COMPOUND_SELECT_OPTIONS_CAP but keeps true options_total', () => {
        const count = COMPOUND_SELECT_OPTIONS_CAP + 25;
        const big = [];
        for (let i = 0; i < count; i++) {
            big.push({ value: 'v' + i, label: 'L' + i });
        }
        const result = runCompound(mockSelect(big));
        expect(result.options.length).toBe(COMPOUND_SELECT_OPTIONS_CAP);
        expect(result.options_total).toBe(COMPOUND_SELECT_OPTIONS_CAP + 25);
    });
    it('returns "" for current on single-select with no selected option', () => {
        const el = mockSelect([
            { value: 'a', label: 'A' },
            { value: 'b', label: 'B' },
        ]);
        expect(runCompound(el)).toMatchObject({ current: '' });
    });
    // Regression: an earlier version of the loop stopped walking options at
    // COMPOUND_SELECT_OPTIONS_CAP, so a long country dropdown whose selected
    // entry sat at index 80 was reported with current="". Agents then assumed
    // nothing was selected and picked another country.
    it('populates current even when the selected option sits past the serialization cap', () => {
        const count = COMPOUND_SELECT_OPTIONS_CAP + 25;
        const chosen = COMPOUND_SELECT_OPTIONS_CAP + 10;
        const big = [];
        for (let i = 0; i < count; i++) {
            big.push({ value: 'v' + i, label: 'L' + i, selected: i === chosen });
        }
        const result = runCompound(mockSelect(big));
        expect(result.current).toBe('L' + (COMPOUND_SELECT_OPTIONS_CAP + 10));
        expect(result.options.length).toBe(COMPOUND_SELECT_OPTIONS_CAP);
        expect(result.options_total).toBe(COMPOUND_SELECT_OPTIONS_CAP + 25);
    });
    it('multi-select: current[] includes labels for selected options beyond the cap', () => {
        const count = COMPOUND_SELECT_OPTIONS_CAP + 10;
        const pastCap = COMPOUND_SELECT_OPTIONS_CAP + 5;
        const big = [];
        for (let i = 0; i < count; i++) {
            big.push({ value: 'v' + i, label: 'L' + i, selected: i === 3 || i === pastCap });
        }
        const result = runCompound(mockSelect(big, true));
        expect(result.current).toEqual(['L3', 'L' + (COMPOUND_SELECT_OPTIONS_CAP + 5)]);
    });
});
|
|
164
|
+
// Anything that is not a date-like input, file input, or select yields null.
describe('compoundInfoOf — unsupported shapes', () => {
    it('returns null for plain text input', () => {
        const el = mockInput({ type: 'text' }, { value: 'hi' });
        expect(runCompound(el)).toBeNull();
    });
    it('returns null for non-form tags', () => {
        const div = { tagName: 'DIV', getAttribute: () => null };
        expect(runCompound(div)).toBeNull();
    });
    it('returns null for null / missing element', () => {
        const missing = null;
        const tagless = {};
        expect(runCompound(missing)).toBeNull();
        expect(runCompound(tagless)).toBeNull();
    });
});
|
|
@@ -22,6 +22,13 @@
|
|
|
22
22
|
* Additional tools:
|
|
23
23
|
* - scrollToRefJs(ref) — scroll to a data-opencli-ref element
|
|
24
24
|
* - getFormStateJs() — extract all form fields as structured JSON
|
|
25
|
+
*
|
|
26
|
+
* Compound sidecar:
|
|
27
|
+
* After the tree, a `compounds:` section lists rich JSON for every
|
|
28
|
+
* date/select/file ref — format, full option list (up to cap) with
|
|
29
|
+
* `options_total` reflecting the true count, file `accept` + `multiple`.
|
|
30
|
+
* This is what the snapshot's inline attr dump cannot express and what
|
|
31
|
+
* agents kept blowing turns on.
|
|
25
32
|
*/
|
|
26
33
|
export interface DomSnapshotOptions {
|
|
27
34
|
/** Extra pixels beyond viewport to include (default 800) */
|
|
@@ -22,7 +22,15 @@
|
|
|
22
22
|
* Additional tools:
|
|
23
23
|
* - scrollToRefJs(ref) — scroll to a data-opencli-ref element
|
|
24
24
|
* - getFormStateJs() — extract all form fields as structured JSON
|
|
25
|
+
*
|
|
26
|
+
* Compound sidecar:
|
|
27
|
+
* After the tree, a `compounds:` section lists rich JSON for every
|
|
28
|
+
* date/select/file ref — format, full option list (up to cap) with
|
|
29
|
+
* `options_total` reflecting the true count, file `accept` + `multiple`.
|
|
30
|
+
* This is what the snapshot's inline attr dump cannot express and what
|
|
31
|
+
* agents kept blowing turns on.
|
|
25
32
|
*/
|
|
33
|
+
import { COMPOUND_INFO_JS } from './compound.js';
|
|
26
34
|
// ─── Utility JS Generators ───────────────────────────────────────────
|
|
27
35
|
/**
|
|
28
36
|
* Generate JS to scroll to an element identified by data-opencli-ref.
|
|
@@ -155,6 +163,8 @@ export function generateSnapshotJs(opts = {}) {
|
|
|
155
163
|
(() => {
|
|
156
164
|
'use strict';
|
|
157
165
|
|
|
166
|
+
${COMPOUND_INFO_JS}
|
|
167
|
+
|
|
158
168
|
// ── Config ─────────────────────────────────────────────────────────
|
|
159
169
|
const VIEWPORT_EXPAND = ${viewportExpand};
|
|
160
170
|
const MAX_DEPTH = ${maxDepth};
|
|
@@ -222,6 +232,38 @@ export function generateSnapshotJs(opts = {}) {
|
|
|
222
232
|
|
|
223
233
|
const PROPAGATING_TAGS = new Set(['a', 'button']);
|
|
224
234
|
|
|
235
|
+
// Roles whose element wraps its own interactive descendants (icon spans
|
|
236
|
+
// inside a role=button, chevron inside role=link). When we see one of these,
|
|
237
|
+
// we propagate its bbox to children so we can suppress duplicate refs on
|
|
238
|
+
// undistinctive descendants that are ≥99% contained.
|
|
239
|
+
const PROPAGATING_ROLES = new Set(['button', 'link', 'menuitem', 'tab', 'option']);
|
|
240
|
+
|
|
241
|
+
function isBboxPropagator(el, tag) {
|
|
242
|
+
if (PROPAGATING_TAGS.has(tag)) return true;
|
|
243
|
+
const role = el.getAttribute('role');
|
|
244
|
+
return !!(role && PROPAGATING_ROLES.has(role));
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// True when an interactive element still deserves its own [N] ref even
|
|
248
|
+
// though it's visually subsumed by a propagating ancestor. Anything with
|
|
249
|
+
// an aria-label, aria-labelledby, id, test id, name, or its own form
|
|
250
|
+
// semantics is treated as distinctive — everything else (naked spans /
|
|
251
|
+
// divs / svgs that merely inherit click from the parent button) gets
|
|
252
|
+
// folded into the parent so the snapshot doesn't ship [1]<button>[2]<svg>.
|
|
253
|
+
function isDistinctivelyInteractive(el) {
|
|
254
|
+
if (el.hasAttribute('aria-label')) return true;
|
|
255
|
+
if (el.hasAttribute('aria-labelledby')) return true;
|
|
256
|
+
if (el.id) return true;
|
|
257
|
+
if (el.getAttribute('data-testid') || el.getAttribute('data-test')) return true;
|
|
258
|
+
if (el.hasAttribute('name')) return true;
|
|
259
|
+
const tag = el.tagName.toLowerCase();
|
|
260
|
+
// Real form controls always stand on their own, even when nested in a label/button
|
|
261
|
+
if (tag === 'input' || tag === 'select' || tag === 'textarea') return true;
|
|
262
|
+
// Anchors with their own href are distinct targets
|
|
263
|
+
if (tag === 'a' && el.hasAttribute('href')) return true;
|
|
264
|
+
return false;
|
|
265
|
+
}
|
|
266
|
+
|
|
225
267
|
const AD_PATTERNS = [
|
|
226
268
|
'googleadservices.com', 'doubleclick.net', 'googlesyndication.com',
|
|
227
269
|
'facebook.com/tr', 'analytics.google.com', 'connect.facebook.net',
|
|
@@ -576,6 +618,7 @@ export function generateSnapshotJs(opts = {}) {
|
|
|
576
618
|
const hiddenInteractives = [];
|
|
577
619
|
const currentHashes = [];
|
|
578
620
|
const refIdentity = {};
|
|
621
|
+
const compoundInfos = {};
|
|
579
622
|
let iframeCount = 0;
|
|
580
623
|
let crossOriginIndex = 0;
|
|
581
624
|
|
|
@@ -627,7 +670,9 @@ export function generateSnapshotJs(opts = {}) {
|
|
|
627
670
|
if (!(tag === 'input' && el.type === 'file')) return false;
|
|
628
671
|
}
|
|
629
672
|
|
|
630
|
-
|
|
673
|
+
// \`interactive\` gets demoted below if bbox containment folds this node
|
|
674
|
+
// into a propagating ancestor — using \`let\` so the dedup pass can mutate it.
|
|
675
|
+
let interactive = isInteractive(el);
|
|
631
676
|
|
|
632
677
|
// Viewport threshold pruning
|
|
633
678
|
if (hasArea && !isInExpandedViewport(rect)) {
|
|
@@ -648,7 +693,7 @@ export function generateSnapshotJs(opts = {}) {
|
|
|
648
693
|
const scrollInfo = getScrollInfo(el);
|
|
649
694
|
const isScrollable = scrollInfo !== null;
|
|
650
695
|
|
|
651
|
-
// BBox dedup
|
|
696
|
+
// BBox dedup — tier 1 (non-interactive descendants, 0.95 threshold)
|
|
652
697
|
let excludedByParent = false;
|
|
653
698
|
if (BBOX_DEDUP && parentPropagatingRect && !interactive) {
|
|
654
699
|
if (hasArea && isContainedBy(rect, parentPropagatingRect, 0.95)) {
|
|
@@ -660,8 +705,19 @@ export function generateSnapshotJs(opts = {}) {
|
|
|
660
705
|
}
|
|
661
706
|
}
|
|
662
707
|
|
|
708
|
+
// BBox dedup — tier 2 (interactive descendants, 0.99 threshold, browser-use style).
|
|
709
|
+
// This kills the "[1]<button> [2]<svg> [3]<span>" noise on icon-buttons by
|
|
710
|
+
// folding the icon / chevron into the button's ref. The 0.99 threshold + the
|
|
711
|
+
// isDistinctivelyInteractive gate together ensure we only drop nodes that
|
|
712
|
+
// add no new actionable surface — a nested <input> or <a href> stays.
|
|
713
|
+
if (BBOX_DEDUP && parentPropagatingRect && interactive && hasArea) {
|
|
714
|
+
if (isContainedBy(rect, parentPropagatingRect, 0.99) && !isDistinctivelyInteractive(el)) {
|
|
715
|
+
interactive = false;
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
|
|
663
719
|
let propagateRect = parentPropagatingRect;
|
|
664
|
-
if (BBOX_DEDUP &&
|
|
720
|
+
if (BBOX_DEDUP && hasArea && isBboxPropagator(el, tag)) propagateRect = rect;
|
|
665
721
|
|
|
666
722
|
// Process children
|
|
667
723
|
const origLen = lines.length;
|
|
@@ -725,6 +781,10 @@ export function generateSnapshotJs(opts = {}) {
|
|
|
725
781
|
id: el.id || '',
|
|
726
782
|
testId: el.getAttribute('data-testid') || el.getAttribute('data-test') || '',
|
|
727
783
|
};
|
|
784
|
+
// Compound contract for date/select/file — captured per-ref so the
|
|
785
|
+
// sidecar maps one-to-one with the [N] tokens in the tree.
|
|
786
|
+
const compound = compoundInfoOf(el);
|
|
787
|
+
if (compound) compoundInfos['' + interactiveIndex] = compound;
|
|
728
788
|
}
|
|
729
789
|
|
|
730
790
|
// Tag + attributes
|
|
@@ -806,6 +866,19 @@ export function generateSnapshotJs(opts = {}) {
|
|
|
806
866
|
if (hiddenInteractives.length > 10) lines.push(' …' + (hiddenInteractives.length - 10) + ' more');
|
|
807
867
|
}
|
|
808
868
|
|
|
869
|
+
// Compound sidecar — rich JSON for date/select/file refs. Keys align with [N] tokens in the tree.
|
|
870
|
+
const compoundRefs = Object.keys(compoundInfos);
|
|
871
|
+
if (compoundRefs.length > 0) {
|
|
872
|
+
lines.push('---');
|
|
873
|
+
lines.push('compounds (' + compoundRefs.length + '):');
|
|
874
|
+
compoundRefs.sort(function (a, b) { return parseInt(a, 10) - parseInt(b, 10); });
|
|
875
|
+
for (const ref of compoundRefs) {
|
|
876
|
+
try {
|
|
877
|
+
lines.push(' [' + ref + '] ' + JSON.stringify(compoundInfos[ref]));
|
|
878
|
+
} catch {}
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
|
|
809
882
|
// Footer
|
|
810
883
|
lines.push('---');
|
|
811
884
|
lines.push('interactive: ' + interactiveIndex + ' | iframes: ' + iframeCount);
|
|
@@ -102,6 +102,9 @@ describe('generateSnapshotJs', () => {
|
|
|
102
102
|
// BBox dedup
|
|
103
103
|
expect(js).toContain('isContainedBy');
|
|
104
104
|
expect(js).toContain('PROPAGATING_TAGS');
|
|
105
|
+
expect(js).toContain('PROPAGATING_ROLES');
|
|
106
|
+
expect(js).toContain('isBboxPropagator');
|
|
107
|
+
expect(js).toContain('isDistinctivelyInteractive');
|
|
105
108
|
// Shadow DOM
|
|
106
109
|
expect(js).toContain('shadowRoot');
|
|
107
110
|
expect(js).toContain('|shadow|');
|
|
@@ -151,6 +154,55 @@ describe('generateSnapshotJs', () => {
|
|
|
151
154
|
expect(js).toContain('page_scroll');
|
|
152
155
|
});
|
|
153
156
|
});
|
|
157
|
+
describe('BBox 99% containment filter', () => {
|
|
158
|
+
it('propagates bbox for both PROPAGATING_TAGS and PROPAGATING_ROLES', () => {
|
|
159
|
+
const js = generateSnapshotJs();
|
|
160
|
+
// Role-based propagator list covers the common wrapper-as-control patterns
|
|
161
|
+
// that show up as <div role=button><svg/><span/></div> on modern SPAs.
|
|
162
|
+
for (const role of ['button', 'link', 'menuitem', 'tab', 'option']) {
|
|
163
|
+
expect(js).toContain(`'${role}'`);
|
|
164
|
+
}
|
|
165
|
+
// propagate site uses the unified helper, not only the tag set
|
|
166
|
+
expect(js).toContain('isBboxPropagator(el, tag)');
|
|
167
|
+
});
|
|
168
|
+
it('suppresses interactive descendants at 0.99 containment when they are not distinctive', () => {
|
|
169
|
+
const js = generateSnapshotJs();
|
|
170
|
+
expect(js).toContain('isContainedBy(rect, parentPropagatingRect, 0.99)');
|
|
171
|
+
expect(js).toContain('!isDistinctivelyInteractive(el)');
|
|
172
|
+
// The suppression path flips the local interactive flag so the node is
|
|
173
|
+
// still emitted (for text / shape) but does not get its own [N] ref.
|
|
174
|
+
expect(js).toContain('interactive = false');
|
|
175
|
+
});
|
|
176
|
+
it('does not suppress inputs / href-bearing anchors even when fully contained', () => {
|
|
177
|
+
const js = generateSnapshotJs();
|
|
178
|
+
// Guards inside isDistinctivelyInteractive
|
|
179
|
+
expect(js).toContain("tag === 'input'");
|
|
180
|
+
expect(js).toContain("tag === 'select'");
|
|
181
|
+
expect(js).toContain("tag === 'textarea'");
|
|
182
|
+
expect(js).toContain("tag === 'a'");
|
|
183
|
+
expect(js).toContain("el.hasAttribute('href')");
|
|
184
|
+
// aria-label / aria-labelledby / id / test-id / name preserve distinctness
|
|
185
|
+
expect(js).toContain("el.hasAttribute('aria-label')");
|
|
186
|
+
expect(js).toContain("el.hasAttribute('aria-labelledby')");
|
|
187
|
+
expect(js).toContain("el.id");
|
|
188
|
+
expect(js).toContain("el.getAttribute('data-testid')");
|
|
189
|
+
expect(js).toContain("el.hasAttribute('name')");
|
|
190
|
+
});
|
|
191
|
+
it('keeps the existing 0.95 non-interactive dedup tier in place', () => {
|
|
192
|
+
const js = generateSnapshotJs();
|
|
193
|
+
// The original non-interactive bbox filter is still present alongside the
|
|
194
|
+
// new interactive tier — two complementary thresholds, not a replacement.
|
|
195
|
+
expect(js).toContain('isContainedBy(rect, parentPropagatingRect, 0.95)');
|
|
196
|
+
});
|
|
197
|
+
it('bbox containment branches are gated on BBOX_DEDUP flag', () => {
|
|
198
|
+
const off = generateSnapshotJs({ bboxDedup: false });
|
|
199
|
+
// When the option is off, the filter becomes inert (BBOX_DEDUP = false)
|
|
200
|
+
// but the inlined helpers still ship — we only guard at the call sites.
|
|
201
|
+
expect(off).toContain('BBOX_DEDUP = false');
|
|
202
|
+
expect(off).toContain('isBboxPropagator');
|
|
203
|
+
expect(off).toContain('isDistinctivelyInteractive');
|
|
204
|
+
});
|
|
205
|
+
});
|
|
154
206
|
describe('scrollToRefJs', () => {
|
|
155
207
|
it('generates valid JS', () => {
|
|
156
208
|
const js = scrollToRefJs('42');
|
|
@@ -245,4 +297,17 @@ describe('Search Element Detection', () => {
|
|
|
245
297
|
const js = generateSnapshotJs();
|
|
246
298
|
expect(js).toContain('isSearchElement(el)');
|
|
247
299
|
});
|
|
300
|
+
// Blocker B regression: compound contract must be emitted by `browser state`,
|
|
301
|
+
// not only by `browser find --css`. Otherwise agents inspecting the default
|
|
302
|
+
// snapshot still have to round-trip `find` on every date/select/file control.
|
|
303
|
+
it('inlines compoundInfoOf() and attaches compound info to each interactive ref', () => {
|
|
304
|
+
const js = generateSnapshotJs();
|
|
305
|
+
expect(js).toContain('function compoundInfoOf(el)');
|
|
306
|
+
// Wiring: the walk body should call compoundInfoOf on every interactive node
|
|
307
|
+
expect(js).toContain('compoundInfoOf(el)');
|
|
308
|
+
// And collect them into a per-ref map keyed by the same [N] index as the tree
|
|
309
|
+
expect(js).toContain('compoundInfos');
|
|
310
|
+
// And emit a sidecar section after the tree so agents can find the JSON
|
|
311
|
+
expect(js).toContain("'compounds ('");
|
|
312
|
+
});
|
|
248
313
|
});
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `browser extract` — agent-native article/content reading channel.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline (from first principles — agents want the *content*, not the DOM):
|
|
5
|
+
* 1. Scope: select `--selector` (default: document.body or <main>/<article>)
|
|
6
|
+
* 2. Denoise: strip script/style/nav/header/footer/aside/iframe/svg/form, inline noise
|
|
7
|
+
* 3. Convert: HTML → Markdown via shared `htmlToMarkdown` (turndown)
|
|
8
|
+
* 4. Chunk: paragraph-boundary-aware slicing with `next_start_char` cursor
|
|
9
|
+
*
|
|
10
|
+
* Why a separate command:
|
|
11
|
+
* - `get html --as json` returns tree structure; useless for "read the article".
|
|
12
|
+
* - `get text` flattens everything; loses headings, lists, links.
|
|
13
|
+
* - Markdown is the agent-readable middle ground: structure preserved, noise gone.
|
|
14
|
+
*
|
|
15
|
+
* Continuation contract: the envelope always carries `start`, `end`,
|
|
16
|
+
* `total_chars`, and `next_start_char` (null when the last chunk was emitted).
|
|
17
|
+
* Agents pass `--start <next>` to continue. No session state required.
|
|
18
|
+
*/
|
|
19
|
+
/**
|
|
20
|
+
* Returns the JS expression string used with `page.evaluate` to produce the
|
|
21
|
+
* cleaned HTML subtree that we then hand to `htmlToMarkdown`. We do the
|
|
22
|
+
* denoise/clone inside the page so we can use DOM APIs (querySelectorAll,
|
|
23
|
+
* cloneNode) rather than regex on serialized HTML.
|
|
24
|
+
*/
|
|
25
|
+
export declare function buildExtractHtmlJs(selector: string | null): string;
|
|
26
|
+
export interface ExtractChunkOptions {
|
|
27
|
+
content: string;
|
|
28
|
+
start: number;
|
|
29
|
+
chunkSize: number;
|
|
30
|
+
}
|
|
31
|
+
export interface ExtractChunkResult {
|
|
32
|
+
content: string;
|
|
33
|
+
start: number;
|
|
34
|
+
end: number;
|
|
35
|
+
nextStartChar: number | null;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Slice `content` into one chunk starting at `start` with target size
|
|
39
|
+
* `chunkSize`. When the chunk would land mid-paragraph, we pull the break
|
|
40
|
+
* back to the nearest `\n\n` (or `\n`) within a small window to keep the
|
|
41
|
+
* output readable. If no boundary is found, we hard-cut at `start+chunkSize`.
|
|
42
|
+
*/
|
|
43
|
+
export declare function chunkMarkdown(opts: ExtractChunkOptions): ExtractChunkResult;
|
|
44
|
+
export interface RunExtractOptions {
|
|
45
|
+
html: string;
|
|
46
|
+
url: string;
|
|
47
|
+
title: string;
|
|
48
|
+
selector: string | null;
|
|
49
|
+
start: number;
|
|
50
|
+
chunkSize: number;
|
|
51
|
+
}
|
|
52
|
+
export interface RunExtractResult {
|
|
53
|
+
url: string;
|
|
54
|
+
title: string;
|
|
55
|
+
selector: string | null;
|
|
56
|
+
total_chars: number;
|
|
57
|
+
chunk_size: number;
|
|
58
|
+
start: number;
|
|
59
|
+
end: number;
|
|
60
|
+
next_start_char: number | null;
|
|
61
|
+
content: string;
|
|
62
|
+
}
|
|
63
|
+
/** End-to-end host-side pipeline: HTML → markdown → chunked envelope. */
|
|
64
|
+
export declare function runExtractFromHtml(opts: RunExtractOptions): RunExtractResult;
|
|
65
|
+
export declare const __extractInternals: {
|
|
66
|
+
DEFAULT_CHUNK_SIZE: number;
|
|
67
|
+
MIN_CHUNK_SIZE: number;
|
|
68
|
+
MAX_CHUNK_SIZE: number;
|
|
69
|
+
};
|