@jackwener/opencli 1.7.5 → 1.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -10
- package/README.zh-CN.md +18 -9
- package/cli-manifest.json +401 -11
- package/clis/51job/company.js +125 -0
- package/clis/51job/detail.js +108 -0
- package/clis/51job/hot.js +55 -0
- package/clis/51job/search.js +79 -0
- package/clis/51job/utils.js +302 -0
- package/clis/51job/utils.test.js +69 -0
- package/clis/bilibili/video.js +68 -0
- package/clis/bilibili/video.test.js +132 -0
- package/clis/chatgpt/image.js +1 -1
- package/clis/deepseek/ask.js +37 -11
- package/clis/deepseek/ask.test.js +165 -0
- package/clis/deepseek/utils.js +192 -24
- package/clis/deepseek/utils.test.js +145 -0
- package/clis/gemini/image.js +1 -1
- package/clis/instagram/download.js +1 -1
- package/clis/jianyu/search.js +139 -3
- package/clis/jianyu/search.test.js +25 -0
- package/clis/jianyu/shared/procurement-detail.js +15 -0
- package/clis/jianyu/shared/procurement-detail.test.js +12 -0
- package/clis/twitter/likes.js +3 -2
- package/clis/twitter/search.js +4 -2
- package/clis/twitter/search.test.js +4 -0
- package/clis/twitter/shared.js +35 -2
- package/clis/twitter/shared.test.js +96 -0
- package/clis/twitter/thread.js +3 -1
- package/clis/twitter/timeline.js +3 -2
- package/clis/twitter/tweets.js +219 -0
- package/clis/twitter/tweets.test.js +125 -0
- package/clis/web/read.js +25 -5
- package/clis/web/read.test.js +76 -0
- package/clis/weread/ai-outline.js +170 -0
- package/clis/weread/ai-outline.test.js +83 -0
- package/clis/weread/book.js +57 -44
- package/clis/weread/commands.test.js +24 -0
- package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
- package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
- package/clis/youtube/channel.js +35 -0
- package/dist/src/browser/analyze.d.ts +103 -0
- package/dist/src/browser/analyze.js +230 -0
- package/dist/src/browser/analyze.test.d.ts +1 -0
- package/dist/src/browser/analyze.test.js +164 -0
- package/dist/src/browser/article-extract.d.ts +57 -0
- package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
- package/dist/src/browser/article-extract.e2e.test.js +105 -0
- package/dist/src/browser/article-extract.js +169 -0
- package/dist/src/browser/article-extract.test.d.ts +1 -0
- package/dist/src/browser/article-extract.test.js +94 -0
- package/dist/src/browser/base-page.d.ts +13 -3
- package/dist/src/browser/base-page.js +35 -25
- package/dist/src/browser/cdp.d.ts +1 -0
- package/dist/src/browser/cdp.js +23 -5
- package/dist/src/browser/compound.d.ts +59 -0
- package/dist/src/browser/compound.js +112 -0
- package/dist/src/browser/compound.test.d.ts +1 -0
- package/dist/src/browser/compound.test.js +175 -0
- package/dist/src/browser/dom-snapshot.d.ts +7 -0
- package/dist/src/browser/dom-snapshot.js +76 -3
- package/dist/src/browser/dom-snapshot.test.js +65 -0
- package/dist/src/browser/extract.d.ts +69 -0
- package/dist/src/browser/extract.js +132 -0
- package/dist/src/browser/extract.test.d.ts +1 -0
- package/dist/src/browser/extract.test.js +129 -0
- package/dist/src/browser/find.d.ts +76 -0
- package/dist/src/browser/find.js +179 -0
- package/dist/src/browser/find.test.d.ts +1 -0
- package/dist/src/browser/find.test.js +120 -0
- package/dist/src/browser/html-tree.d.ts +75 -0
- package/dist/src/browser/html-tree.js +112 -0
- package/dist/src/browser/html-tree.test.d.ts +1 -0
- package/dist/src/browser/html-tree.test.js +181 -0
- package/dist/src/browser/network-cache.d.ts +48 -0
- package/dist/src/browser/network-cache.js +66 -0
- package/dist/src/browser/network-cache.test.d.ts +1 -0
- package/dist/src/browser/network-cache.test.js +58 -0
- package/dist/src/browser/network-key.d.ts +22 -0
- package/dist/src/browser/network-key.js +66 -0
- package/dist/src/browser/network-key.test.d.ts +1 -0
- package/dist/src/browser/network-key.test.js +49 -0
- package/dist/src/browser/shape-filter.d.ts +52 -0
- package/dist/src/browser/shape-filter.js +101 -0
- package/dist/src/browser/shape-filter.test.d.ts +1 -0
- package/dist/src/browser/shape-filter.test.js +101 -0
- package/dist/src/browser/shape.d.ts +23 -0
- package/dist/src/browser/shape.js +95 -0
- package/dist/src/browser/shape.test.d.ts +1 -0
- package/dist/src/browser/shape.test.js +82 -0
- package/dist/src/browser/target-errors.d.ts +14 -1
- package/dist/src/browser/target-errors.js +13 -0
- package/dist/src/browser/target-errors.test.js +39 -6
- package/dist/src/browser/target-resolver.d.ts +57 -10
- package/dist/src/browser/target-resolver.js +195 -75
- package/dist/src/browser/target-resolver.test.js +80 -5
- package/dist/src/browser/verify-fixture.d.ts +59 -0
- package/dist/src/browser/verify-fixture.js +213 -0
- package/dist/src/browser/verify-fixture.test.d.ts +1 -0
- package/dist/src/browser/verify-fixture.test.js +161 -0
- package/dist/src/cli.d.ts +32 -0
- package/dist/src/cli.js +936 -141
- package/dist/src/cli.test.js +1051 -1
- package/dist/src/daemon.d.ts +3 -2
- package/dist/src/daemon.js +16 -4
- package/dist/src/daemon.test.d.ts +1 -0
- package/dist/src/daemon.test.js +19 -0
- package/dist/src/download/article-download.d.ts +12 -0
- package/dist/src/download/article-download.js +141 -17
- package/dist/src/download/article-download.test.js +196 -0
- package/dist/src/download/index.js +73 -86
- package/dist/src/errors.js +4 -2
- package/dist/src/errors.test.js +13 -0
- package/dist/src/execution.js +7 -2
- package/dist/src/execution.test.js +54 -0
- package/dist/src/launcher.d.ts +1 -1
- package/dist/src/launcher.js +3 -3
- package/dist/src/main.js +16 -0
- package/dist/src/output.js +1 -1
- package/dist/src/output.test.js +6 -0
- package/dist/src/types.d.ts +18 -3
- package/package.json +5 -1
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Client-side HTML → structured tree serializer.
|
|
3
|
+
*
|
|
4
|
+
* Returned as a JS string that gets passed to `page.evaluate`. The expression
|
|
5
|
+
* walks the DOM subtree rooted at the first selector match (or documentElement
|
|
6
|
+
* when no selector is given) and emits a compact `{tag, attrs, text, children}`
|
|
7
|
+
* tree for agents to consume instead of re-parsing raw HTML.
|
|
8
|
+
*
|
|
9
|
+
* Text handling: `text` is the concatenated text of direct text children only,
|
|
10
|
+
* whitespace-collapsed. Nested element text is left inside `children[].text`.
|
|
11
|
+
* Ordering between text and elements is not preserved — agents that need it
|
|
12
|
+
* should fall back to raw HTML mode.
|
|
13
|
+
*
|
|
14
|
+
* Budget knobs let the caller bound the output on large pages — previously an
|
|
15
|
+
* unscoped `get html --as json` could return a giant tree. Callers set any
|
|
16
|
+
* combination of `depth` / `childrenMax` / `textMax`; each hit is reported in
|
|
17
|
+
* the `truncated` envelope so agents know to narrow their selector or raise
|
|
18
|
+
* the budget.
|
|
19
|
+
*
|
|
20
|
+
* Compound controls (date / time / datetime-local / month / week / select /
|
|
21
|
+
* file) gain a `compound` field so agents inspecting the JSON tree see the
|
|
22
|
+
* full contract — date format, full option list (up to cap) with selections
|
|
23
|
+
* preserved for options beyond the cap, file `accept` and `multiple`. Without
|
|
24
|
+
* this wiring agents repeatedly guess values on these controls from the raw
|
|
25
|
+
* attributes, which is the failure mode compound.ts was built to eliminate.
|
|
26
|
+
*/
|
|
27
|
+
import { type CompoundInfo } from './compound.js';
|
|
28
|
+
export interface BuildHtmlTreeJsOptions {
|
|
29
|
+
/** CSS selector to scope the tree; unscoped = documentElement */
|
|
30
|
+
selector?: string | null;
|
|
31
|
+
/** Max depth below the root (0 = root only, no children). Omit = unlimited. */
|
|
32
|
+
depth?: number | null;
|
|
33
|
+
/** Max element children per node before the rest get dropped. Omit = unlimited. */
|
|
34
|
+
childrenMax?: number | null;
|
|
35
|
+
/** Max chars of direct text per node before truncation. Omit = unlimited. */
|
|
36
|
+
textMax?: number | null;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Returns a JS expression string. When evaluated in a page context the
|
|
40
|
+
* expression resolves to either
|
|
41
|
+
* `{selector, matched, tree, truncated}` on success, or
|
|
42
|
+
* `{selector, invalidSelector: true, reason}` when `querySelectorAll`
|
|
43
|
+
* throws a `SyntaxError` for an unparseable selector.
|
|
44
|
+
*
|
|
45
|
+
* Callers must branch on `invalidSelector` to convert it into the CLI's
|
|
46
|
+
* `invalid_selector` structured error; otherwise the browser-level exception
|
|
47
|
+
* would bubble out of `page.evaluate` and bypass the structured-error
|
|
48
|
+
* contract that agents rely on.
|
|
49
|
+
*/
|
|
50
|
+
export declare function buildHtmlTreeJs(opts?: BuildHtmlTreeJsOptions): string;
|
|
51
|
+
export interface HtmlNode {
|
|
52
|
+
tag: string;
|
|
53
|
+
attrs: Record<string, string>;
|
|
54
|
+
text: string;
|
|
55
|
+
children: HtmlNode[];
|
|
56
|
+
/**
|
|
57
|
+
* Rich view for date/select/file controls. Omitted for non-compound elements
|
|
58
|
+
* so agents can rely on `compound != null` as a signal.
|
|
59
|
+
*/
|
|
60
|
+
compound?: CompoundInfo;
|
|
61
|
+
}
|
|
62
|
+
export interface HtmlTreeTruncationInfo {
|
|
63
|
+
/** At least one element child was dropped because depth budget was hit. */
|
|
64
|
+
depth?: true;
|
|
65
|
+
/** Count of element children dropped across the tree due to `childrenMax`. */
|
|
66
|
+
children_dropped?: number;
|
|
67
|
+
/** Count of nodes whose `text` was cut to `textMax`. */
|
|
68
|
+
text_truncated?: number;
|
|
69
|
+
}
|
|
70
|
+
export interface HtmlTreeResult {
|
|
71
|
+
selector: string | null;
|
|
72
|
+
matched: number;
|
|
73
|
+
tree: HtmlNode | null;
|
|
74
|
+
truncated?: HtmlTreeTruncationInfo;
|
|
75
|
+
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Client-side HTML → structured tree serializer.
|
|
3
|
+
*
|
|
4
|
+
* Returned as a JS string that gets passed to `page.evaluate`. The expression
|
|
5
|
+
* walks the DOM subtree rooted at the first selector match (or documentElement
|
|
6
|
+
* when no selector is given) and emits a compact `{tag, attrs, text, children}`
|
|
7
|
+
* tree for agents to consume instead of re-parsing raw HTML.
|
|
8
|
+
*
|
|
9
|
+
* Text handling: `text` is the concatenated text of direct text children only,
|
|
10
|
+
* whitespace-collapsed. Nested element text is left inside `children[].text`.
|
|
11
|
+
* Ordering between text and elements is not preserved — agents that need it
|
|
12
|
+
* should fall back to raw HTML mode.
|
|
13
|
+
*
|
|
14
|
+
* Budget knobs let the caller bound the output on large pages — previously an
|
|
15
|
+
* unscoped `get html --as json` could return a giant tree. Callers set any
|
|
16
|
+
* combination of `depth` / `childrenMax` / `textMax`; each hit is reported in
|
|
17
|
+
* the `truncated` envelope so agents know to narrow their selector or raise
|
|
18
|
+
* the budget.
|
|
19
|
+
*
|
|
20
|
+
* Compound controls (date / time / datetime-local / month / week / select /
|
|
21
|
+
* file) gain a `compound` field so agents inspecting the JSON tree see the
|
|
22
|
+
* full contract — date format, full option list (up to cap) with selections
|
|
23
|
+
* preserved for options beyond the cap, file `accept` and `multiple`. Without
|
|
24
|
+
* this wiring agents repeatedly guess values on these controls from the raw
|
|
25
|
+
* attributes, which is the failure mode compound.ts was built to eliminate.
|
|
26
|
+
*/
|
|
27
|
+
import { COMPOUND_INFO_JS } from './compound.js';
|
|
28
|
+
/**
 * Builds the JS expression string that serializes a DOM subtree in the page.
 *
 * When evaluated in a page context (e.g. via `page.evaluate`) the expression
 * resolves to either
 *   `{selector, matched, tree, truncated}` on success, or
 *   `{selector, invalidSelector: true, reason}` when `querySelectorAll`
 *   throws for an unparseable selector.
 *
 * Callers must branch on `invalidSelector` to convert it into the CLI's
 * `invalid_selector` structured error; otherwise the browser-level exception
 * would bubble out of `page.evaluate` and bypass the structured-error
 * contract that agents rely on.
 *
 * @param {object} [opts] Serializer options; `null`/`undefined` means defaults.
 * @param {string|null} [opts.selector] CSS selector whose first match becomes
 *   the tree root. Falsy = `document.documentElement`.
 * @param {number|null} [opts.depth] Max depth below the root (0 = root only).
 * @param {number|null} [opts.childrenMax] Max element children kept per node.
 * @param {number|null} [opts.textMax] Max chars of direct text kept per node.
 *   Each budget is ignored (unlimited) unless it is a finite number >= 0, and
 *   fractional budgets are rounded down so the reported truncation counts
 *   stay whole numbers that agree with what was actually kept.
 * @returns {string} A self-invoking arrow-function expression.
 */
export function buildHtmlTreeJs(opts = {}) {
    // `opts = {}` only covers `undefined`; also accept an explicit `null`.
    const { selector, depth, childrenMax, textMax } = opts ?? {};
    // JSON.stringify yields a valid, fully escaped JS string literal.
    const selectorLiteral = selector ? JSON.stringify(selector) : 'null';
    const depthLiteral = budgetLiteral(depth);
    const childrenMaxLiteral = budgetLiteral(childrenMax);
    const textMaxLiteral = budgetLiteral(textMax);
    // NOTE: this is a template literal, so `\\s` below reaches the page as `\s`.
    return `(() => {
        ${COMPOUND_INFO_JS}
        const selector = ${selectorLiteral};
        const maxDepth = ${depthLiteral};
        const maxChildren = ${childrenMaxLiteral};
        const maxText = ${textMaxLiteral};
        let matches;
        if (selector) {
            try { matches = document.querySelectorAll(selector); }
            catch (e) {
                return { selector: selector, invalidSelector: true, reason: (e && e.message) || String(e) };
            }
        } else {
            matches = [document.documentElement];
        }
        const matched = matches.length;
        const root = matches[0] || null;
        const trunc = { depth: false, children_dropped: 0, text_truncated: 0 };
        function serialize(el, depth) {
            if (!el || el.nodeType !== 1) return null;
            const attrs = {};
            for (const a of el.attributes) attrs[a.name] = a.value;
            let text = '';
            for (const n of el.childNodes) {
                if (n.nodeType === 3) text += n.nodeValue;
            }
            text = text.replace(/\\s+/g, ' ').trim();
            if (maxText !== null && text.length > maxText) {
                text = text.slice(0, maxText);
                trunc.text_truncated++;
            }
            const children = [];
            if (maxDepth === null || depth < maxDepth) {
                const childEls = [];
                for (const n of el.childNodes) if (n.nodeType === 1) childEls.push(n);
                const keep = maxChildren === null ? childEls.length : Math.min(childEls.length, maxChildren);
                for (let i = 0; i < keep; i++) {
                    const child = serialize(childEls[i], depth + 1);
                    if (child) children.push(child);
                }
                if (keep < childEls.length) {
                    trunc.children_dropped += childEls.length - keep;
                }
            } else {
                // Budget hit: we're at max depth. Count any element children we would have visited.
                for (const n of el.childNodes) if (n.nodeType === 1) { trunc.depth = true; break; }
            }
            const node = { tag: el.tagName.toLowerCase(), attrs, text, children };
            const compound = compoundInfoOf(el);
            if (compound) node.compound = compound;
            return node;
        }
        const tree = root ? serialize(root, 0) : null;
        const truncatedOut = {};
        if (trunc.depth) truncatedOut.depth = true;
        if (trunc.children_dropped > 0) truncatedOut.children_dropped = trunc.children_dropped;
        if (trunc.text_truncated > 0) truncatedOut.text_truncated = trunc.text_truncated;
        const envelope = { selector: selector, matched: matched, tree: tree };
        if (Object.keys(truncatedOut).length > 0) envelope.truncated = truncatedOut;
        return envelope;
    })()`;
}
/**
 * Converts a budget option into the literal embedded in the page script.
 *
 * Anything that is not a finite number >= 0 (including `null`, `undefined`,
 * strings and negatives) becomes `null`, which the page script treats as
 * "unlimited". Valid budgets are floored because they are used as counts.
 *
 * @param {unknown} value Raw option value.
 * @returns {string} A numeric literal, or the string `'null'`.
 */
function budgetLiteral(value) {
    if (!Number.isFinite(value) || value < 0) {
        return 'null';
    }
    return String(Math.floor(value));
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { describe, expect, it } from 'vitest';
|
|
2
|
+
import { buildHtmlTreeJs } from './html-tree.js';
|
|
3
|
+
/**
|
|
4
|
+
* The serializer runs in a page context via `page.evaluate`. In unit tests we
|
|
5
|
+
* substitute `document` with a minimal stub that mirrors the DOM surface used
|
|
6
|
+
* by the expression, then Function-eval the returned JS.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Evaluates the serializer expression against a stubbed `document`.
 *
 * @param root Value exposed as `document.documentElement`.
 * @param selectorMatches Value returned by every `document.querySelectorAll` call.
 * @param selector Selector forwarded to `buildHtmlTreeJs`.
 * @param budgets Extra options spread into the `buildHtmlTreeJs` call.
 * @returns Whatever the generated expression evaluates to.
 */
function runTreeJs(root, selectorMatches, selector, budgets = {}) {
    const expression = buildHtmlTreeJs({ selector, ...budgets });
    // Only the two members the expression touches are stubbed.
    const documentStub = {
        querySelectorAll: () => selectorMatches,
        documentElement: root,
    };
    const evaluate = new Function('document', `return ${expression};`);
    return evaluate(documentStub);
}
|
|
17
|
+
/**
 * Evaluates the serializer expression against a `document` whose
 * `querySelectorAll` always throws an Error named `SyntaxError`.
 *
 * @param selector Selector forwarded to `buildHtmlTreeJs`.
 * @param errorMessage Message carried by the thrown error.
 * @returns Whatever the generated expression evaluates to.
 */
function runTreeJsInvalid(selector, errorMessage) {
    const expression = buildHtmlTreeJs({ selector });
    const rejectSelector = () => {
        throw Object.assign(new Error(errorMessage), { name: 'SyntaxError' });
    };
    const documentStub = { querySelectorAll: rejectSelector, documentElement: null };
    const evaluate = new Function('document', `return ${expression};`);
    return evaluate(documentStub);
}
|
|
26
|
+
/**
 * Builds a minimal element stub (nodeType 1) for the serializer tests.
 *
 * @param tag Tag name; exposed upper-cased as `tagName`.
 * @param attrs Plain object of attribute name → value.
 * @param children Array used directly as `childNodes`.
 * @param extras Optional `value` / `multiple` / `files` / `options` properties
 *   copied onto the stub.
 */
function el(tag, attrs, children, extras = {}) {
    const attributes = [];
    for (const [name, value] of Object.entries(attrs)) {
        attributes.push({ name, value });
    }
    const getAttribute = (name) => {
        if (name in attrs) {
            return attrs[name];
        }
        return null;
    };
    const { value, multiple, files, options } = extras;
    return {
        nodeType: 1,
        tagName: tag.toUpperCase(),
        attributes,
        childNodes: children,
        getAttribute,
        value,
        multiple,
        files,
        options,
    };
}
|
|
39
|
+
/** Builds a minimal text-node stub (nodeType 3) carrying `value` as its `nodeValue`. */
function txt(value) {
    const node = { nodeType: 3 };
    node.nodeValue = value;
    return node;
}
|
|
40
|
+
describe('buildHtmlTreeJs', () => {
|
|
41
|
+
it('serializes a simple element into {tag, attrs, text, children}', () => {
|
|
42
|
+
const root = el('div', { class: 'hero', id: 'x' }, [txt('Hello')]);
|
|
43
|
+
const result = runTreeJs(root, [root], null);
|
|
44
|
+
expect(result.selector).toBeNull();
|
|
45
|
+
expect(result.matched).toBe(1);
|
|
46
|
+
expect(result.tree).toEqual({
|
|
47
|
+
tag: 'div',
|
|
48
|
+
attrs: { class: 'hero', id: 'x' },
|
|
49
|
+
text: 'Hello',
|
|
50
|
+
children: [],
|
|
51
|
+
});
|
|
52
|
+
});
|
|
53
|
+
it('collapses whitespace in direct text content only', () => {
|
|
54
|
+
const root = el('p', {}, [
|
|
55
|
+
txt(' line \n one '),
|
|
56
|
+
el('span', {}, [txt('inner text')]),
|
|
57
|
+
txt('\tline two\t'),
|
|
58
|
+
]);
|
|
59
|
+
const result = runTreeJs(root, [root], null);
|
|
60
|
+
expect(result.tree?.text).toBe('line one line two');
|
|
61
|
+
expect(result.tree?.children[0].text).toBe('inner text');
|
|
62
|
+
});
|
|
63
|
+
it('recurses into element children and preserves their attrs', () => {
|
|
64
|
+
const root = el('ul', { role: 'list' }, [
|
|
65
|
+
el('li', { 'data-id': '1' }, [txt('first')]),
|
|
66
|
+
el('li', { 'data-id': '2' }, [txt('second')]),
|
|
67
|
+
]);
|
|
68
|
+
const result = runTreeJs(root, [root], null);
|
|
69
|
+
expect(result.tree?.children).toHaveLength(2);
|
|
70
|
+
expect(result.tree?.children[0]).toEqual({
|
|
71
|
+
tag: 'li',
|
|
72
|
+
attrs: { 'data-id': '1' },
|
|
73
|
+
text: 'first',
|
|
74
|
+
children: [],
|
|
75
|
+
});
|
|
76
|
+
});
|
|
77
|
+
it('returns matched=N and serializes only the first match', () => {
|
|
78
|
+
const first = el('article', { id: 'a' }, [txt('first')]);
|
|
79
|
+
const second = el('article', { id: 'b' }, [txt('second')]);
|
|
80
|
+
const result = runTreeJs(null, [first, second], 'article');
|
|
81
|
+
expect(result.matched).toBe(2);
|
|
82
|
+
expect(result.tree?.attrs.id).toBe('a');
|
|
83
|
+
});
|
|
84
|
+
it('returns tree=null and matched=0 when selector matches nothing', () => {
|
|
85
|
+
const result = runTreeJs(null, [], '.nothing');
|
|
86
|
+
expect(result.matched).toBe(0);
|
|
87
|
+
expect(result.tree).toBeNull();
|
|
88
|
+
});
|
|
89
|
+
it('catches SyntaxError from querySelectorAll and returns {invalidSelector:true, reason}', () => {
|
|
90
|
+
const result = runTreeJsInvalid('##$@@', "'##$@@' is not a valid selector");
|
|
91
|
+
expect(result.invalidSelector).toBe(true);
|
|
92
|
+
expect(result.selector).toBe('##$@@');
|
|
93
|
+
expect(result.reason).toContain('not a valid selector');
|
|
94
|
+
});
|
|
95
|
+
it('omits `truncated` when no budget is hit', () => {
|
|
96
|
+
const root = el('div', {}, [el('span', {}, [txt('ok')])]);
|
|
97
|
+
const result = runTreeJs(root, [root], null, { depth: 5, childrenMax: 10, textMax: 100 });
|
|
98
|
+
expect(result.truncated).toBeUndefined();
|
|
99
|
+
});
|
|
100
|
+
});
|
|
101
|
+
describe('buildHtmlTreeJs budget knobs', () => {
|
|
102
|
+
it('caps tree at `depth` and reports truncated.depth', () => {
|
|
103
|
+
const deep = el('a', {}, [
|
|
104
|
+
el('b', {}, [
|
|
105
|
+
el('c', {}, [el('d', {}, [txt('deep')])]),
|
|
106
|
+
]),
|
|
107
|
+
]);
|
|
108
|
+
// depth=1 → root + one level of children; grandchildren should be dropped.
|
|
109
|
+
const result = runTreeJs(deep, [deep], null, { depth: 1 });
|
|
110
|
+
expect(result.tree?.tag).toBe('a');
|
|
111
|
+
expect(result.tree?.children).toHaveLength(1);
|
|
112
|
+
expect(result.tree?.children[0].tag).toBe('b');
|
|
113
|
+
// The "b" node had element children but we hit the depth budget before
|
|
114
|
+
// recursing into them — children array is empty, truncated.depth is true.
|
|
115
|
+
expect(result.tree?.children[0].children).toEqual([]);
|
|
116
|
+
expect(result.truncated?.depth).toBe(true);
|
|
117
|
+
});
|
|
118
|
+
it('depth=0 keeps only the root', () => {
|
|
119
|
+
const root = el('ul', {}, [
|
|
120
|
+
el('li', {}, [txt('a')]),
|
|
121
|
+
el('li', {}, [txt('b')]),
|
|
122
|
+
]);
|
|
123
|
+
const result = runTreeJs(root, [root], null, { depth: 0 });
|
|
124
|
+
expect(result.tree?.children).toEqual([]);
|
|
125
|
+
expect(result.truncated?.depth).toBe(true);
|
|
126
|
+
});
|
|
127
|
+
it('caps children per node at `childrenMax` and reports children_dropped count', () => {
|
|
128
|
+
const root = el('ul', {}, [
|
|
129
|
+
el('li', {}, [txt('1')]),
|
|
130
|
+
el('li', {}, [txt('2')]),
|
|
131
|
+
el('li', {}, [txt('3')]),
|
|
132
|
+
el('li', {}, [txt('4')]),
|
|
133
|
+
el('li', {}, [txt('5')]),
|
|
134
|
+
]);
|
|
135
|
+
const result = runTreeJs(root, [root], null, { childrenMax: 2 });
|
|
136
|
+
expect(result.tree?.children).toHaveLength(2);
|
|
137
|
+
expect(result.truncated?.children_dropped).toBe(3);
|
|
138
|
+
});
|
|
139
|
+
it('caps direct text per node at `textMax` and reports text_truncated count', () => {
|
|
140
|
+
const root = el('p', {}, [
|
|
141
|
+
txt('a'.repeat(50)),
|
|
142
|
+
el('span', {}, [txt('b'.repeat(50))]),
|
|
143
|
+
]);
|
|
144
|
+
const result = runTreeJs(root, [root], null, { textMax: 10 });
|
|
145
|
+
expect(result.tree?.text).toHaveLength(10);
|
|
146
|
+
expect(result.tree?.children[0].text).toHaveLength(10);
|
|
147
|
+
expect(result.truncated?.text_truncated).toBe(2);
|
|
148
|
+
});
|
|
149
|
+
// Blocker B regression: compound contract must ride along with the
|
|
150
|
+
// json tree so `browser get html --as json` surfaces the full contract
|
|
151
|
+
// to agents without an extra round-trip.
|
|
152
|
+
it('attaches compound info to date/file/select nodes and omits it elsewhere', () => {
|
|
153
|
+
const date = el('input', { type: 'date', min: '2026-01-01' }, [], { value: '2026-04-21' });
|
|
154
|
+
const file = el('input', { type: 'file', accept: 'image/*' }, [], { multiple: true, files: [{ name: 'a.png' }] });
|
|
155
|
+
const sel = el('select', { name: 'country' }, [], {
|
|
156
|
+
options: [
|
|
157
|
+
{ value: 'us', label: 'United States', selected: true },
|
|
158
|
+
{ value: 'ca', label: 'Canada' },
|
|
159
|
+
],
|
|
160
|
+
});
|
|
161
|
+
const plain = el('input', { type: 'text' }, [], { value: 'hi' });
|
|
162
|
+
const root = el('form', {}, [date, file, sel, plain]);
|
|
163
|
+
const result = runTreeJs(root, [root], null);
|
|
164
|
+
expect(result.tree?.children[0].compound).toMatchObject({ control: 'date', format: 'YYYY-MM-DD', current: '2026-04-21', min: '2026-01-01' });
|
|
165
|
+
expect(result.tree?.children[1].compound).toMatchObject({ control: 'file', multiple: true, current: ['a.png'], accept: 'image/*' });
|
|
166
|
+
expect(result.tree?.children[2].compound).toMatchObject({ control: 'select', multiple: false, current: 'United States' });
|
|
167
|
+
expect(result.tree?.children[3].compound).toBeUndefined();
|
|
168
|
+
});
|
|
169
|
+
it('combines budgets and reports every hit', () => {
|
|
170
|
+
const root = el('ul', {}, [
|
|
171
|
+
el('li', {}, [txt('x'.repeat(20)), el('em', {}, [txt('y')])]),
|
|
172
|
+
el('li', {}, []),
|
|
173
|
+
el('li', {}, []),
|
|
174
|
+
]);
|
|
175
|
+
const result = runTreeJs(root, [root], null, { depth: 1, childrenMax: 2, textMax: 5 });
|
|
176
|
+
expect(result.tree?.children).toHaveLength(2);
|
|
177
|
+
expect(result.truncated?.children_dropped).toBe(1);
|
|
178
|
+
expect(result.truncated?.text_truncated).toBe(1);
|
|
179
|
+
expect(result.truncated?.depth).toBe(true);
|
|
180
|
+
});
|
|
181
|
+
});
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Persistent cache for browser network captures.
|
|
3
|
+
*
|
|
4
|
+
* The live capture buffer (JS interceptor / daemon ring) can be cleared
|
|
5
|
+
* by navigation or lost between CLI invocations. Agents still need
|
|
6
|
+
* stable references to request bodies after running other commands,
|
|
7
|
+
* so every `browser network` call snapshots its results to disk.
|
|
8
|
+
*
|
|
9
|
+
* Layout: <cacheDir>/browser-network/<workspace>.json
|
|
10
|
+
* Entries expire after DEFAULT_TTL_MS (24h).
|
|
11
|
+
*/
|
|
12
|
+
export declare const DEFAULT_TTL_MS: number;
|
|
13
|
+
export interface CachedNetworkEntry {
|
|
14
|
+
key: string;
|
|
15
|
+
url: string;
|
|
16
|
+
method: string;
|
|
17
|
+
status: number;
|
|
18
|
+
/** Full body size in chars (may exceed stored body length when truncated). */
|
|
19
|
+
size: number;
|
|
20
|
+
ct: string;
|
|
21
|
+
body: unknown;
|
|
22
|
+
/**
|
|
23
|
+
* Truncation signals use snake_case so `--raw` (which emits cache entries
|
|
24
|
+
* verbatim) matches the agent-facing contract used by list / --detail.
|
|
25
|
+
*/
|
|
26
|
+
body_truncated?: boolean;
|
|
27
|
+
body_full_size?: number;
|
|
28
|
+
}
|
|
29
|
+
export interface NetworkCacheFile {
|
|
30
|
+
version: 1;
|
|
31
|
+
workspace: string;
|
|
32
|
+
savedAt: string;
|
|
33
|
+
entries: CachedNetworkEntry[];
|
|
34
|
+
}
|
|
35
|
+
export declare function getCachePath(workspace: string, baseDir?: string): string;
|
|
36
|
+
export declare function saveNetworkCache(workspace: string, entries: CachedNetworkEntry[], baseDir?: string): void;
|
|
37
|
+
export interface LoadOptions {
|
|
38
|
+
baseDir?: string;
|
|
39
|
+
ttlMs?: number;
|
|
40
|
+
now?: number;
|
|
41
|
+
}
|
|
42
|
+
export interface LoadResult {
|
|
43
|
+
status: 'ok' | 'missing' | 'expired' | 'corrupt';
|
|
44
|
+
file?: NetworkCacheFile;
|
|
45
|
+
ageMs?: number;
|
|
46
|
+
}
|
|
47
|
+
export declare function loadNetworkCache(workspace: string, opts?: LoadOptions): LoadResult;
|
|
48
|
+
export declare function findEntry(file: NetworkCacheFile, key: string): CachedNetworkEntry | null;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Persistent cache for browser network captures.
|
|
3
|
+
*
|
|
4
|
+
* The live capture buffer (JS interceptor / daemon ring) can be cleared
|
|
5
|
+
* by navigation or lost between CLI invocations. Agents still need
|
|
6
|
+
* stable references to request bodies after running other commands,
|
|
7
|
+
* so every `browser network` call snapshots its results to disk.
|
|
8
|
+
*
|
|
9
|
+
* Layout: <cacheDir>/browser-network/<workspace>.json
|
|
10
|
+
* Entries expire after DEFAULT_TTL_MS (24h).
|
|
11
|
+
*/
|
|
12
|
+
import * as fs from 'node:fs';
|
|
13
|
+
import * as os from 'node:os';
|
|
14
|
+
import * as path from 'node:path';
|
|
15
|
+
export const DEFAULT_TTL_MS = 24 * 60 * 60 * 1000;
|
|
16
|
+
/**
 * Resolves the root cache directory.
 *
 * @returns {string} `OPENCLI_CACHE_DIR` when it is set to a non-empty value,
 *   otherwise `<home>/.opencli/cache`.
 */
function getDefaultCacheDir() {
    const override = process.env.OPENCLI_CACHE_DIR;
    if (override) {
        return override;
    }
    return path.join(os.homedir(), '.opencli', 'cache');
}
|
|
19
|
+
/**
 * Computes the cache file path for a workspace:
 * `<baseDir>/browser-network/<sanitized workspace>.json`.
 *
 * Every run of characters outside `[a-zA-Z0-9_-]` in the workspace name is
 * collapsed into a single `_` so the name is usable as a file name.
 *
 * @param {string} workspace Workspace identifier.
 * @param {string} [baseDir] Cache root; defaults to `getDefaultCacheDir()`.
 * @returns {string} Path of the workspace's cache file.
 */
export function getCachePath(workspace, baseDir = getDefaultCacheDir()) {
    const fileName = `${workspace.replace(/[^a-zA-Z0-9_-]+/g, '_')}.json`;
    return path.join(baseDir, 'browser-network', fileName);
}
|
|
23
|
+
/**
 * Writes a snapshot of network entries to the workspace's cache file,
 * creating the parent directory when needed and overwriting any prior file.
 *
 * The file holds compact JSON: `{version: 1, workspace, savedAt, entries}`,
 * where `savedAt` is the current time as an ISO-8601 string.
 *
 * @param {string} workspace Workspace identifier.
 * @param {Array} entries Entries to persist.
 * @param {string} [baseDir] Cache root, forwarded to `getCachePath`.
 */
export function saveNetworkCache(workspace, entries, baseDir) {
    const cacheFile = getCachePath(workspace, baseDir);
    fs.mkdirSync(path.dirname(cacheFile), { recursive: true });
    const savedAt = new Date().toISOString();
    const serialized = JSON.stringify({ version: 1, workspace, savedAt, entries });
    fs.writeFileSync(cacheFile, serialized, 'utf-8');
}
|
|
34
|
+
/**
 * Reads and validates the workspace's cache file.
 *
 * @param {string} workspace Workspace identifier.
 * @param {object} [opts]
 * @param {string} [opts.baseDir] Cache root, forwarded to `getCachePath`.
 * @param {number} [opts.ttlMs] Max accepted age in ms; defaults to `DEFAULT_TTL_MS`.
 * @param {number} [opts.now] Current time in epoch ms; defaults to `Date.now()`.
 * @returns {{status: string, file?: object, ageMs?: number}}
 *   - `missing`: the file could not be read (any read error).
 *   - `corrupt`: not JSON, `version !== 1`, `entries` is not an array, or
 *     `savedAt` does not parse as a date.
 *   - `expired`: age exceeds the TTL; `file` and `ageMs` are still returned.
 *   - `ok`: within the TTL; `file` and `ageMs` are returned.
 */
export function loadNetworkCache(workspace, opts = {}) {
    const cacheFile = getCachePath(workspace, opts.baseDir);
    let text;
    try {
        text = fs.readFileSync(cacheFile, 'utf-8');
    }
    catch {
        return { status: 'missing' };
    }
    let file;
    try {
        file = JSON.parse(text);
    }
    catch {
        return { status: 'corrupt' };
    }
    const wellFormed = Boolean(file) && file.version === 1 && Array.isArray(file.entries);
    if (!wellFormed) {
        return { status: 'corrupt' };
    }
    const savedAtMs = Date.parse(file.savedAt);
    if (!Number.isFinite(savedAtMs)) {
        return { status: 'corrupt' };
    }
    const ttl = opts.ttlMs ?? DEFAULT_TTL_MS;
    const now = opts.now ?? Date.now();
    const ageMs = now - savedAtMs;
    // Strictly older than the TTL counts as expired; exactly at the TTL is still ok.
    const status = ageMs > ttl ? 'expired' : 'ok';
    return { status, file, ageMs };
}
|
|
64
|
+
/**
 * Looks up a cached entry by its key.
 *
 * @param {{entries: Array<{key: string}>}} file Loaded cache file.
 * @param {string} key Key to match exactly (`===`).
 * @returns The first entry whose `key` matches, or `null` when none does.
 */
export function findEntry(file, key) {
    for (const entry of file.entries) {
        if (entry.key === key) {
            return entry;
        }
    }
    return null;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
|
2
|
+
import * as fs from 'node:fs';
|
|
3
|
+
import * as os from 'node:os';
|
|
4
|
+
import * as path from 'node:path';
|
|
5
|
+
import { DEFAULT_TTL_MS, findEntry, getCachePath, loadNetworkCache, saveNetworkCache, } from './network-cache.js';
|
|
6
|
+
/**
 * Builds a cache entry fixture: a 200 GET JSON response for
 * `https://x.com/<key>` carrying the given body.
 */
function makeEntry(key, body = { ok: true }) {
    const url = `https://x.com/${key}`;
    return {
        key,
        url,
        method: 'GET',
        status: 200,
        size: 2,
        ct: 'application/json',
        body,
    };
}
|
|
9
|
+
describe('network-cache', () => {
    // Each test gets its own scratch directory so cache files never leak between cases.
    let scratchDir;

    // Write arbitrary bytes to the cache location of `workspace`, creating parent directories first.
    const writeRawCache = (workspace, contents) => {
        const target = getCachePath(workspace, scratchDir);
        fs.mkdirSync(path.dirname(target), { recursive: true });
        fs.writeFileSync(target, contents);
    };

    beforeEach(() => {
        const prefix = path.join(os.tmpdir(), 'opencli-netcache-');
        scratchDir = fs.mkdtempSync(prefix);
    });

    afterEach(() => {
        fs.rmSync(scratchDir, { recursive: true, force: true });
    });

    it('sanitizes workspace names into safe filenames', () => {
        const cachePath = getCachePath('browser:default', scratchDir);
        expect(path.basename(cachePath)).toBe('browser_default.json');
    });

    it('round-trips entries through save + load', () => {
        const entries = [makeEntry('UserTweets'), makeEntry('UserByScreenName')];
        saveNetworkCache('ws', entries, scratchDir);
        const loaded = loadNetworkCache('ws', { baseDir: scratchDir });
        expect(loaded.status).toBe('ok');
        expect(loaded.file?.entries).toHaveLength(2);
        expect(loaded.file?.entries[0].key).toBe('UserTweets');
    });

    it('reports missing when cache file does not exist', () => {
        const loaded = loadNetworkCache('nope', { baseDir: scratchDir });
        expect(loaded.status).toBe('missing');
    });

    it('reports expired when the cache is older than ttl', () => {
        saveNetworkCache('ws', [makeEntry('A')], scratchDir);
        // Pretend "now" is one minute past the default TTL.
        const pastTtl = Date.now() + DEFAULT_TTL_MS + 60_000;
        const loaded = loadNetworkCache('ws', { baseDir: scratchDir, now: pastTtl });
        expect(loaded.status).toBe('expired');
        expect(loaded.file?.entries).toHaveLength(1);
    });

    it('reports corrupt for malformed json', () => {
        writeRawCache('ws', '{not json');
        const loaded = loadNetworkCache('ws', { baseDir: scratchDir });
        expect(loaded.status).toBe('corrupt');
    });

    it('reports corrupt for wrong schema version', () => {
        writeRawCache('ws', JSON.stringify({ version: 0, entries: [] }));
        const loaded = loadNetworkCache('ws', { baseDir: scratchDir });
        expect(loaded.status).toBe('corrupt');
    });

    it('findEntry returns matching entry or null', () => {
        const savedAt = new Date().toISOString();
        const entries = [makeEntry('A'), makeEntry('B')];
        const cacheFile = { version: 1, workspace: 'ws', savedAt, entries };
        expect(findEntry(cacheFile, 'B')?.key).toBe('B');
        expect(findEntry(cacheFile, 'missing')).toBeNull();
    });
});
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stable keys for network capture entries.
|
|
3
|
+
*
|
|
4
|
+
* Agents reference entries by key (e.g. `UserTweets`, `GET api.x.com/1.1/home`)
|
|
5
|
+
* instead of array index, so the mapping survives new captures.
|
|
6
|
+
*
|
|
7
|
+
* Rules:
|
|
8
|
+
* GraphQL (URL contains `/graphql/`): key = operationName derived from URL path
|
|
9
|
+
*     (the segment after the query id, typically 22 chars, or the only segment when no id is present)
|
|
10
|
+
* Everything else: key = `METHOD host+pathname`
|
|
11
|
+
*
|
|
12
|
+
* On collision assignKeys suffixes duplicates as `base#2`, `base#3`, ... —
|
|
13
|
+
* the first occurrence stays bare (there is no `#1`).
|
|
14
|
+
*/
|
|
15
|
+
/** Minimal request shape accepted by the key helpers. */
export interface KeyableRequest {
    /** Request URL. */
    url: string;
    /** HTTP method name. */
    method: string;
}
/** Derive the key string for a single request. */
export declare function deriveKey(req: KeyableRequest): string;
/** Return the requests, in order, each extended with a `key` string. */
export declare function assignKeys<T extends KeyableRequest>(requests: T[]): (T & { key: string })[];
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stable keys for network capture entries.
|
|
3
|
+
*
|
|
4
|
+
* Agents reference entries by key (e.g. `UserTweets`, `GET api.x.com/1.1/home`)
|
|
5
|
+
* instead of array index, so the mapping survives new captures.
|
|
6
|
+
*
|
|
7
|
+
* Rules:
|
|
8
|
+
* GraphQL (URL contains `/graphql/`): key = operationName derived from URL path
|
|
9
|
+
*     (the segment after the query id, typically 22 chars, or the only segment when no id is present)
|
|
10
|
+
* Everything else: key = `METHOD host+pathname`
|
|
11
|
+
*
|
|
12
|
+
* On collision assignKeys suffixes duplicates as `base#2`, `base#3`, ... —
|
|
13
|
+
* the first occurrence stays bare (there is no `#1`).
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Derive the key for a single captured request.
 *
 * - Unparseable URL: `METHOD <raw url truncated to 120 chars>`.
 * - Pathname containing `/graphql/` with a resolvable operation name: that name alone.
 * - Anything else: `METHOD host+pathname`.
 *
 * @param {{ url: string, method: string }} req - Request to key.
 * @returns {string} The derived key.
 */
export function deriveKey(req) {
    const url = safeParseUrl(req.url);
    if (!url) {
        return `${req.method.toUpperCase()} ${truncate(req.url, 120)}`;
    }
    const { pathname, host } = url;
    const operation = pathname.includes('/graphql/') ? graphqlOperationName(pathname) : null;
    if (operation) {
        return operation;
    }
    return `${req.method.toUpperCase()} ${host}${pathname}`;
}
|
|
27
|
+
/**
 * Attach a `key` to every request, disambiguating repeated base keys.
 *
 * The first request with a given base key keeps it unchanged; later ones get
 * `base#2`, `base#3`, ... in encounter order. Input objects are shallow-copied,
 * not mutated.
 *
 * @param {Array<{ url: string, method: string }>} requests - Requests in capture order.
 * @returns {Array<object>} Copies of the requests, each with an added `key` string.
 */
export function assignKeys(requests) {
    // base key -> number of times it has been seen so far
    const occurrences = new Map();
    return Array.from(requests, (req) => {
        const base = deriveKey(req);
        const ordinal = (occurrences.get(base) ?? 0) + 1;
        occurrences.set(base, ordinal);
        return { ...req, key: ordinal === 1 ? base : `${base}#${ordinal}` };
    });
}
|
|
39
|
+
/**
 * Extract a GraphQL operation name from a URL pathname.
 *
 * Shapes seen in the wild:
 *   /i/api/graphql/<queryId>/UserTweets
 *   /graphql/<queryId>/SomeOp
 *   /graphql/SomeOp            (rare, no id)
 *
 * @param {string} pathname - URL pathname to inspect.
 * @returns {string|null} The operation name, or `null` when there is no
 *   `graphql` segment or nothing follows it.
 */
function graphqlOperationName(pathname) {
    const parts = pathname.split('/').filter((part) => part !== '');
    const marker = parts.indexOf('graphql');
    if (marker === -1) {
        return null;
    }
    const [first, second] = parts.slice(marker + 1);
    if (first === undefined) {
        return null;
    }
    // With two or more trailing segments the first is usually a query id and
    // the operation name is the one after it; with a single segment, use it.
    return second || first;
}
|
|
56
|
+
/**
 * Parse a URL string without throwing.
 *
 * @param {string} url - Candidate URL.
 * @returns {URL|null} The parsed `URL`, or `null` when the `URL` constructor rejects the input.
 */
function safeParseUrl(url) {
    let parsed = null;
    try {
        parsed = new URL(url);
    }
    catch {
        // Invalid input is an expected case; callers fall back on `null`.
        parsed = null;
    }
    return parsed;
}
|
|
64
|
+
/**
 * Shorten a string to at most `max` UTF-16 code units, ending with `…` when cut.
 *
 * @param {string} s - Text to shorten.
 * @param {number} max - Maximum length of the result, in UTF-16 code units.
 * @returns {string} `s` unchanged when it already fits; otherwise a prefix of
 *   `s` followed by `…`. Returns `''` when `max` is zero or negative.
 */
function truncate(s, max) {
    if (s.length <= max) {
        return s;
    }
    if (max <= 0) {
        // No room even for the ellipsis.
        return '';
    }
    let cut = max - 1;
    if (cut > 0) {
        // Do not leave a lone high surrogate at the end of the prefix: drop it
        // so the result stays well-formed UTF-16.
        const last = s.charCodeAt(cut - 1);
        if (last >= 0xd800 && last <= 0xdbff) {
            cut -= 1;
        }
    }
    return `${s.slice(0, cut)}…`;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|