@jackwener/opencli 1.7.5 → 1.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +22 -10
  2. package/README.zh-CN.md +18 -9
  3. package/cli-manifest.json +401 -11
  4. package/clis/51job/company.js +125 -0
  5. package/clis/51job/detail.js +108 -0
  6. package/clis/51job/hot.js +55 -0
  7. package/clis/51job/search.js +79 -0
  8. package/clis/51job/utils.js +302 -0
  9. package/clis/51job/utils.test.js +69 -0
  10. package/clis/bilibili/video.js +68 -0
  11. package/clis/bilibili/video.test.js +132 -0
  12. package/clis/chatgpt/image.js +1 -1
  13. package/clis/deepseek/ask.js +37 -11
  14. package/clis/deepseek/ask.test.js +165 -0
  15. package/clis/deepseek/utils.js +192 -24
  16. package/clis/deepseek/utils.test.js +145 -0
  17. package/clis/gemini/image.js +1 -1
  18. package/clis/instagram/download.js +1 -1
  19. package/clis/jianyu/search.js +139 -3
  20. package/clis/jianyu/search.test.js +25 -0
  21. package/clis/jianyu/shared/procurement-detail.js +15 -0
  22. package/clis/jianyu/shared/procurement-detail.test.js +12 -0
  23. package/clis/twitter/likes.js +3 -2
  24. package/clis/twitter/search.js +4 -2
  25. package/clis/twitter/search.test.js +4 -0
  26. package/clis/twitter/shared.js +35 -2
  27. package/clis/twitter/shared.test.js +96 -0
  28. package/clis/twitter/thread.js +3 -1
  29. package/clis/twitter/timeline.js +3 -2
  30. package/clis/twitter/tweets.js +219 -0
  31. package/clis/twitter/tweets.test.js +125 -0
  32. package/clis/web/read.js +25 -5
  33. package/clis/web/read.test.js +76 -0
  34. package/clis/weread/ai-outline.js +170 -0
  35. package/clis/weread/ai-outline.test.js +83 -0
  36. package/clis/weread/book.js +57 -44
  37. package/clis/weread/commands.test.js +24 -0
  38. package/clis/xiaoyuzhou/podcast-episodes.js +2 -2
  39. package/clis/xiaoyuzhou/podcast-episodes.test.js +78 -0
  40. package/clis/youtube/channel.js +35 -0
  41. package/dist/src/browser/analyze.d.ts +103 -0
  42. package/dist/src/browser/analyze.js +230 -0
  43. package/dist/src/browser/analyze.test.d.ts +1 -0
  44. package/dist/src/browser/analyze.test.js +164 -0
  45. package/dist/src/browser/article-extract.d.ts +57 -0
  46. package/dist/src/browser/article-extract.e2e.test.d.ts +1 -0
  47. package/dist/src/browser/article-extract.e2e.test.js +105 -0
  48. package/dist/src/browser/article-extract.js +169 -0
  49. package/dist/src/browser/article-extract.test.d.ts +1 -0
  50. package/dist/src/browser/article-extract.test.js +94 -0
  51. package/dist/src/browser/base-page.d.ts +13 -3
  52. package/dist/src/browser/base-page.js +35 -25
  53. package/dist/src/browser/cdp.d.ts +1 -0
  54. package/dist/src/browser/cdp.js +23 -5
  55. package/dist/src/browser/compound.d.ts +59 -0
  56. package/dist/src/browser/compound.js +112 -0
  57. package/dist/src/browser/compound.test.d.ts +1 -0
  58. package/dist/src/browser/compound.test.js +175 -0
  59. package/dist/src/browser/dom-snapshot.d.ts +7 -0
  60. package/dist/src/browser/dom-snapshot.js +76 -3
  61. package/dist/src/browser/dom-snapshot.test.js +65 -0
  62. package/dist/src/browser/extract.d.ts +69 -0
  63. package/dist/src/browser/extract.js +132 -0
  64. package/dist/src/browser/extract.test.d.ts +1 -0
  65. package/dist/src/browser/extract.test.js +129 -0
  66. package/dist/src/browser/find.d.ts +76 -0
  67. package/dist/src/browser/find.js +179 -0
  68. package/dist/src/browser/find.test.d.ts +1 -0
  69. package/dist/src/browser/find.test.js +120 -0
  70. package/dist/src/browser/html-tree.d.ts +75 -0
  71. package/dist/src/browser/html-tree.js +112 -0
  72. package/dist/src/browser/html-tree.test.d.ts +1 -0
  73. package/dist/src/browser/html-tree.test.js +181 -0
  74. package/dist/src/browser/network-cache.d.ts +48 -0
  75. package/dist/src/browser/network-cache.js +66 -0
  76. package/dist/src/browser/network-cache.test.d.ts +1 -0
  77. package/dist/src/browser/network-cache.test.js +58 -0
  78. package/dist/src/browser/network-key.d.ts +22 -0
  79. package/dist/src/browser/network-key.js +66 -0
  80. package/dist/src/browser/network-key.test.d.ts +1 -0
  81. package/dist/src/browser/network-key.test.js +49 -0
  82. package/dist/src/browser/shape-filter.d.ts +52 -0
  83. package/dist/src/browser/shape-filter.js +101 -0
  84. package/dist/src/browser/shape-filter.test.d.ts +1 -0
  85. package/dist/src/browser/shape-filter.test.js +101 -0
  86. package/dist/src/browser/shape.d.ts +23 -0
  87. package/dist/src/browser/shape.js +95 -0
  88. package/dist/src/browser/shape.test.d.ts +1 -0
  89. package/dist/src/browser/shape.test.js +82 -0
  90. package/dist/src/browser/target-errors.d.ts +14 -1
  91. package/dist/src/browser/target-errors.js +13 -0
  92. package/dist/src/browser/target-errors.test.js +39 -6
  93. package/dist/src/browser/target-resolver.d.ts +57 -10
  94. package/dist/src/browser/target-resolver.js +195 -75
  95. package/dist/src/browser/target-resolver.test.js +80 -5
  96. package/dist/src/browser/verify-fixture.d.ts +59 -0
  97. package/dist/src/browser/verify-fixture.js +213 -0
  98. package/dist/src/browser/verify-fixture.test.d.ts +1 -0
  99. package/dist/src/browser/verify-fixture.test.js +161 -0
  100. package/dist/src/cli.d.ts +32 -0
  101. package/dist/src/cli.js +936 -141
  102. package/dist/src/cli.test.js +1051 -1
  103. package/dist/src/daemon.d.ts +3 -2
  104. package/dist/src/daemon.js +16 -4
  105. package/dist/src/daemon.test.d.ts +1 -0
  106. package/dist/src/daemon.test.js +19 -0
  107. package/dist/src/download/article-download.d.ts +12 -0
  108. package/dist/src/download/article-download.js +141 -17
  109. package/dist/src/download/article-download.test.js +196 -0
  110. package/dist/src/download/index.js +73 -86
  111. package/dist/src/errors.js +4 -2
  112. package/dist/src/errors.test.js +13 -0
  113. package/dist/src/execution.js +7 -2
  114. package/dist/src/execution.test.js +54 -0
  115. package/dist/src/launcher.d.ts +1 -1
  116. package/dist/src/launcher.js +3 -3
  117. package/dist/src/main.js +16 -0
  118. package/dist/src/output.js +1 -1
  119. package/dist/src/output.test.js +6 -0
  120. package/dist/src/types.d.ts +18 -3
  121. package/package.json +5 -1
@@ -0,0 +1,132 @@
1
+ /**
2
+ * `browser extract` — agent-native article/content reading channel.
3
+ *
4
+ * Pipeline (from first principles — agents want the *content*, not the DOM):
5
+ * 1. Scope: select `--selector` (default: document.body or <main>/<article>)
6
+ * 2. Denoise: strip script/style/nav/header/footer/aside/iframe/svg/form, inline noise
7
+ * 3. Convert: HTML → Markdown via shared `htmlToMarkdown` (turndown)
8
+ * 4. Chunk: paragraph-boundary-aware slicing with `next_start_char` cursor
9
+ *
10
+ * Why a separate command:
11
+ * - `get html --as json` returns tree structure; useless for "read the article".
12
+ * - `get text` flattens everything; loses headings, lists, links.
13
+ * - Markdown is the agent-readable middle ground: structure preserved, noise gone.
14
+ *
15
+ * Continuation contract: the envelope always carries `start`, `end`,
16
+ * `total_chars`, and `next_start_char` (null when the last chunk was emitted).
17
+ * Agents pass `--start <next>` to continue. No session state required.
18
+ */
19
+ import { htmlToMarkdown } from '../utils.js';
20
+ const DEFAULT_CHUNK_SIZE = 20000; // chunk size runExtractFromHtml uses when opts.chunkSize is falsy
21
+ const MIN_CHUNK_SIZE = 100; // lower clamp chunkMarkdown applies to the requested chunk size
22
+ const MAX_CHUNK_SIZE = 200000; // upper clamp chunkMarkdown applies to the requested chunk size
23
+ const BOUNDARY_WINDOW_RATIO = 0.15; // fraction of the chunk size, measured back from the hard end, that chunkMarkdown searches for a newline boundary
24
+ /**
+ * Build the source text of a self-invoking JS expression, intended for
+ * `page.evaluate`, that produces the cleaned HTML subtree later handed to
+ * `htmlToMarkdown`. The denoise/clone runs inside the page so it can use DOM
+ * APIs (querySelectorAll, cloneNode) rather than regex on serialized HTML.
+ *
+ * The generated code:
+ * - resolves the root: `document.querySelector(selector)` when a selector is
+ *   given, otherwise the first available of `<main>`, `<article>`,
+ *   `document.body`, `document.documentElement`;
+ * - deep-clones the root and removes every descendant matching the `drop` list
+ *   (the root itself is never removed);
+ * - strips `on…`, `style` and `data-…` attributes from the clone and its
+ *   remaining descendants;
+ * - evaluates to `{ ok: true, url, title, html }` on success,
+ *   `{ notFound: true }` when no root exists, or
+ *   `{ invalidSelector: true, reason }` when `querySelector` throws.
+ *
+ * @param {string | null | undefined} selector CSS selector for the root; any
+ *   falsy value selects the default root. Embedded via `JSON.stringify`.
+ * @returns {string} Source text of an IIFE expression.
+ */
29
+ export function buildExtractHtmlJs(selector) {
30
+ const selectorLiteral = selector ? JSON.stringify(selector) : 'null'; // JSON-encode so the selector is inlined as a string literal, never as code
31
+ return `(() => {
32
+ const sel = ${selectorLiteral};
33
+ let root = null;
34
+ if (sel) {
35
+ try { root = document.querySelector(sel); }
36
+ catch (e) {
37
+ return { invalidSelector: true, reason: (e && e.message) || String(e) };
38
+ }
39
+ if (!root) return { notFound: true };
40
+ } else {
41
+ root = document.querySelector('main') || document.querySelector('article') || document.body || document.documentElement;
42
+ }
43
+ if (!root) return { notFound: true };
44
+ const clone = root.cloneNode(true);
45
+ const drop = [
46
+ 'script', 'style', 'noscript', 'template',
47
+ 'nav', 'header', 'footer', 'aside',
48
+ 'iframe', 'svg', 'canvas',
49
+ 'form', 'button', 'input', 'select', 'textarea',
50
+ '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]', '[role="complementary"]',
51
+ '[aria-hidden="true"]',
52
+ ];
53
+ for (const q of drop) {
54
+ for (const n of clone.querySelectorAll(q)) n.remove();
55
+ }
56
+ // Also strip event-handler and style attributes that bloat markdown output.
57
+ const walker = document.createTreeWalker(clone, NodeFilter.SHOW_ELEMENT);
58
+ let n = walker.currentNode;
59
+ while (n) {
60
+ if (n.nodeType === 1) {
61
+ const el = n;
62
+ for (const a of [...el.attributes]) {
63
+ if (a.name.startsWith('on') || a.name === 'style' || a.name.startsWith('data-')) el.removeAttribute(a.name);
64
+ }
65
+ }
66
+ n = walker.nextNode();
67
+ }
68
+ return { ok: true, url: location.href, title: document.title || '', html: clone.outerHTML || '' };
69
+ })()`;
70
+ }
72
/**
 * Slice `content` into one chunk starting at `start` with target size
 * `chunkSize`. When the chunk would land mid-paragraph, the break is pulled
 * back to the nearest `\n\n` (or, failing that, `\n`) inside a small window
 * before the hard end so the output stays readable. If no boundary is found,
 * the chunk is hard-cut at `start + chunkSize`, backing off by one code unit
 * when that would split a UTF-16 surrogate pair.
 *
 * Inputs are normalised so the returned cursor is always a usable integer:
 * - a non-finite or negative `start` is treated as 0, fractions are floored;
 * - a non-finite `chunkSize` falls back to `DEFAULT_CHUNK_SIZE`; the value is
 *   then clamped to `[MIN_CHUNK_SIZE, MAX_CHUNK_SIZE]` and floored.
 *
 * @param {{ content: string, start: number, chunkSize: number }} opts
 * @returns {{ content: string, start: number, end: number, nextStartChar: number | null }}
 *   `nextStartChar` is `null` once the final chunk has been emitted.
 */
export function chunkMarkdown(opts) {
    const { content } = opts;
    // NaN/negative cursors would otherwise poison every offset below
    // (NaN `end`, or `slice` counting from the end of the string).
    const start = Number.isFinite(opts.start) && opts.start > 0 ? Math.floor(opts.start) : 0;
    const requested = Number.isFinite(opts.chunkSize) ? opts.chunkSize : DEFAULT_CHUNK_SIZE;
    const chunkSize = Math.floor(Math.max(MIN_CHUNK_SIZE, Math.min(MAX_CHUNK_SIZE, requested)));
    if (start >= content.length) {
        return { content: '', start, end: start, nextStartChar: null };
    }
    const hardEnd = Math.min(content.length, start + chunkSize);
    if (hardEnd === content.length) {
        return { content: content.slice(start, hardEnd), start, end: hardEnd, nextStartChar: null };
    }
    // Only look for a boundary in the tail of the chunk; `start + 1` keeps the
    // cut strictly after `start` so every call makes progress.
    const windowSize = Math.max(1, Math.floor(chunkSize * BOUNDARY_WINDOW_RATIO));
    const windowStart = Math.max(start + 1, hardEnd - windowSize);
    const slice = content.slice(windowStart, hardEnd);
    const paraBreak = slice.lastIndexOf('\n\n');
    let cut = hardEnd;
    if (paraBreak >= 0) {
        cut = windowStart + paraBreak + 2;
    }
    else {
        const lineBreak = slice.lastIndexOf('\n');
        if (lineBreak >= 0) {
            cut = windowStart + lineBreak + 1;
        }
        else {
            // Hard cut: do not leave a lone high surrogate at the end of the chunk.
            const last = content.charCodeAt(hardEnd - 1);
            const next = content.charCodeAt(hardEnd);
            const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
            if (splitsPair && hardEnd - 1 > start) {
                cut = hardEnd - 1;
            }
        }
    }
    return {
        content: content.slice(start, cut),
        start,
        end: cut,
        nextStartChar: cut,
    };
}
108
/**
 * End-to-end host-side pipeline: HTML → markdown → chunked envelope.
 *
 * @param {{ html: string, url: string, title: string, selector: string | null, start: number, chunkSize: number }} opts
 *   `start` is the cursor into the generated markdown; a missing, non-finite
 *   or negative value is treated as 0. A falsy `chunkSize` selects
 *   `DEFAULT_CHUNK_SIZE`.
 * @returns {{ url: string, title: string, selector: string | null, total_chars: number,
 *   chunk_size: number, start: number, end: number, next_start_char: number | null, content: string }}
 *   `chunk_size` is the size of the emitted chunk (`end - start`), and
 *   `next_start_char` is whatever `chunkMarkdown` reports as `nextStartChar`.
 */
export function runExtractFromHtml(opts) {
    const md = htmlToMarkdown(opts.html);
    // `Math.max(0, undefined)` is NaN, which would surface as an empty chunk
    // with null cursors — indistinguishable from "end of document".
    const start = Number.isFinite(opts.start) ? Math.max(0, Math.floor(opts.start)) : 0;
    const chunk = chunkMarkdown({
        content: md,
        start,
        chunkSize: opts.chunkSize || DEFAULT_CHUNK_SIZE,
    });
    return {
        url: opts.url,
        title: opts.title,
        selector: opts.selector,
        total_chars: md.length,
        chunk_size: chunk.end - chunk.start,
        start: chunk.start,
        end: chunk.end,
        next_start_char: chunk.nextStartChar,
        content: chunk.content,
    };
}
128
+ export const __extractInternals = { // bundles the chunk-size limits into one export (presumably for tests — confirm); BOUNDARY_WINDOW_RATIO is not included
129
+ DEFAULT_CHUNK_SIZE,
130
+ MIN_CHUNK_SIZE,
131
+ MAX_CHUNK_SIZE,
132
+ };
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,129 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { buildExtractHtmlJs, chunkMarkdown, runExtractFromHtml } from './extract.js';
3
+ describe('chunkMarkdown', () => {
4
+ it('returns the full content when it fits in one chunk', () => {
5
+ const content = 'short body';
6
+ const r = chunkMarkdown({ content, start: 0, chunkSize: 20000 });
7
+ expect(r.content).toBe(content);
8
+ expect(r.start).toBe(0);
9
+ expect(r.end).toBe(content.length);
10
+ expect(r.nextStartChar).toBeNull();
11
+ });
12
+ it('emits next_start_char when more content remains', () => {
13
+ // Build content long enough that chunkSize cuts it mid-stream.
14
+ const para = 'p'.repeat(400);
15
+ const content = [para, para, para].join('\n\n');
16
+ const r = chunkMarkdown({ content, start: 0, chunkSize: 500 });
17
+ expect(r.nextStartChar).not.toBeNull();
18
+ expect(r.nextStartChar).toBeGreaterThan(0);
19
+ expect(r.nextStartChar).toBeLessThan(content.length);
20
+ });
21
+ it('prefers to break at a paragraph boundary inside the boundary window', () => {
22
+ // chunkSize=500, window=15% → [425, 500). Place `\n\n` at 450 so it lands
23
+ // inside the window; the chunker should snap the cut back to it.
24
+ const a = 'a'.repeat(450);
25
+ const b = 'b'.repeat(400);
26
+ const content = `${a}\n\n${b}`;
27
+ const r = chunkMarkdown({ content, start: 0, chunkSize: 500 });
28
+ expect(r.content.endsWith('\n\n')).toBe(true);
29
+ expect(r.nextStartChar).toBe(r.end);
30
+ expect(content.slice(r.end).startsWith('b')).toBe(true);
31
+ });
32
+ it('falls back to a single newline when no paragraph boundary is in window', () => {
33
+ // 6 lines × 90 chars joined by `\n` → `\n` at 90, 181, 272, 363, 454.
34
+ // chunkSize=500 with window [425, 500) catches the `\n` at 454.
35
+ const line = 'l'.repeat(90);
36
+ const content = Array.from({ length: 6 }, () => line).join('\n');
37
+ const r = chunkMarkdown({ content, start: 0, chunkSize: 500 });
38
+ expect(r.content.endsWith('\n')).toBe(true);
39
+ expect(content.slice(r.end).startsWith('l')).toBe(true);
40
+ });
41
+ it('hard-cuts when no boundary is found within the window', () => {
42
+ const content = 'x'.repeat(5000);
43
+ const r = chunkMarkdown({ content, start: 0, chunkSize: 500 });
44
+ expect(r.end).toBe(500);
45
+ expect(r.content).toHaveLength(500);
46
+ expect(r.nextStartChar).toBe(500);
47
+ });
48
+ it('handles start >= content.length with an empty final chunk', () => {
49
+ const content = 'hello';
50
+ const r = chunkMarkdown({ content, start: 5, chunkSize: 100 });
51
+ expect(r.content).toBe('');
52
+ expect(r.nextStartChar).toBeNull();
53
+ });
54
+ it('resumes from a provided start cursor until the stream terminates', () => {
55
+ const content = `${'a'.repeat(100)}\n\n${'b'.repeat(100)}\n\n${'c'.repeat(100)}`;
56
+ const first = chunkMarkdown({ content, start: 0, chunkSize: 110 });
57
+ expect(first.nextStartChar).not.toBeNull();
58
+ const second = chunkMarkdown({ content, start: first.nextStartChar, chunkSize: 110 });
59
+ expect(second.start).toBe(first.nextStartChar);
60
+ expect(second.content.length).toBeGreaterThan(0);
61
+ let cursor = second.nextStartChar;
62
+ let safety = 20;
63
+ while (cursor !== null && safety-- > 0) {
64
+ const step = chunkMarkdown({ content, start: cursor, chunkSize: 110 });
65
+ cursor = step.nextStartChar;
66
+ }
67
+ expect(cursor).toBeNull();
68
+ });
69
+ it('clamps chunk size to the configured minimum', () => {
70
+ const content = 'a'.repeat(2000);
71
+ const r = chunkMarkdown({ content, start: 0, chunkSize: 1 });
72
+ // MIN_CHUNK_SIZE is 100 — requesting 1 should still produce >= 100 chars.
73
+ expect(r.end).toBeGreaterThanOrEqual(100);
74
+ });
75
+ });
76
+ describe('runExtractFromHtml', () => {
77
+ it('converts HTML to markdown and wraps it in the chunking envelope', () => {
78
+ const html = '<article><h1>Title</h1><p>Hello <strong>world</strong>.</p></article>';
79
+ const r = runExtractFromHtml({
80
+ html,
81
+ url: 'https://example.com/a',
82
+ title: 'Example',
83
+ selector: 'article',
84
+ start: 0,
85
+ chunkSize: 20000,
86
+ });
87
+ expect(r.url).toBe('https://example.com/a');
88
+ expect(r.title).toBe('Example');
89
+ expect(r.selector).toBe('article');
90
+ expect(r.content).toContain('# Title');
91
+ expect(r.content).toContain('**world**');
92
+ expect(r.start).toBe(0);
93
+ expect(r.end).toBe(r.content.length);
94
+ expect(r.total_chars).toBe(r.content.length);
95
+ expect(r.next_start_char).toBeNull();
96
+ });
97
+ it('reports total_chars and chunk_size against the final markdown', () => {
98
+ const body = Array.from({ length: 30 }, (_, i) => `<p>paragraph ${i} ${'x'.repeat(200)}</p>`).join('');
99
+ const r = runExtractFromHtml({
100
+ html: `<main>${body}</main>`,
101
+ url: 'https://example.com/b',
102
+ title: 't',
103
+ selector: 'main',
104
+ start: 0,
105
+ chunkSize: 500,
106
+ });
107
+ expect(r.total_chars).toBeGreaterThan(r.end);
108
+ expect(r.chunk_size).toBe(r.end - r.start);
109
+ expect(r.next_start_char).toBe(r.end);
110
+ });
111
+ });
112
+ describe('buildExtractHtmlJs', () => {
113
+ it('embeds the selector as a JSON literal', () => {
114
+ const js = buildExtractHtmlJs('main.article');
115
+ expect(js).toContain('"main.article"');
116
+ });
117
+ it('uses null when no selector given', () => {
118
+ const js = buildExtractHtmlJs(null);
119
+ // With no selector, `sel` is emitted as the literal null, so the falsy check falls through to the default root.
120
+ expect(js).toContain('const sel = null;');
121
+ });
122
+ it('includes the denoise selector list', () => {
123
+ const js = buildExtractHtmlJs(null);
124
+ expect(js).toContain("'script'");
125
+ expect(js).toContain("'nav'");
126
+ expect(js).toContain("'iframe'");
127
+ expect(js).toContain("'[aria-hidden=\"true\"]'");
128
+ });
129
+ });
@@ -0,0 +1,76 @@
1
+ /**
2
+ * `browser find --css <sel>` — structured CSS query.
3
+ *
4
+ * Returns every match of a selector as a JSON envelope agents can read
5
+ * without parsing free-text snapshot output. Each entry carries two
6
+ * identifiers — a numeric `ref` (matching the snapshot contract) and a
7
+ * stable 0-based `nth` — so the agent can act on a specific result via
8
+ * either path:
9
+ *
10
+ * browser click <ref> // when ref is numeric
11
+ * browser click "<sel>" --nth <n> // always works
12
+ *
13
+ * Refs are *allocated on the spot* for matched elements that were not
14
+ * tagged by a prior snapshot: `data-opencli-ref` is set on the element
15
+ * and a fingerprint is written into `window.__opencli_ref_identity`
16
+ * (same shape the snapshot uses). That makes `find` a first-class entry
17
+ * point to the ref system — agents can skip running `browser state`
18
+ * when they already know the selector.
19
+ *
20
+ * Attributes are whitelisted to keep output small and high-signal.
21
+ * Invisible elements are still returned so agents can reason about
22
+ * offscreen vs truly-missing targets.
23
+ *
24
+ * When a matched element is a compound form control (date-like input,
25
+ * select, file input), the entry gains a `compound` field with the
26
+ * rich view from `compound.ts`. This is what kills the three biggest
27
+ * agent-fail modes on form pages (wrong date format, guessed options,
28
+ * re-uploaded files) without forcing agents to probe further.
29
+ */
30
+ import { type CompoundInfo } from './compound.js';
31
+ /** Whitelist of attributes surfaced per entry. Keep small; agents do not need full DOM dumps. */
32
+ export declare const FIND_ATTR_WHITELIST: readonly ["id", "class", "name", "type", "placeholder", "aria-label", "title", "href", "value", "role", "data-testid"];
33
+ export interface FindEntry {
34
+ /** Zero-based position within the match set — pair with `--nth` on downstream commands. */
35
+ nth: number;
36
+ /**
37
+ * Numeric data-opencli-ref. Find assigns one if the element was not
38
+ * tagged by a prior snapshot, so downstream `browser click <ref>` works
39
+ * directly off the find output without requiring `browser state` first.
40
+ */
41
+ ref: number;
42
+ tag: string;
43
+ role: string;
44
+ text: string;
45
+ attrs: Record<string, string>;
46
+ visible: boolean;
47
+ /**
48
+ * Rich view for date / time / datetime-local / month / week / select /
49
+ * file inputs. Omitted (undefined) for all other element types. See
50
+ * `compound.ts` for the shape.
51
+ */
52
+ compound?: CompoundInfo;
53
+ }
54
+ export interface FindResult {
55
+ matches_n: number;
56
+ entries: FindEntry[];
57
+ }
58
+ export interface FindError {
59
+ error: {
60
+ code: 'invalid_selector' | 'selector_not_found';
61
+ message: string;
62
+ hint?: string;
63
+ };
64
+ }
65
+ export interface FindOptions {
66
+ /** Max entries returned. Default 50 — enough to pick from without flooding context. */
67
+ limit?: number;
68
+ /** Max chars of trimmed text per entry. Default 120. */
69
+ textMax?: number;
70
+ }
71
+ /**
72
+ * Build the browser-side JS that performs the CSS query and emits the
73
+ * FindResult (or FindError) envelope. Evaluated inside `page.evaluate`.
74
+ */
75
+ export declare function buildFindJs(selector: string, opts?: FindOptions): string;
76
+ export declare function isFindError(result: unknown): result is FindError;
@@ -0,0 +1,179 @@
1
+ /**
2
+ * `browser find --css <sel>` — structured CSS query.
3
+ *
4
+ * Returns every match of a selector as a JSON envelope agents can read
5
+ * without parsing free-text snapshot output. Each entry carries two
6
+ * identifiers — a numeric `ref` (matching the snapshot contract) and a
7
+ * stable 0-based `nth` — so the agent can act on a specific result via
8
+ * either path:
9
+ *
10
+ * browser click <ref> // when ref is numeric
11
+ * browser click "<sel>" --nth <n> // always works
12
+ *
13
+ * Refs are *allocated on the spot* for matched elements that were not
14
+ * tagged by a prior snapshot: `data-opencli-ref` is set on the element
15
+ * and a fingerprint is written into `window.__opencli_ref_identity`
16
+ * (same shape the snapshot uses). That makes `find` a first-class entry
17
+ * point to the ref system — agents can skip running `browser state`
18
+ * when they already know the selector.
19
+ *
20
+ * Attributes are whitelisted to keep output small and high-signal.
21
+ * Invisible elements are still returned so agents can reason about
22
+ * offscreen vs truly-missing targets.
23
+ *
24
+ * When a matched element is a compound form control (date-like input,
25
+ * select, file input), the entry gains a `compound` field with the
26
+ * rich view from `compound.ts`. This is what kills the three biggest
27
+ * agent-fail modes on form pages (wrong date format, guessed options,
28
+ * re-uploaded files) without forcing agents to probe further.
29
+ */
30
+ import { COMPOUND_INFO_JS } from './compound.js';
31
/**
 * Whitelist of attributes surfaced per entry. Keep small; agents do not need
 * full DOM dumps. Frozen so the runtime value matches the `readonly` tuple in
 * the type declaration and cannot be widened by accident at runtime.
 */
export const FIND_ATTR_WHITELIST = Object.freeze([
    'id',
    'class',
    'name',
    'type',
    'placeholder',
    'aria-label',
    'title',
    'href',
    'value',
    'role',
    'data-testid',
]);
45
/**
 * Normalise a numeric option before it is inlined into generated source.
 * Anything that is not a finite, non-negative number yields `fallback`;
 * fractions are floored.
 */
function normalizeFindCount(value, fallback) {
    if (typeof value !== 'number' || !Number.isFinite(value) || value < 0) {
        return fallback;
    }
    return Math.floor(value);
}
/**
 * Build the browser-side JS that performs the CSS query and emits the
 * FindResult (or FindError) envelope. Evaluated inside `page.evaluate`.
 *
 * The selector and the attribute whitelist are embedded via `JSON.stringify`.
 * `limit` and `textMax` are inlined as numeric literals, so they are first
 * normalised to non-negative integers: a NaN limit would otherwise report
 * `matches_n > 0` with zero entries, a negative `textMax` would trim text
 * from the end, and a non-number would be spliced into the script as code.
 *
 * @param {string} selector CSS selector to query with `document.querySelectorAll`.
 * @param {{ limit?: number, textMax?: number }} [opts]
 *   `limit` — max entries returned (default 50);
 *   `textMax` — max chars of trimmed text per entry (default 120).
 * @returns {string} Source text of an IIFE expression.
 */
export function buildFindJs(selector, opts = {}) {
    const safeSel = JSON.stringify(selector);
    const limit = normalizeFindCount(opts?.limit, 50);
    const textMax = normalizeFindCount(opts?.textMax, 120);
    const whitelist = JSON.stringify(FIND_ATTR_WHITELIST);
    return `
    (() => {
      const sel = ${safeSel};
      const LIMIT = ${limit};
      const TEXT_MAX = ${textMax};
      const ATTR_WHITELIST = ${whitelist};

      ${COMPOUND_INFO_JS}

      let matches;
      try {
        matches = document.querySelectorAll(sel);
      } catch (e) {
        return {
          error: {
            code: 'invalid_selector',
            message: 'Invalid CSS selector: ' + sel + ' (' + ((e && e.message) || String(e)) + ')',
            hint: 'Check the selector syntax.',
          },
        };
      }

      if (matches.length === 0) {
        return {
          error: {
            code: 'selector_not_found',
            message: 'CSS selector ' + sel + ' matched 0 elements',
            hint: 'Use browser state to inspect the page, or try a less specific selector.',
          },
        };
      }

      function pickAttrs(el) {
        const out = {};
        for (const key of ATTR_WHITELIST) {
          const v = el.getAttribute(key);
          if (v != null && v !== '') out[key] = v;
        }
        return out;
      }

      function isVisible(el) {
        const rect = el.getBoundingClientRect();
        if (rect.width === 0 && rect.height === 0) return false;
        try {
          const style = getComputedStyle(el);
          if (style.display === 'none' || style.visibility === 'hidden') return false;
          if (parseFloat(style.opacity || '1') === 0) return false;
        } catch (_) {}
        return true;
      }

      // Ref allocation: reuse \`window.__opencli_ref_identity\` (the same map
      // snapshot populates) as the source of truth. For matched elements that
      // don't already carry a \`data-opencli-ref\`, assign the next free numeric
      // ref and write the fingerprint so the target resolver can verify it on
      // downstream click/type/get calls.
      const identity = (window.__opencli_ref_identity = window.__opencli_ref_identity || {});
      let maxRef = 0;
      for (const k in identity) {
        const n = parseInt(k, 10);
        if (!isNaN(n) && n > maxRef) maxRef = n;
      }
      // Also walk any \`data-opencli-ref\` already in the DOM in case the identity
      // map was cleared but annotations remain (e.g. soft navigation without a
      // fresh snapshot). Guarantees allocated refs don't collide.
      try {
        const tagged = document.querySelectorAll('[data-opencli-ref]');
        for (let t = 0; t < tagged.length; t++) {
          const v = tagged[t].getAttribute('data-opencli-ref');
          const n = v != null && /^\\d+$/.test(v) ? parseInt(v, 10) : NaN;
          if (!isNaN(n) && n > maxRef) maxRef = n;
        }
      } catch (_) {}

      function fingerprintOf(el) {
        return {
          tag: el.tagName.toLowerCase(),
          role: el.getAttribute('role') || '',
          text: (el.textContent || '').trim().slice(0, 30),
          ariaLabel: el.getAttribute('aria-label') || '',
          id: el.id || '',
          testId: el.getAttribute('data-testid') || el.getAttribute('data-test') || '',
        };
      }

      const take = Math.min(matches.length, LIMIT);
      const entries = [];
      for (let i = 0; i < take; i++) {
        const el = matches[i];
        const refAttr = el.getAttribute('data-opencli-ref');
        let refNum = refAttr != null && /^\\d+$/.test(refAttr) ? parseInt(refAttr, 10) : null;
        if (refNum === null) {
          refNum = ++maxRef;
          try { el.setAttribute('data-opencli-ref', '' + refNum); } catch (_) {}
          identity['' + refNum] = fingerprintOf(el);
        } else if (!identity['' + refNum]) {
          // Ref annotation survived but identity map was cleared — repopulate so the
          // target resolver's fingerprint check passes on downstream calls.
          identity['' + refNum] = fingerprintOf(el);
        }
        const text = (el.textContent || '').trim();
        const entry = {
          nth: i,
          ref: refNum,
          tag: el.tagName.toLowerCase(),
          role: el.getAttribute('role') || '',
          text: text.length > TEXT_MAX ? text.slice(0, TEXT_MAX) : text,
          attrs: pickAttrs(el),
          visible: isVisible(el),
        };
        const compound = compoundInfoOf(el);
        if (compound) entry.compound = compound;
        entries.push(entry);
      }

      return {
        matches_n: matches.length,
        entries,
      };
    })()
  `;
}
177
/**
 * Type guard for the error envelope produced by the generated find script.
 *
 * @param {unknown} result Value returned from evaluating the find script.
 * @returns {boolean} `true` only for non-null objects that expose an `error` key.
 */
export function isFindError(result) {
    if (result === null || typeof result !== 'object') {
        return false;
    }
    return 'error' in result;
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,120 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { buildFindJs, FIND_ATTR_WHITELIST, isFindError } from './find.js';
3
+ /**
4
+ * These tests validate the shape and options of the generated JS string
5
+ * (no DOM available in the default vitest unit env). Runtime behavior of
6
+ * the generated JS against a real DOM is covered by the browser e2e suite.
7
+ */
8
+ describe('buildFindJs', () => {
9
+ it('produces syntactically valid JS that can be parsed', () => {
10
+ expect(() => new Function(`return (${buildFindJs('.btn')});`)).not.toThrow();
11
+ });
12
+ it('embeds the selector via JSON.stringify (injection-safe)', () => {
13
+ const js = buildFindJs('[data-x="a\"b"]');
14
+ // Unescaped literal break-out must not appear
15
+ expect(js).not.toContain('[data-x="a"b"]');
16
+ // The JSON-encoded form (with escaped quotes) should appear instead
17
+ expect(js).toContain(JSON.stringify('[data-x="a\"b"]'));
18
+ });
19
+ it('emits invalid_selector + selector_not_found branches', () => {
20
+ const js = buildFindJs('.btn');
21
+ expect(js).toContain("code: 'invalid_selector'");
22
+ expect(js).toContain("code: 'selector_not_found'");
23
+ });
24
+ it('emits matches_n + entries + per-entry shape', () => {
25
+ const js = buildFindJs('.btn');
26
+ expect(js).toContain('matches_n: matches.length');
27
+ expect(js).toContain('entries.push(');
28
+ // Per-entry keys reviewers signed off on: nth, ref, tag, role, text, attrs, visible
29
+ expect(js).toContain('nth: i');
30
+ expect(js).toContain('ref: refNum');
31
+ expect(js).toContain('tag: el.tagName.toLowerCase()');
32
+ expect(js).toContain("el.getAttribute('role')");
33
+ expect(js).toContain('visible: isVisible(el)');
34
+ });
35
+ it('allocates fresh refs for untagged matches (write attribute + identity map)', () => {
36
+ const js = buildFindJs('.btn');
37
+ // On the just-annotated branch we must flip the attribute on the element
38
+ // so downstream `browser click <ref>` works off the find output.
39
+ expect(js).toContain("el.setAttribute('data-opencli-ref'");
40
+ // The fingerprint must also land in the shared identity map so the
41
+ // target resolver's stale-ref check has data to verify against.
42
+ expect(js).toContain('__opencli_ref_identity');
43
+ expect(js).toContain("identity['' + refNum] = fingerprintOf(el)");
44
+ // Allocation walks both the identity map and any existing data-opencli-ref
45
+ // annotations — guards against collisions after a soft nav.
46
+ expect(js).toContain("document.querySelectorAll('[data-opencli-ref]')");
47
+ });
48
+ it('fingerprint shape matches the snapshot / resolver contract', () => {
49
+ const js = buildFindJs('.btn');
50
+ // The six fields resolveTargetJs verifies in its stale_ref check.
51
+ for (const field of ['tag:', 'role:', 'text:', 'ariaLabel:', 'id:', 'testId:']) {
52
+ expect(js).toContain(field);
53
+ }
54
+ });
55
+ it('embeds defaults for limit and textMax', () => {
56
+ const js = buildFindJs('.btn');
57
+ expect(js).toContain('LIMIT = 50');
58
+ expect(js).toContain('TEXT_MAX = 120');
59
+ });
60
+ it('overrides limit and textMax when requested', () => {
61
+ const js = buildFindJs('.btn', { limit: 3, textMax: 20 });
62
+ expect(js).toContain('LIMIT = 3');
63
+ expect(js).toContain('TEXT_MAX = 20');
64
+ });
65
+ it('embeds the attribute whitelist verbatim (no style/onclick leaking)', () => {
66
+ const js = buildFindJs('.btn');
67
+ // Whitelist fields appear inside the generated JS
68
+ for (const key of FIND_ATTR_WHITELIST) {
69
+ expect(js).toContain(`"${key}"`);
70
+ }
71
+ // Sensitive / high-noise attrs must stay out of the whitelist
72
+ expect(FIND_ATTR_WHITELIST).not.toContain('style');
73
+ expect(FIND_ATTR_WHITELIST).not.toContain('onclick');
74
+ expect(FIND_ATTR_WHITELIST).not.toContain('onload');
75
+ });
76
+ it('inlines compoundInfoOf and attaches compound field per entry', () => {
77
+ const js = buildFindJs('input, select');
78
+ // Helper definition is inlined so each matched element can be classified.
79
+ expect(js).toContain('function compoundInfoOf(el)');
80
+ // The emitted entry opts in only when compound data is present — no noisy
81
+ // compound: null on every non-form element.
82
+ expect(js).toContain('const compound = compoundInfoOf(el);');
83
+ expect(js).toContain('if (compound) entry.compound = compound;');
84
+ // Spot-check all three compound families are covered in the inlined helper.
85
+ expect(js).toContain("'YYYY-MM-DD'");
86
+ expect(js).toContain("control: 'file'");
87
+ expect(js).toContain("control: 'select'");
88
+ });
89
+ it('keeps the whitelist small and explicit (guardrail against silent expansion)', () => {
90
+ expect(FIND_ATTR_WHITELIST).toEqual([
91
+ 'id',
92
+ 'class',
93
+ 'name',
94
+ 'type',
95
+ 'placeholder',
96
+ 'aria-label',
97
+ 'title',
98
+ 'href',
99
+ 'value',
100
+ 'role',
101
+ 'data-testid',
102
+ ]);
103
+ });
104
+ });
105
+ describe('isFindError', () => {
106
+ it('narrows { error: ... } as FindError', () => {
107
+ const payload = { error: { code: 'invalid_selector', message: 'x' } };
108
+ expect(isFindError(payload)).toBe(true);
109
+ if (isFindError(payload)) {
110
+ const err = payload;
111
+ expect(err.error.code).toBe('invalid_selector');
112
+ }
113
+ });
114
+ it('rejects successful envelopes', () => {
115
+ expect(isFindError({ matches_n: 0, entries: [] })).toBe(false);
116
+ expect(isFindError(null)).toBe(false);
117
+ expect(isFindError(undefined)).toBe(false);
118
+ expect(isFindError('string')).toBe(false);
119
+ });
120
+ });