ax-grep 0.0.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +106 -1
- package/dist/browser.d.ts +11 -0
- package/dist/browser.js +12 -0
- package/dist/browser.js.map +1 -0
- package/dist/chunk-HPZ32BKV.js +612 -0
- package/dist/chunk-HPZ32BKV.js.map +1 -0
- package/dist/chunk-ZXTURCRT.js +925 -0
- package/dist/chunk-ZXTURCRT.js.map +1 -0
- package/dist/cli.d.ts +10 -0
- package/dist/cli.js +22364 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +436 -0
- package/dist/index.js.map +1 -0
- package/dist/static.d.ts +6 -0
- package/dist/static.js +8 -0
- package/dist/static.js.map +1 -0
- package/dist/types-gwHWhYmw.d.ts +3660 -0
- package/docs/README.md +19 -0
- package/docs/agent-handoff.md +95 -0
- package/docs/agent-readiness.md +38 -0
- package/docs/assets/ax-grep-benchmark.png +0 -0
- package/docs/assets/ax-grep-og.png +0 -0
- package/docs/assets/ax-grep-search.png +0 -0
- package/docs/benchmarks.md +123 -0
- package/docs/cli-agent.md +194 -0
- package/docs/comparison-baseline.md +625 -0
- package/docs/features.md +28 -0
- package/docs/library-api.md +211 -0
- package/docs/progress.md +1306 -0
- package/package.json +92 -6
- package/skills/ax-grep-cli/SKILL.md +89 -0
- package/skills.sh +24 -0
- package/index.js +0 -1
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# Library API and Browser Injection
|
|
2
|
+
|
|
3
|
+
## Entry Points
|
|
4
|
+
|
|
5
|
+
| Situation | Use |
|
|
6
|
+
| --- | --- |
|
|
7
|
+
| HTML string from `fetch()`, SSR, or a Worker | `extract(html)` from `ax-grep` |
|
|
8
|
+
| Small Worker/static-only bundle | `extract(html)` from `ax-grep/static` |
|
|
9
|
+
| Code already running inside the page | `extract()` from `ax-grep/browser` |
|
|
10
|
+
| Puppeteer, Playwright, WebView, or external page controller | `createExtractorScript()` from `ax-grep` |
|
|
11
|
+
|
|
12
|
+
## Static HTML
|
|
13
|
+
|
|
14
|
+
`ax-grep` is ESM-only and requires Node 18 or newer.
|
|
15
|
+
|
|
16
|
+
```ts
|
|
17
|
+
import { extract } from "ax-grep";
|
|
18
|
+
|
|
19
|
+
const response = await fetch("https://example.com");
|
|
20
|
+
const html = await response.text();
|
|
21
|
+
const tree = extract(html);
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
The root `extract(html)` function is the same static extractor exposed at
|
|
25
|
+
`ax-grep/static`.
|
|
26
|
+
|
|
27
|
+
```ts
|
|
28
|
+
import { extract } from "ax-grep/static";
|
|
29
|
+
|
|
30
|
+
const tree = extract(html, {
|
|
31
|
+
includeAttributes: false,
|
|
32
|
+
});
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
CommonJS services can load it with a dynamic `import()` from inside an async function (CommonJS modules do not support top-level `await`):
|
|
36
|
+
|
|
37
|
+
```js
|
|
38
|
+
const { extract, formatSemanticTreeText } = await import("ax-grep");
|
|
39
|
+
const tree = extract(html, { includeAttributes: false });
|
|
40
|
+
console.log(formatSemanticTreeText(tree));
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Static extraction can infer roles, names, labels, ARIA state, links, forms,
|
|
44
|
+
headings, tables, and lists from SSR markup. It cannot see computed CSS, layout
|
|
45
|
+
bounds, client-rendered DOM, shadow DOM, iframe contents, or post-load
|
|
46
|
+
mutations.
|
|
47
|
+
|
|
48
|
+
## Browser Injection
|
|
49
|
+
|
|
50
|
+
Use `createExtractorScript()` when you control a page from Puppeteer,
|
|
51
|
+
Playwright, WebView, or an agent browser.
|
|
52
|
+
|
|
53
|
+
```ts
|
|
54
|
+
import { createExtractorScript } from "ax-grep";
|
|
55
|
+
|
|
56
|
+
const tree = await page.evaluate(createExtractorScript());
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Playwright example:
|
|
60
|
+
|
|
61
|
+
```ts
|
|
62
|
+
import { chromium } from "playwright";
|
|
63
|
+
import { createExtractorScript, formatSemanticTreeText } from "ax-grep";
|
|
64
|
+
|
|
65
|
+
const browser = await chromium.launch();
|
|
66
|
+
const page = await browser.newPage();
|
|
67
|
+
|
|
68
|
+
await page.goto("https://example.com");
|
|
69
|
+
|
|
70
|
+
const tree = await page.evaluate(createExtractorScript({
|
|
71
|
+
includeBounds: false,
|
|
72
|
+
includeAttributes: false,
|
|
73
|
+
}));
|
|
74
|
+
|
|
75
|
+
console.log(formatSemanticTreeText(tree));
|
|
76
|
+
|
|
77
|
+
await browser.close();
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
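WebView-style injection works the same way: build the script once and pass it to the host's JavaScript evaluation API (`page.evaluate` stands in for that API below):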
|
|
81
|
+
|
|
82
|
+
```ts
|
|
83
|
+
import { createExtractorScript } from "ax-grep";
|
|
84
|
+
|
|
85
|
+
const script = createExtractorScript({
|
|
86
|
+
mode: "interactive",
|
|
87
|
+
format: "text",
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
const text = await page.evaluate(script);
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Android and iOS WebViews return the script result through their normal
|
|
94
|
+
JavaScript evaluation callbacks:
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
// Android: the result callback of webView.evaluateJavascript(script, callback) receives a JSON-encoded string.
|
|
98
|
+
// iOS: the completion handler of webView.evaluateJavaScript(script) receives the text or object value.
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Direct In-Page Usage
|
|
102
|
+
|
|
103
|
+
Use `ax-grep/browser` when your code is already executing in the page, such as a
|
|
104
|
+
browser extension content script. Extension content scripts usually need a
|
|
105
|
+
bundler, and the result reflects the content script world that executed it.
|
|
106
|
+
|
|
107
|
+
```ts
|
|
108
|
+
import { extract, formatSemanticTreeText } from "ax-grep/browser";
|
|
109
|
+
|
|
110
|
+
const tree = extract({
|
|
111
|
+
mode: "interactive",
|
|
112
|
+
includeBounds: false,
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
console.log(formatSemanticTreeText(tree));
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## Output Shape
|
|
119
|
+
|
|
120
|
+
`extract()` returns a `SemanticNode` tree:
|
|
121
|
+
|
|
122
|
+
```ts
|
|
123
|
+
type SemanticNode = {
|
|
124
|
+
id: string;
|
|
125
|
+
tag: string;
|
|
126
|
+
role: string | null;
|
|
127
|
+
name: string;
|
|
128
|
+
interactive: boolean;
|
|
129
|
+
focusable: boolean;
|
|
130
|
+
selector?: string;
|
|
131
|
+
xpath?: string;
|
|
132
|
+
text?: string;
|
|
133
|
+
value?: string;
|
|
134
|
+
state?: Record<string, unknown>;
|
|
135
|
+
attributes?: Record<string, string>;
|
|
136
|
+
children: SemanticNode[];
|
|
137
|
+
};
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Use `formatSemanticTreeText(tree)` for a compact prompt-friendly text view, or
|
|
141
|
+
`flattenSemanticTree(tree)` and `summarizeSemanticTree(tree)` for analysis and
|
|
142
|
+
benchmarks.
|
|
143
|
+
|
|
144
|
+
## Options
|
|
145
|
+
|
|
146
|
+
```ts
|
|
147
|
+
const tree = extract(html, {
|
|
148
|
+
mode: "compact",
|
|
149
|
+
includeAttributes: false,
|
|
150
|
+
includeHidden: false,
|
|
151
|
+
includeSelectOptions: true,
|
|
152
|
+
maxTextLength: 240,
|
|
153
|
+
});
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
| Option | Default | Notes |
|
|
157
|
+
| --- | ---: | --- |
|
|
158
|
+
| `mode` | `"compact"` | Use `"interactive"` to keep mostly actionable nodes. |
|
|
159
|
+
| `includeAttributes` | `true` | Turn off for smaller prompt payloads. |
|
|
160
|
+
| `includeHidden` | `false` | Keep hidden/collapsed content only when needed. |
|
|
161
|
+
| `includeSelectOptions` | `true` | Useful for agent planning, verbose for huge selects. |
|
|
162
|
+
| `includeTextNodes` | browser: `true`, static: `false` | Static extraction relies more on semantic names by default. |
|
|
163
|
+
| `maxTextLength` | `240` | Clips long direct text/name fragments. |
|
|
164
|
+
| `excludeLikelyAds` | `false` | Optional heuristic pruning for benchmark or prompt use. |
|
|
165
|
+
| `summarizeLargeSubtrees` | static: `true` | Keeps SSR payloads bounded. |
|
|
166
|
+
| `summarizeLikelyLinkFarms` | static: `true` | Helps forum/sidebar/navigation-heavy pages. |
|
|
167
|
+
|
|
168
|
+
## Mutation Stream
|
|
169
|
+
|
|
170
|
+
```ts
|
|
171
|
+
import { observeSemanticTree } from "ax-grep/browser";
|
|
172
|
+
|
|
173
|
+
const observer = observeSemanticTree((change) => {
|
|
174
|
+
console.log(change.mutationCount, change.tree);
|
|
175
|
+
}, { debounceMs: 50 });
|
|
176
|
+
|
|
177
|
+
observer.disconnect();
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
For injected-script use, `createObserverScript()` installs an observer on
|
|
181
|
+
`window.__AX_LITE_OBSERVER__` and dispatches `__AX_LITE_OBSERVER__:change`
|
|
182
|
+
events.
|
|
183
|
+
|
|
184
|
+
```ts
|
|
185
|
+
import { createObserverScript } from "ax-grep";
|
|
186
|
+
|
|
187
|
+
await page.evaluate(createObserverScript({ format: "text" }));
|
|
188
|
+
await page.evaluate(() => window.__AX_LITE_OBSERVER__?.disconnect());
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Worker Example
|
|
192
|
+
|
|
193
|
+
```ts
|
|
194
|
+
import { extract } from "ax-grep/static";
|
|
195
|
+
import { formatSemanticTreeText } from "ax-grep";
|
|
196
|
+
|
|
197
|
+
export default {
|
|
198
|
+
async fetch(request: Request): Promise<Response> {
|
|
199
|
+
const url = new URL(request.url).searchParams.get("url");
|
|
200
|
+
if (!url) return new Response("Missing url", { status: 400 });
|
|
201
|
+
|
|
202
|
+
const response = await fetch(url);
|
|
203
|
+
const html = await response.text();
|
|
204
|
+
const tree = extract(html);
|
|
205
|
+
|
|
206
|
+
return new Response(formatSemanticTreeText(tree), {
|
|
207
|
+
headers: { "content-type": "text/plain; charset=utf-8" },
|
|
208
|
+
});
|
|
209
|
+
},
|
|
210
|
+
};
|
|
211
|
+
```
|