ax-grep 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +154 -1
- package/dist/browser.d.ts +11 -0
- package/dist/browser.js +12 -0
- package/dist/browser.js.map +1 -0
- package/dist/chunk-U3GDKPLQ.js +578 -0
- package/dist/chunk-U3GDKPLQ.js.map +1 -0
- package/dist/chunk-Z7V6PIPH.js +735 -0
- package/dist/chunk-Z7V6PIPH.js.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +402 -0
- package/dist/index.js.map +1 -0
- package/dist/static.d.ts +6 -0
- package/dist/static.js +8 -0
- package/dist/static.js.map +1 -0
- package/dist/types-dgf3brcf.d.ts +74 -0
- package/package.json +61 -5
- package/index.js +0 -1
package/README.md
CHANGED
|
@@ -1,4 +1,157 @@
|
|
|
1
1
|
# ax-grep
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
`ax-grep` extracts a semantic accessibility-like tree from HTML or from a live
|
|
4
|
+
web page. It is designed for agents, browser extensions, injected scripts, and
|
|
5
|
+
WebView bridges that need a compact, inspectable view of page structure.
|
|
4
6
|
|
|
7
|
+
It is not a replacement for a real browser accessibility tree. It approximates
|
|
8
|
+
one from DOM, ARIA, computed style, labels, focusability, and element state.
|
|
9
|
+
|
|
10
|
+
## Install
|
|
11
|
+
|
|
12
|
+
```sh
|
|
13
|
+
pnpm add ax-grep
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Which API Should I Use?
|
|
17
|
+
|
|
18
|
+
| Situation | Use |
|
|
19
|
+
| --- | --- |
|
|
20
|
+
| You have an HTML string from `fetch()`, SSR, or a Worker | `extract(html)` from `ax-grep` |
|
|
21
|
+
| You control a live page through Puppeteer, Playwright, or a WebView bridge | `createExtractorScript()` from `ax-grep` |
|
|
22
|
+
| Your code already runs inside the page, such as a browser extension content script | `extract()` from `ax-grep/browser` |
|
|
23
|
+
| You want the explicit Worker-oriented static entry | `extract(html)` from `ax-grep/static` |
|
|
24
|
+
|
|
25
|
+
## Static HTML
|
|
26
|
+
|
|
27
|
+
```ts
|
|
28
|
+
import { extract } from "ax-grep";
|
|
29
|
+
|
|
30
|
+
const response = await fetch("https://example.com");
|
|
31
|
+
const html = await response.text();
|
|
32
|
+
const tree = extract(html);
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Use `ax-grep/static` for the same static extractor as an explicit subpath when
|
|
36
|
+
you want the smallest Worker-oriented import.
|
|
37
|
+
|
|
38
|
+
## Browser Injection
|
|
39
|
+
|
|
40
|
+
```ts
|
|
41
|
+
import { createExtractorScript } from "ax-grep";
|
|
42
|
+
|
|
43
|
+
const tree = await page.evaluate(createExtractorScript());
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Playwright example:
|
|
47
|
+
|
|
48
|
+
```ts
|
|
49
|
+
import { chromium } from "playwright";
|
|
50
|
+
import { createExtractorScript, formatSemanticTreeText } from "ax-grep";
|
|
51
|
+
|
|
52
|
+
const browser = await chromium.launch();
|
|
53
|
+
const page = await browser.newPage();
|
|
54
|
+
|
|
55
|
+
await page.goto("https://example.com");
|
|
56
|
+
|
|
57
|
+
const tree = await page.evaluate(createExtractorScript({
|
|
58
|
+
includeBounds: false,
|
|
59
|
+
includeAttributes: false,
|
|
60
|
+
}));
|
|
61
|
+
|
|
62
|
+
console.log(formatSemanticTreeText(tree));
|
|
63
|
+
|
|
64
|
+
await browser.close();
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
WebView-style injection:
|
|
68
|
+
|
|
69
|
+
```ts
|
|
70
|
+
import { createExtractorScript } from "ax-grep";
|
|
71
|
+
|
|
72
|
+
const script = createExtractorScript({
|
|
73
|
+
mode: "interactive",
|
|
74
|
+
format: "json",
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
// Android: webView.evaluateJavascript(script, callback)
|
|
78
|
+
// iOS: webView.evaluateJavaScript(script, completionHandler)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Direct In-Page Usage
|
|
82
|
+
|
|
83
|
+
```ts
|
|
84
|
+
import { extract, formatSemanticTreeText } from "ax-grep/browser";
|
|
85
|
+
|
|
86
|
+
const tree = extract({
|
|
87
|
+
mode: "interactive",
|
|
88
|
+
includeBounds: false,
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
console.log(formatSemanticTreeText(tree));
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Static SSR HTML
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
import { extract } from "ax-grep/static";
|
|
98
|
+
import { formatSemanticTreeText } from "ax-grep";
|
|
99
|
+
|
|
100
|
+
export default {
|
|
101
|
+
async fetch(request: Request): Promise<Response> {
|
|
102
|
+
const url = new URL(request.url).searchParams.get("url");
|
|
103
|
+
if (!url) return new Response("Missing url", { status: 400 });
|
|
104
|
+
|
|
105
|
+
const response = await fetch(url);
|
|
106
|
+
const html = await response.text();
|
|
107
|
+
const tree = extract(html);
|
|
108
|
+
|
|
109
|
+
return new Response(formatSemanticTreeText(tree), {
|
|
110
|
+
headers: { "content-type": "text/plain; charset=utf-8" },
|
|
111
|
+
});
|
|
112
|
+
},
|
|
113
|
+
};
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Static extraction parses the HTML string directly, so it can infer roles, names,
|
|
117
|
+
labels, ARIA state, links, forms, headings, tables, and lists from SSR markup. It
|
|
118
|
+
cannot see computed style, layout bounds, client-rendered DOM, shadow DOM, or
|
|
119
|
+
iframe contents.
|
|
120
|
+
|
|
121
|
+
By default, static extraction prunes hidden markup and collapsed controlled
|
|
122
|
+
regions, skips non-semantic payload tags, summarizes very large child lists, and
|
|
123
|
+
collapses repeated template-like subtrees. It also infers broad source profiles
|
|
124
|
+
from the HTML, preserving more links for wiki-like pages while tightening dense
|
|
125
|
+
link-list summarization for forum-like pages.
|
|
126
|
+
|
|
127
|
+
## Mutation Stream
|
|
128
|
+
|
|
129
|
+
```ts
|
|
130
|
+
import { observeSemanticTree } from "ax-grep/browser";
|
|
131
|
+
|
|
132
|
+
const observer = observeSemanticTree((change) => {
|
|
133
|
+
console.log(change.mutationCount, change.tree);
|
|
134
|
+
}, { debounceMs: 50 });
|
|
135
|
+
|
|
136
|
+
observer.disconnect();
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
For injected-script use, `createObserverScript()` installs an observer on
|
|
140
|
+
`window.__AX_LITE_OBSERVER__` and dispatches `__AX_LITE_OBSERVER__:change`
|
|
141
|
+
events.
|
|
142
|
+
|
|
143
|
+
## Benchmarking
|
|
144
|
+
|
|
145
|
+
```sh
|
|
146
|
+
pnpm compare:sample
|
|
147
|
+
pnpm compare:static https://example.com https://news.ycombinator.com
|
|
148
|
+
pnpm compare:tokens https://example.com https://news.ycombinator.com
|
|
149
|
+
pnpm compare:static:korea-social
|
|
150
|
+
pnpm compare:tokens:korea-social
|
|
151
|
+
pnpm compare:static:china-japan
|
|
152
|
+
pnpm compare:tokens:china-japan
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
The comparison scripts compare `ax-grep` output with `agent-browser snapshot`
|
|
156
|
+
output and estimate token cost for compact agent prompts. See
|
|
157
|
+
`docs/comparison-baseline.md` for the current baseline run.
|
package/dist/browser.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { a as SemanticTreeOptions, S as SemanticNode, f as SemanticTreeChange, g as SemanticTreeObserverOptions } from './types-dgf3brcf.js';
|
|
2
|
+
|
|
3
|
+
declare function extractSemanticTree(options?: SemanticTreeOptions): SemanticNode;
|
|
4
|
+
|
|
5
|
+
declare function formatSemanticTreeText(node: SemanticNode): string;
|
|
6
|
+
declare function observeSemanticTree(onChange: (change: SemanticTreeChange) => void, options?: SemanticTreeObserverOptions): {
|
|
7
|
+
disconnect: () => void;
|
|
8
|
+
snapshot: () => SemanticNode;
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
export { extractSemanticTree as extract, extractSemanticTree, formatSemanticTreeText, observeSemanticTree };
|
package/dist/browser.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import {
|
|
2
|
+
extractSemanticTree,
|
|
3
|
+
formatSemanticTreeText,
|
|
4
|
+
observeSemanticTree
|
|
5
|
+
} from "./chunk-U3GDKPLQ.js";
|
|
6
|
+
export {
|
|
7
|
+
extractSemanticTree as extract,
|
|
8
|
+
extractSemanticTree,
|
|
9
|
+
formatSemanticTreeText,
|
|
10
|
+
observeSemanticTree
|
|
11
|
+
};
|
|
12
|
+
//# sourceMappingURL=browser.js.map
|
package/dist/browser.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|