ax-grep 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,157 @@
1
1
  # ax-grep
2
2
 
3
- Reserved package name for an accessibility tree extraction utility.
3
+ `ax-grep` extracts a semantic accessibility-like tree from HTML or from a live
4
+ web page. It is designed for agents, browser extensions, injected scripts, and
5
+ WebView bridges that need a compact, inspectable view of page structure.
4
6
 
7
+ It is not a replacement for a real browser accessibility tree. It approximates
8
+ one from DOM, ARIA, computed style, labels, focusability, and element state.
9
+
10
+ ## Install
11
+
12
+ ```sh
13
+ pnpm add ax-grep
14
+ ```
15
+
16
+ ## Which API Should I Use?
17
+
18
+ | Situation | Use |
19
+ | --- | --- |
20
+ | You have an HTML string from `fetch()`, SSR, or a Worker | `extract(html)` from `ax-grep` |
21
+ | You control a live page through Puppeteer, Playwright, or a WebView bridge | `createExtractorScript()` from `ax-grep` |
22
+ | Your code already runs inside the page, such as in a browser extension content script | `extract()` from `ax-grep/browser` |
23
+ | You want the explicit Worker-oriented static entry point | `extract(html)` from `ax-grep/static` |
24
+
25
+ ## Static HTML
26
+
27
+ ```ts
28
+ import { extract } from "ax-grep";
29
+
30
+ const response = await fetch("https://example.com");
31
+ const html = await response.text();
32
+ const tree = extract(html);
33
+ ```
34
+
35
+ `ax-grep/static` exposes the same static extractor as an explicit subpath; use
36
+ it when you want the smallest Worker-oriented import.
37
+
38
+ ## Browser Injection
39
+
40
+ ```ts
41
+ import { createExtractorScript } from "ax-grep";
42
+
43
+ const tree = await page.evaluate(createExtractorScript());
44
+ ```
45
+
46
+ Playwright example:
47
+
48
+ ```ts
49
+ import { chromium } from "playwright";
50
+ import { createExtractorScript, formatSemanticTreeText } from "ax-grep";
51
+
52
+ const browser = await chromium.launch();
53
+ const page = await browser.newPage();
54
+
55
+ await page.goto("https://example.com");
56
+
57
+ const tree = await page.evaluate(createExtractorScript({
58
+ includeBounds: false,
59
+ includeAttributes: false,
60
+ }));
61
+
62
+ console.log(formatSemanticTreeText(tree));
63
+
64
+ await browser.close();
65
+ ```
66
+
67
+ WebView-style injection:
68
+
69
+ ```ts
70
+ import { createExtractorScript } from "ax-grep";
71
+
72
+ const script = createExtractorScript({
73
+ mode: "interactive",
74
+ format: "json",
75
+ });
76
+
77
+ // Android: webView.evaluateJavascript(script, callback)
78
+ // iOS: webView.evaluateJavaScript(script, completionHandler)
79
+ ```
80
+
81
+ ## Direct In-Page Usage
82
+
83
+ ```ts
84
+ import { extract, formatSemanticTreeText } from "ax-grep/browser";
85
+
86
+ const tree = extract({
87
+ mode: "interactive",
88
+ includeBounds: false,
89
+ });
90
+
91
+ console.log(formatSemanticTreeText(tree));
92
+ ```
93
+
94
+ ## Static SSR HTML
95
+
96
+ ```ts
97
+ import { extract } from "ax-grep/static";
98
+ import { formatSemanticTreeText } from "ax-grep";
99
+
100
+ export default {
101
+ async fetch(request: Request): Promise<Response> {
102
+ const url = new URL(request.url).searchParams.get("url");
103
+ if (!url) return new Response("Missing url", { status: 400 });
104
+
105
+ const response = await fetch(url);
106
+ const html = await response.text();
107
+ const tree = extract(html);
108
+
109
+ return new Response(formatSemanticTreeText(tree), {
110
+ headers: { "content-type": "text/plain; charset=utf-8" },
111
+ });
112
+ },
113
+ };
114
+ ```
115
+
116
+ Static extraction parses the HTML string directly, so it can infer roles, names,
117
+ labels, ARIA state, links, forms, headings, tables, and lists from SSR markup. It
118
+ cannot see computed style, layout bounds, client-rendered DOM, shadow DOM, or
119
+ iframe contents.
120
+
121
+ By default, static extraction prunes hidden markup and collapsed controlled
122
+ regions, skips non-semantic payload tags, summarizes very large child lists, and
123
+ collapses repeated template-like subtrees. It also infers broad source profiles
124
+ from the HTML, preserving more links for wiki-like pages while tightening dense
125
+ link-list summarization for forum-like pages.
126
+
127
+ ## Mutation Stream
128
+
129
+ ```ts
130
+ import { observeSemanticTree } from "ax-grep/browser";
131
+
132
+ const observer = observeSemanticTree((change) => {
133
+ console.log(change.mutationCount, change.tree);
134
+ }, { debounceMs: 50 });
135
+
136
+ observer.disconnect();
137
+ ```
138
+
139
+ For injected-script use, `createObserverScript()` installs an observer on
140
+ `window.__AX_LITE_OBSERVER__` and dispatches `__AX_LITE_OBSERVER__:change`
141
+ events.
142
+
143
+ ## Benchmarking
144
+
145
+ ```sh
146
+ pnpm compare:sample
147
+ pnpm compare:static https://example.com https://news.ycombinator.com
148
+ pnpm compare:tokens https://example.com https://news.ycombinator.com
149
+ pnpm compare:static:korea-social
150
+ pnpm compare:tokens:korea-social
151
+ pnpm compare:static:china-japan
152
+ pnpm compare:tokens:china-japan
153
+ ```
154
+
155
+ The comparison scripts compare `ax-grep` output with `agent-browser snapshot`
156
+ output and estimate token cost for compact agent prompts. See
157
+ `docs/comparison-baseline.md` for the current baseline run.
@@ -0,0 +1,11 @@
1
+ import { a as SemanticTreeOptions, S as SemanticNode, f as SemanticTreeChange, g as SemanticTreeObserverOptions } from './types-dgf3brcf.js';
2
+
3
+ declare function extractSemanticTree(options?: SemanticTreeOptions): SemanticNode;
4
+
5
+ declare function formatSemanticTreeText(node: SemanticNode): string;
6
+ declare function observeSemanticTree(onChange: (change: SemanticTreeChange) => void, options?: SemanticTreeObserverOptions): {
7
+ disconnect: () => void;
8
+ snapshot: () => SemanticNode;
9
+ };
10
+
11
+ export { extractSemanticTree as extract, extractSemanticTree, formatSemanticTreeText, observeSemanticTree };
@@ -0,0 +1,12 @@
1
+ import {
2
+ extractSemanticTree,
3
+ formatSemanticTreeText,
4
+ observeSemanticTree
5
+ } from "./chunk-U3GDKPLQ.js";
6
+ export {
7
+ extractSemanticTree as extract,
8
+ extractSemanticTree,
9
+ formatSemanticTreeText,
10
+ observeSemanticTree
11
+ };
12
+ //# sourceMappingURL=browser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}