ax-grep 0.1.1 → 0.1.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  export { formatSemanticTreeText, observeSemanticTree } from './browser.js';
2
2
  import { StaticSemanticTreeOptions } from './static.js';
3
- import { E as ExtractorScriptOptions, O as ObserverScriptOptions, S as SemanticNode, a as SemanticTreeOptions } from './types-gwHWhYmw.js';
4
- export { A as AgentAction, b as AgentActionTargetChoice, c as AgentAnswerPlan, d as AgentBrowserHtmlCapture, e as AgentCitation, f as AgentContinuationMode, g as AgentContract, h as AgentContractFeature, i as AgentExecutionMode, j as AgentExecutorStep, k as AgentExpectedOutcome, l as AgentExpectedOutcomeKind, m as AgentFindMatch, n as AgentFindSummary, o as AgentFormChoice, p as AgentFormField, q as AgentFormHiddenField, r as AgentHandoff, s as AgentJsonEnvelope, t as AgentLoopDecision, u as AgentLoopDirective, v as AgentNext, w as AgentPageAction, x as AgentPageApiEndpoint, y as AgentPageAppHint, z as AgentPageAuthorLink, B as AgentPageBarrier, C as AgentPageBreadcrumb, D as AgentPageBreadcrumbItem, F as AgentPageCheck, G as AgentPageCitation, H as AgentPageClientState, I as AgentPageCodeBlock, J as AgentPageConfig, K as AgentPageContactPoint, L as AgentPageDataTable, M as AgentPageDataset, N as AgentPageDecision, P as AgentPageEmbed, Q as AgentPageEvidence, R as AgentPageFaq, T as AgentPageForm, U as AgentPageHttpPolicy, V as AgentPageHydration, W as AgentPageIdentity, X as AgentPageKeyValue, Y as AgentPageMedia, Z as AgentPageMetaFact, _ as AgentPageMetadata, $ as AgentPageMobileHint, a0 as AgentPageOffer, a1 as AgentPagePagination, a2 as AgentPageProvenance, a3 as AgentPageResource, a4 as AgentPageRuntime, a5 as AgentPageSchemaFact, a6 as AgentPageSchemaFactValue, a7 as AgentPageSection, a8 as AgentPageTimeline, a9 as AgentPageToc, aa as AgentPageTocItem, ab as AgentPageTopic, ac as AgentPageTranscript, ad as AgentQualityGate, ae as AgentQualityGateKind, af as AgentReadTarget, ag as AgentReadValue, ah as AgentReadValueInline, ai as AgentReadValueKind, aj as AgentReadValuePayload, ak as AgentReadValueReference, al as AgentReadValueScalar, am as AgentResultChoice, an as AgentRoutingIntent, ao as AgentSearchDecision, ap as AgentSemanticSummary, aq as AgentSignal, ar as AgentSignalKind, as as AgentSignalSeverity, at as AgentSourceChoice, au as AgentSourceSearch, 
av as AgentSourceSearchResult, aw as AgentStatus, ax as AgentSummary, ay as AgentTarget, az as AgentVerification, aA as ExtractMode, aB as OutputFormat, aC as SemanticNodeBounds, aD as SemanticNodeState, aE as SemanticTreeChange, aF as SemanticTreeObserverOptions } from './types-gwHWhYmw.js';
3
+ import { E as ExtractorScriptOptions, O as ObserverScriptOptions, S as SemanticNode, a as SemanticTreeOptions } from './types-K1hqb7Pq.js';
4
+ export { A as AgentAction, b as AgentActionTargetChoice, c as AgentAnswerPlan, d as AgentBrowserHtmlCapture, e as AgentCitation, f as AgentContinuationMode, g as AgentContract, h as AgentContractFeature, i as AgentExecutionMode, j as AgentExecutorStep, k as AgentExpectedOutcome, l as AgentExpectedOutcomeKind, m as AgentFindMatch, n as AgentFindSummary, o as AgentFormChoice, p as AgentFormField, q as AgentFormHiddenField, r as AgentHandoff, s as AgentJsonEnvelope, t as AgentLoopDecision, u as AgentLoopDirective, v as AgentNext, w as AgentPageAction, x as AgentPageApiEndpoint, y as AgentPageAppHint, z as AgentPageAuthorLink, B as AgentPageBarrier, C as AgentPageBreadcrumb, D as AgentPageBreadcrumbItem, F as AgentPageCheck, G as AgentPageCitation, H as AgentPageClientState, I as AgentPageCodeBlock, J as AgentPageConfig, K as AgentPageContactPoint, L as AgentPageDataTable, M as AgentPageDataset, N as AgentPageDecision, P as AgentPageEmbed, Q as AgentPageEvidence, R as AgentPageFaq, T as AgentPageForm, U as AgentPageHttpPolicy, V as AgentPageHydration, W as AgentPageIdentity, X as AgentPageKeyValue, Y as AgentPageMedia, Z as AgentPageMetaFact, _ as AgentPageMetadata, $ as AgentPageMobileHint, a0 as AgentPageOffer, a1 as AgentPagePagination, a2 as AgentPageProvenance, a3 as AgentPageResource, a4 as AgentPageRuntime, a5 as AgentPageSchemaFact, a6 as AgentPageSchemaFactValue, a7 as AgentPageSection, a8 as AgentPageTimeline, a9 as AgentPageToc, aa as AgentPageTocItem, ab as AgentPageTopic, ac as AgentPageTranscript, ad as AgentQualityGate, ae as AgentQualityGateKind, af as AgentReadTarget, ag as AgentReadValue, ah as AgentReadValueInline, ai as AgentReadValueKind, aj as AgentReadValuePayload, ak as AgentReadValueReference, al as AgentReadValueScalar, am as AgentResultChoice, an as AgentRoutingIntent, ao as AgentSearchDecision, ap as AgentSemanticSummary, aq as AgentSignal, ar as AgentSignalKind, as as AgentSignalSeverity, at as AgentSourceChoice, au as AgentSourceSearch, 
av as AgentSourceSearchResult, aw as AgentStatus, ax as AgentSummary, ay as AgentTarget, az as AgentVerification, aA as ExtractMode, aB as OutputFormat, aC as SemanticNodeBounds, aD as SemanticNodeState, aE as SemanticTreeChange, aF as SemanticTreeObserverOptions } from './types-K1hqb7Pq.js';
5
5
 
6
6
  declare function extract(html: string, options?: StaticSemanticTreeOptions): SemanticNode;
7
7
  declare function createExtractorScript(options?: ExtractorScriptOptions): string;
package/dist/index.js CHANGED
File without changes
package/dist/index.js.map CHANGED
File without changes
package/dist/static.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { a as SemanticTreeOptions, S as SemanticNode } from './types-gwHWhYmw.js';
1
+ import { a as SemanticTreeOptions, S as SemanticNode } from './types-K1hqb7Pq.js';
2
2
 
3
3
  type StaticSemanticTreeOptions = Pick<SemanticTreeOptions, "excludeLikelyAds" | "excludeLikelyBoilerplate" | "includeAttributes" | "includeHidden" | "includeSelectOptions" | "includeTextNodes" | "maxChildrenPerNode" | "maxLinkFarmChildren" | "maxRepeatedSubtreeInstances" | "maxTextLength" | "mode" | "pruneCollapsedSubtrees" | "pruneLikelyClosedOverlays" | "summarizeLargeSubtrees" | "summarizeLikelyLinkFarms" | "summarizeRepeatedSubtrees">;
4
4
  declare function extractStaticSemanticTree(html: string, options?: StaticSemanticTreeOptions): SemanticNode;
package/dist/static.js CHANGED
File without changes
File without changes
@@ -86,7 +86,7 @@ type ObserverScriptOptions = SemanticTreeObserverOptions & {
86
86
  globalName?: string;
87
87
  };
88
88
  type AgentStatus = "ready" | "choose-result" | "verify" | "needs-browser" | "error";
89
- type AgentBrowserHtmlReasonCode = "no-inspectable-content" | "client-rendered" | "http-error" | "fetch-error" | "challenge" | "hcaptcha" | "recaptcha" | "cloudflare-challenge" | "login-required" | "paywall" | "blocked-or-empty" | "retry-action" | "interaction-required" | "browser-interaction" | "unknown";
89
+ type AgentBrowserHtmlReasonCode = "no-inspectable-content" | "client-rendered" | "http-error" | "fetch-error" | "challenge" | "hcaptcha" | "recaptcha" | "cloudflare-challenge" | "akamai-challenge" | "datadome-challenge" | "perimeterx-challenge" | "kasada-challenge" | "login-required" | "paywall" | "blocked-or-empty" | "retry-action" | "interaction-required" | "browser-interaction" | "unknown";
90
90
  type AgentStaticReadiness = "usable-content" | "usable-structured-data" | "usable-hidden-data" | "thin" | "needs-browser" | "error";
91
91
  type AgentStaticReadinessReasonCode = "browser-required" | "client-rendered" | "interaction-required" | "extraction-error" | "hidden-data" | "source-link" | "form" | "action-target" | "structured-data" | "readable-content" | "limited-static-payload" | "thin-content";
92
92
  type AgentSourceSearchFailureKind = "not-found" | "http-client-error" | "http-server-error" | "http-error" | "fetch-error" | "timeout" | "rate-limited" | "no-inspectable-content" | "unknown";
package/docs/README.md CHANGED
@@ -8,11 +8,13 @@ Start here when README is too small for the detail you need.
8
8
  | Use CLI search and `--agent` output | [cli-agent.md](./cli-agent.md) |
9
9
  | Build a minimal agent handoff loop | [agent-handoff.md](./agent-handoff.md) |
10
10
  | Use as a server/library package | [library-api.md](./library-api.md) |
11
- | Inject into WebViews or browser pages | [library-api.md](./library-api.md#browser-injection) |
11
+ | Integrate with server-side agent SDKs | [server-agent.md](./server-agent.md) |
12
+ | Inject into WebViews or browser pages | [webview.md](./webview.md) |
12
13
  | Check readiness before promotion | [agent-readiness.md](./agent-readiness.md) |
13
14
  | Track current progress and next work | [progress.md](./progress.md) |
14
15
  | Review feature details | [features.md](./features.md) |
15
16
  | Run benchmarks and comparisons | [benchmarks.md](./benchmarks.md) |
17
+ | Publish with npm trusted publishing | [release.md](./release.md) |
16
18
  | Read current `agent-browser` comparison notes | [comparison-baseline.md](./comparison-baseline.md) |
17
19
 
18
20
  The root README should stay short: skill install first, server library usage
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -30,6 +30,7 @@ Resource safety:
30
30
 
31
31
  ```sh
32
32
  pnpm benchmark:agent-cost
33
+ pnpm benchmark:library-cost
33
34
  pnpm compare:sample
34
35
  pnpm compare:static:fixtures
35
36
  pnpm compare:static:fixtures:gate
@@ -82,6 +83,39 @@ Search, social, challenge, and volatile targets may be diagnostic-only and
82
83
  excluded from gate averages. Check each run's `included` and `excluded` counts
83
84
  before treating an average as release-gating coverage.
84
85
 
86
+ ## Library Cost Benchmark
87
+
88
+ `pnpm benchmark:library-cost` measures warm in-process `extract(html)` calls and
89
+ writes `tmp/benchmarks/library-cost.json`. It does not fetch remote pages and
90
+ does not launch a browser. This is the better metric for server integrations
91
+ where a Node process is already running and the question is incremental RSS per
92
+ library call, not total CLI process RSS.
93
+
94
+ The report includes:
95
+
96
+ - `incrementalRssKb`: RSS after extraction minus RSS before extraction.
97
+ - `estimatedTokens`: `cl100k_base` tokens for `formatSemanticTreeText(tree)`.
98
+ - `summary.nodeCount`: semantic tree size after compact extraction.
99
+
100
+ Run it through the package script, which starts Node with `--expose-gc` so the benchmark can trigger GC before each measured case:
101
+
102
+ ```sh
103
+ pnpm benchmark:library-cost
104
+ ```
105
+
106
+ Use `benchmark:agent-cost` for CLI-vs-browser release claims. Use
107
+ `benchmark:library-cost` for server SDK sizing and memory regression checks.
108
+
109
+ Latest local library-only run:
110
+
111
+ | Case | HTML bytes | Incremental RSS | Output tokens | Nodes |
112
+ | --- | ---: | ---: | ---: | ---: |
113
+ | content-page | 737 | 0 KB | 79 | 16 |
114
+ | challenge-page | 251 | 0 KB | 8 | 2 |
115
+ | large-list-page | 37,390 | 896 KB | 428 | 76 |
116
+
117
+ Summary: max incremental RSS was 896 KB, average incremental RSS was 299 KB.
118
+
85
119
  `compare:static:fixtures:gate` is the non-browser smoke gate: it uses synthetic
86
120
  HTML fixtures only, so it should not fetch remote pages or launch
87
121
  `agent-browser`. Use `compare:static:fixtures` when you need the JSON report.
package/docs/cli-agent.md CHANGED
@@ -31,7 +31,7 @@ ax-grep --search "ax-grep npm" --engine bing --lang en --region US
31
31
  ax-grep --search "ax-grep npm" --open-result best --json
32
32
  ```
33
33
 
34
- - `--search` tries DuckDuckGo, Bing, and StartPage by default.
34
+ - `--search` tries DuckDuckGo, Bing, StartPage, and Google by default.
35
35
  - `--engine <name>` forces one search engine.
36
36
  - `--open-result <n|best>` fetches a ranked result in the same command.
37
37
  - `--lang` and `--region` make locale-specific searches reproducible.
File without changes
package/docs/features.md CHANGED
File without changes
File without changes
package/docs/progress.md CHANGED
File without changes
@@ -0,0 +1,24 @@
1
+ # Release
2
+
3
+ ## npm Trusted Publisher
4
+
5
+ Configure npm once before publishing from GitHub Actions:
6
+
7
+ - Package settings: `https://www.npmjs.com/package/ax-grep/access`
8
+ - Publisher: GitHub Actions
9
+ - Organization or user: `hmmhmmhm`
10
+ - Repository: `ax-grep`
11
+ - Workflow filename: `publish.yml`
12
+ - Environment name: leave blank
13
+ - Allowed actions: `npm publish`
14
+
15
+ After the trusted publisher is saved, publish a release by pushing a version tag:
16
+
17
+ ```sh
18
+ git tag v0.1.2
19
+ git push origin v0.1.2
20
+ ```
21
+
22
+ The workflow uses GitHub OIDC instead of an npm token and runs `npm publish`.
23
+ For GitHub Actions and public packages, npm automatically publishes provenance
24
+ attestations when trusted publishing is used.
@@ -0,0 +1,71 @@
1
+ # Server Agent Integration
2
+
3
+ Use `ax-grep` in an agent service when you have already fetched HTML and want a
4
+ compact, accessibility-style source view before paying for browser automation.
5
+ This is useful with the Codex SDK, OpenRouter, queue workers, and custom agent loops.
6
+
7
+ ## Minimal Pattern
8
+
9
+ ```ts
10
+ import { extract, formatSemanticTreeText } from "ax-grep";
11
+
12
+ export async function readForAgent(url: string) {
13
+ const response = await fetch(url, {
14
+ headers: {
15
+ accept: "text/html,application/xhtml+xml",
16
+ "user-agent": "my-agent/1.0",
17
+ },
18
+ });
19
+
20
+ const html = await response.text();
21
+ const tree = extract(html, {
22
+ includeAttributes: false,
23
+ includeHidden: false,
24
+ });
25
+
26
+ return {
27
+ url: response.url || url,
28
+ status: response.status,
29
+ text: formatSemanticTreeText(tree),
30
+ };
31
+ }
32
+ ```
33
+
34
+ Pass `text` into the model as source evidence. Escalate to a browser only when
35
+ the fetched HTML is thin, blocked, or client-rendered.
36
+
37
+ ## Agent Prompt Shape
38
+
39
+ ```ts
40
+ const page = await readForAgent("https://example.com");
41
+
42
+ const messages = [
43
+ {
44
+ role: "system",
45
+ content: "Use the provided semantic tree as page evidence. Ask for browser automation only when the evidence is insufficient.",
46
+ },
47
+ {
48
+ role: "user",
49
+ content: `URL: ${page.url}\nHTTP: ${page.status}\n\n${page.text}`,
50
+ },
51
+ ];
52
+ ```
53
+
54
+ For live URLs, the CLI can also produce an agent handoff with challenge
55
+ detection and search metadata:
56
+
57
+ ```sh
58
+ npx --yes ax-grep@latest https://example.com --agent-brief
59
+ ```
60
+
61
+ Use that command when you want `agent.executor`, `agent.handoff`, `pageCheck`,
62
+ and challenge reason codes without building the fetch layer yourself.
63
+
64
+ ## Failure Policy
65
+
66
+ - If a challenge marker is detected, return a browser-required message instead
67
+ of retrying in a loop.
68
+ - If HTML is mostly an app shell, fetch browser-captured HTML or run WebView
69
+ injection.
70
+ - Keep server fetches sequential for release smoke checks. Browser-backed
71
+ comparisons must use `pnpm check:processes` before and after the run.
@@ -0,0 +1,70 @@
1
+ # WebView and In-Page Usage
2
+
3
+ Use `createExtractorScript()` when the current page already exists in a mobile
4
+ WebView, Playwright page, Puppeteer page, browser extension, or in-app browser.
5
+ It builds an accessibility-style semantic tree from the live DOM without
6
+ opening a separate browser.
7
+
8
+ ## Playwright
9
+
10
+ ```ts
11
+ import { createExtractorScript } from "ax-grep";
12
+
13
+ const text = await page.evaluate(createExtractorScript({
14
+ format: "text",
15
+ mode: "interactive",
16
+ includeBounds: false,
17
+ includeAttributes: false,
18
+ }));
19
+ ```
20
+
21
+ ## Android WebView
22
+
23
+ ```kotlin
24
+ webView.evaluateJavascript(scriptFromServer) { jsonEncodedResult ->
25
+ // scriptFromServer is createExtractorScript({ format: "text" }).
26
+ // jsonEncodedResult is the semantic tree text as a JSON-encoded string; decode it before use.
27
+ }
28
+ ```
29
+
30
+ Generate the script in your JavaScript bundle or server:
31
+
32
+ ```ts
33
+ import { createExtractorScript } from "ax-grep";
34
+
35
+ export const scriptFromServer = createExtractorScript({
36
+ format: "text",
37
+ mode: "interactive",
38
+ });
39
+ ```
40
+
41
+ ## iOS WKWebView
42
+
43
+ ```swift
44
+ webView.evaluateJavaScript(scriptFromServer) { result, error in
45
+ if let text = result as? String {
46
+ // Send text to the local model or agent parser.
47
+ }
48
+ }
49
+ ```
50
+
51
+ ## In-Page Bundle
52
+
53
+ When your code already runs inside the page, use the browser entry point:
54
+
55
+ ```ts
56
+ import { extract, formatSemanticTreeText } from "ax-grep/browser";
57
+
58
+ const tree = extract({
59
+ mode: "interactive",
60
+ includeBounds: false,
61
+ });
62
+
63
+ console.log(formatSemanticTreeText(tree));
64
+ ```
65
+
66
+ ## Mobile Agent Policy
67
+
68
+ For local sLLM search or parsing, run extraction in the current WebView first.
69
+ Escalate to network search or remote browser automation only when the semantic
70
+ tree lacks the target evidence, login state, or post-interaction content.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ax-grep",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "A browser-native semantic accessibility tree extractor that runs without DevTools or CDP.",
5
5
  "type": "module",
6
6
  "engines": {
@@ -48,6 +48,7 @@
48
48
  "readiness:audit": "tsx scripts/check-agent-readiness.ts",
49
49
  "readiness:real-page-smoke": "tsx scripts/check-real-page-smoke.ts",
50
50
  "readiness:search-smoke": "tsx scripts/check-search-smoke.ts",
51
+ "readiness:published-smoke": "tsx scripts/check-published-package-smoke.ts",
51
52
  "readiness:agent-browser-smoke": "tsx scripts/check-agent-browser-smoke.ts",
52
53
  "readiness:agent-browser-text-heavy-smoke": "tsx scripts/check-agent-browser-text-heavy-smoke.ts",
53
54
  "test": "vitest run",
@@ -57,6 +58,7 @@
57
58
  "compare:static": "tsx scripts/compare-static.ts",
58
59
  "compare:tokens": "tsx scripts/compare-token-cost.ts",
59
60
  "benchmark:agent-cost": "tsx scripts/benchmark-agent-cost.ts",
61
+ "benchmark:library-cost": "node --expose-gc --import tsx scripts/benchmark-library-cost.ts",
60
62
  "compare:browser:fixture": "tsx scripts/compare-browser-fixture.ts",
61
63
  "compare:gate": "tsx scripts/check-comparison-gates.ts",
62
64
  "compare:sample": "tsx scripts/compare.ts https://example.com https://www.wikipedia.org https://developer.mozilla.org/en-US/docs/Web/Accessibility https://news.ycombinator.com https://github.com/features https://libraries.io/npm/typescript https://www.npmjs.com/package/typescript",
@@ -96,5 +98,10 @@
96
98
  },
97
99
  "optionalDependencies": {
98
100
  "impit": "^0.14.1"
101
+ },
102
+ "pnpm": {
103
+ "overrides": {
104
+ "esbuild": "0.28.1"
105
+ }
99
106
  }
100
107
  }
File without changes
package/skills.sh CHANGED
File without changes