browser-goat 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. browser_goat-0.1.0/LICENSE +21 -0
  2. browser_goat-0.1.0/PKG-INFO +24 -0
  3. browser_goat-0.1.0/README.md +177 -0
  4. browser_goat-0.1.0/pyproject.toml +65 -0
  5. browser_goat-0.1.0/setup.cfg +4 -0
  6. browser_goat-0.1.0/src/browser_goat/__init__.py +15 -0
  7. browser_goat-0.1.0/src/browser_goat/cli.py +277 -0
  8. browser_goat-0.1.0/src/browser_goat/extraction/__init__.py +9 -0
  9. browser_goat-0.1.0/src/browser_goat/extraction/content_extractor.py +415 -0
  10. browser_goat-0.1.0/src/browser_goat/extraction/goal_oriented.py +175 -0
  11. browser_goat-0.1.0/src/browser_goat/extraction/scrapling_fetcher.py +237 -0
  12. browser_goat-0.1.0/src/browser_goat/mcp_server.py +132 -0
  13. browser_goat-0.1.0/src/browser_goat/models.py +320 -0
  14. browser_goat-0.1.0/src/browser_goat/post_search/__init__.py +17 -0
  15. browser_goat-0.1.0/src/browser_goat/post_search/ranking.py +395 -0
  16. browser_goat-0.1.0/src/browser_goat/post_search/url_pipeline.py +185 -0
  17. browser_goat-0.1.0/src/browser_goat/pre_search/__init__.py +16 -0
  18. browser_goat-0.1.0/src/browser_goat/pre_search/browser_profiles.py +224 -0
  19. browser_goat-0.1.0/src/browser_goat/pre_search/language_detect.py +76 -0
  20. browser_goat-0.1.0/src/browser_goat/pre_search/query_intel.py +387 -0
  21. browser_goat-0.1.0/src/browser_goat/reliability/__init__.py +15 -0
  22. browser_goat-0.1.0/src/browser_goat/reliability/force_answer.py +77 -0
  23. browser_goat-0.1.0/src/browser_goat/reliability/give_up_detector.py +110 -0
  24. browser_goat-0.1.0/src/browser_goat/reliability/quality_gate.py +64 -0
  25. browser_goat-0.1.0/src/browser_goat/router.py +457 -0
  26. browser_goat-0.1.0/src/browser_goat/searxng_client.py +200 -0
  27. browser_goat-0.1.0/src/browser_goat/strategy/__init__.py +7 -0
  28. browser_goat-0.1.0/src/browser_goat/strategy/adaptive_explorer.py +466 -0
  29. browser_goat-0.1.0/src/browser_goat/strategy/query_classifier.py +383 -0
  30. browser_goat-0.1.0/src/browser_goat/strategy/recursive_decomposer.py +380 -0
  31. browser_goat-0.1.0/src/browser_goat/verification/__init__.py +7 -0
  32. browser_goat-0.1.0/src/browser_goat/verification/answer_voter.py +146 -0
  33. browser_goat-0.1.0/src/browser_goat/verification/llm_verifier.py +263 -0
  34. browser_goat-0.1.0/src/browser_goat/verification/multi_rollout.py +206 -0
  35. browser_goat-0.1.0/src/browser_goat.egg-info/PKG-INFO +24 -0
  36. browser_goat-0.1.0/src/browser_goat.egg-info/SOURCES.txt +41 -0
  37. browser_goat-0.1.0/src/browser_goat.egg-info/dependency_links.txt +1 -0
  38. browser_goat-0.1.0/src/browser_goat.egg-info/entry_points.txt +3 -0
  39. browser_goat-0.1.0/src/browser_goat.egg-info/requires.txt +17 -0
  40. browser_goat-0.1.0/src/browser_goat.egg-info/top_level.txt +1 -0
  41. browser_goat-0.1.0/tests/test_models.py +278 -0
  42. browser_goat-0.1.0/tests/test_router.py +377 -0
  43. browser_goat-0.1.0/tests/test_searxng_client.py +132 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 browser-goat
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: browser-goat
3
+ Version: 0.1.0
4
+ Summary: Meta-layer search intelligence wrapping SearXNG with pre/post processing, extraction, strategy, and verification from SearchWala, local-deep-research, Marco-DeepResearch, Tongyi-DeepResearch, and Scrapling.
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.13
7
+ License-File: LICENSE
8
+ Requires-Dist: httpx[http2]>=0.28
9
+ Requires-Dist: beautifulsoup4>=4.12
10
+ Requires-Dist: lxml>=5.3
11
+ Requires-Dist: pydantic>=2.10
12
+ Requires-Dist: tenacity>=9.0
13
+ Requires-Dist: tiktoken>=0.8
14
+ Requires-Dist: playwright>=1.49
15
+ Requires-Dist: scrapling>=0.2
16
+ Requires-Dist: mcp>=1.0
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=8.3; extra == "dev"
19
+ Requires-Dist: pytest-asyncio>=0.25; extra == "dev"
20
+ Requires-Dist: pytest-cov>=6.0; extra == "dev"
21
+ Requires-Dist: ruff>=0.8; extra == "dev"
22
+ Requires-Dist: mypy>=1.13; extra == "dev"
23
+ Requires-Dist: bandit>=1.8; extra == "dev"
24
+ Dynamic: license-file
@@ -0,0 +1,177 @@
1
+ # browser-goat
2
+
3
+ [![Tests](https://img.shields.io/badge/tests-304%20passed-brightgreen)](https://github.com/Im-Busy/browser-goat)
4
+ [![Python](https://img.shields.io/badge/python-3.13%2B-blue)](https://python.org)
5
+ [![License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
6
+
7
+ > Meta-layer search intelligence wrapping SearXNG — Tavily-quality results on your own infrastructure.
8
+
9
+ browser-goat adds six processing layers to [SearXNG](https://docs.searxng.org/): intent detection, hybrid ranking, anti-bot content extraction, reliability gating, adaptive strategy, and multi-rollout verification. The result is agent-ready search output competitive with commercial APIs — running entirely on your own infrastructure.
10
+
11
+ ```mermaid
12
+ flowchart TD
13
+ Q["Query"] --> L1
14
+
15
+ subgraph L1["1. Pre-Search"]
16
+ A["Intent detection<br/>Browser profiles<br/>Language detection"]
17
+ end
18
+
19
+ L1 --> SX["SearXNG Engine"]
20
+
21
+ SX --> L2
22
+ subgraph L2["2. Post-Search"]
23
+ B["URL normalization<br/>RRF + BM25 + MMR"]
24
+ end
25
+
26
+ L2 --> L3
27
+ subgraph L3["3. Extraction"]
28
+ C["7-tier cascading<br/>Anti-bot bypass<br/>Goal-oriented summary"]
29
+ end
30
+
31
+ L3 --> L4
32
+ subgraph L4["4. Reliability"]
33
+ D["Give-up detection<br/>Quality-gated retry<br/>Force synthesis"]
34
+ end
35
+
36
+ L4 --> L5
37
+ subgraph L5["5. Strategy"]
38
+ E["Query classification<br/>Adaptive exploration<br/>Recursive decomposition"]
39
+ end
40
+
41
+ L5 --> L6
42
+ subgraph L6["6. Verification"]
43
+ F["Multi-rollout voting<br/>Consensus verification<br/>LLM tie-breaking"]
44
+ end
45
+
46
+ L6 --> ANS["Answer"]
47
+ ```
48
+
49
+ ---
50
+
51
+ ## Quick Start
52
+
53
+ ### MCP (AI Agents)
54
+
55
+ ```json
56
+ {
57
+ "mcpServers": {
58
+ "browser-goat": {
59
+ "command": "uvx",
60
+ "args": ["--from", "browser-goat", "browser-goat-mcp"],
61
+ "env": { "SEARXNG_URL": "http://localhost:8080" }
62
+ }
63
+ }
64
+ }
65
+ ```
66
+
67
+ Requires Python 3.13+ and a running SearXNG instance.
68
+
69
+ ### CLI
70
+
71
+ ```bash
72
+ uvx browser-goat search "latest AI research"
73
+ uvx browser-goat search "Python vs Rust" --strategy explore
74
+ uvx browser-goat extract "https://example.com/article"
75
+ ```
76
+
77
+ ### Library
78
+
79
+ ```bash
80
+ pip install browser-goat
81
+ ```
82
+
83
+ ```python
84
+ from browser_goat import BrowserGoat
85
+
86
+ meta = BrowserGoat(searxng_url="http://localhost:8080")
87
+ result = await meta.search("quantum computing")
88
+ print(result.answer)
89
+ ```
90
+
91
+ ---
92
+
93
+ ## MCP Tools
94
+
95
+ | Tool | Description |
96
+ |------|-------------|
97
+ | `search` | Full pipeline: intent analysis → SearXNG → ranking → extraction → reliability. Supports `time_range` (day/week/month/year), `max_sources`, and `strategy` (default/auto/explore/decompose). |
98
+ | `extract` | Fetch and extract a single URL with anti-bot bypass (Cloudflare Turnstile). Returns title, clean text, and extraction tier. |
99
+
100
+ ---
101
+
102
+ ## Client Configuration
103
+
104
+ ### Claude Desktop
105
+
106
+ ```json
107
+ {
108
+ "mcpServers": {
109
+ "browser-goat": {
110
+ "command": "uvx",
111
+ "args": ["--from", "browser-goat", "browser-goat-mcp", "--searxng-url", "http://localhost:8080"]
112
+ }
113
+ }
114
+ }
115
+ ```
116
+
117
+ ### Cursor / VS Code
118
+
119
+ ```json
120
+ {
121
+ "mcpServers": {
122
+ "browser-goat": {
123
+ "command": "uvx",
124
+ "args": ["--from", "browser-goat", "browser-goat-mcp"],
125
+ "env": { "SEARXNG_URL": "http://localhost:8080" }
126
+ }
127
+ }
128
+ }
129
+ ```
130
+
131
+ ---
132
+
133
+ ## Docker
134
+
135
+ Bundled SearXNG + Redis sidecar deployment:
136
+
137
+ ```bash
138
+ docker compose up
139
+ ```
140
+
141
+ SearXNG starts at `localhost:8080`, browser-goat API at `localhost:8000`.
142
+
143
+ ```bash
144
+ docker exec browser-goat uv run browser-goat search "your query"
145
+ ```
146
+
147
+ ---
148
+
149
+ ## How It Works
150
+
151
+ Each search passes through six layers before returning an answer. The diagram above shows the full pipeline. Layers 1-4 run on every query; Layers 5-6 activate only when `--strategy` or `--reliability` is set to a non-default value.
152
+
153
+ ---
154
+
155
+ ## Development
156
+
157
+ ```bash
158
+ git clone https://github.com/Im-Busy/browser-goat.git
159
+ cd browser-goat
160
+ uv sync
161
+
162
+ uv run pytest # 304 tests (287 unit + 17 integration)
163
+ uv run ruff check src/ tests/ # zero violations
164
+ uv run mypy src/ # zero errors
165
+ ```
166
+
167
+ The integration tests require a running SearXNG instance at `localhost:8080`. To skip them and run only the unit tests:
168
+
169
+ ```bash
170
+ uv run pytest -m "not integration"
171
+ ```
172
+
173
+ ---
174
+
175
+ ## License
176
+
177
+ MIT
@@ -0,0 +1,65 @@
1
+ [project]
2
+ name = "browser-goat"
3
+ version = "0.1.0"
4
+ description = "Meta-layer search intelligence wrapping SearXNG with pre/post processing, extraction, strategy, and verification from SearchWala, local-deep-research, Marco-DeepResearch, Tongyi-DeepResearch, and Scrapling."
5
+ requires-python = ">=3.13"
6
+ license = "MIT"
7
+ license-files = ["LICENSE"]
8
+ dependencies = [
9
+ "httpx[http2]>=0.28",
10
+ "beautifulsoup4>=4.12",
11
+ "lxml>=5.3",
12
+ "pydantic>=2.10",
13
+ "tenacity>=9.0",
14
+ "tiktoken>=0.8",
15
+ "playwright>=1.49",
16
+ "scrapling>=0.2",
17
+ "mcp>=1.0",
18
+ ]
19
+
20
+ [project.scripts]
21
+ browser-goat = "browser_goat.cli:main"
22
+ browser-goat-mcp = "browser_goat.mcp_server:main"
23
+
24
+ [project.optional-dependencies]
25
+ dev = [
26
+ "pytest>=8.3",
27
+ "pytest-asyncio>=0.25",
28
+ "pytest-cov>=6.0",
29
+ "ruff>=0.8",
30
+ "mypy>=1.13",
31
+ "bandit>=1.8",
32
+ ]
33
+
34
+ [tool.ruff]
35
+ line-length = 100
36
+ target-version = "py313"
37
+
38
+ [tool.ruff.lint]
39
+ select = ["E", "F", "I", "N", "W", "UP", "B", "C4", "SIM"]
40
+ ignore = ["E501"]
41
+
42
+ [tool.mypy]
43
+ python_version = "3.13"
44
+ strict = true
45
+ warn_unreachable = true
46
+ warn_unused_ignores = true
47
+
48
+ [tool.pytest.ini_options]
49
+ asyncio_mode = "auto"
50
+ testpaths = ["tests"]
51
+ markers = [
52
+ "integration: end-to-end tests requiring a running SearXNG instance",
53
+ ]
54
+
55
+ [tool.uv]
56
+ package = true
57
+
58
+ [dependency-groups]
59
+ dev = [
60
+ "mypy>=2.1.0",
61
+ "pytest>=9.1.0",
62
+ "pytest-asyncio>=1.4.0",
63
+ "pytest-cov>=7.1.0",
64
+ "ruff>=0.15.17",
65
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,15 @@
1
+ """browser-goat — Meta-layer search intelligence wrapping SearXNG.
2
+
3
+ Layers:
4
+ pre_search — Query intelligence, language detection, browser profiles
5
+ post_search — URL pipeline, RRF+BM25+MMR ranking, dedup
6
+ extraction — Content extraction, goal-oriented, Scrapling anti-bot
7
+ reliability — Give-up detection, quality gating, force answer
8
+ strategy — Query classification, adaptive exploration (Phase 2)
9
+ verification — Multi-rollout voting (Phase 3)
10
+ """
11
+
12
+ from browser_goat.router import BrowserGoat
13
+
14
+ __version__ = "0.1.0"
15
+ __all__ = ["BrowserGoat"]
@@ -0,0 +1,277 @@
1
+ """CLI entry point for browser-goat.
2
+
3
+ Usage:
4
+ browser-goat search "What is Python?" --searxng-url http://localhost:8080
5
+ browser-goat search "latest AI news" --time-range week --strategy explore
6
+ browser-goat search "Python vs Rust" --reliability high
7
+ uvx browser-goat search "quantum computing research"
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import asyncio
14
+ import json
15
+ import sys
16
+ from typing import Any
17
+
18
+ from browser_goat.router import BrowserGoat
19
+
20
+
21
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``browser-goat`` command line.

    Exactly one of the subcommands ``search``, ``extract``, ``verify`` or
    ``serve`` must be given; the selected name is stored in ``args.command``.

    Returns:
        The fully configured top-level parser.
    """
    root = argparse.ArgumentParser(
        prog="browser-goat",
        description="Meta-layer search intelligence wrapping SearXNG",
    )
    commands = root.add_subparsers(dest="command", required=True)

    default_url = "http://localhost:8080"
    # Every result-printing subcommand accepts the same --format switch.
    format_option = (
        "--format",
        {
            "choices": ["json", "pretty"],
            "default": "json",
            "help": "Output format (default: json)",
        },
    )

    # Each entry: (name, help text, positional (name, help) or None, options).
    # Options are registered in the listed order so --help output is stable.
    layout = (
        (
            "search",
            "Run a full search pipeline",
            ("query", "Search query string"),
            (
                (
                    "--searxng-url",
                    {
                        "default": default_url,
                        "help": "SearXNG instance URL (default: http://localhost:8080)",
                    },
                ),
                (
                    "--engines",
                    {
                        "nargs": "*",
                        "default": None,
                        "help": "SearXNG engines to use (e.g. google bing scholar)",
                    },
                ),
                (
                    "--time-range",
                    {
                        "choices": ["day", "week", "month", "year"],
                        "default": None,
                        "help": "Time filter for results",
                    },
                ),
                (
                    "--language",
                    {"default": "en", "help": "Language code for results (default: en)"},
                ),
                (
                    "--max-sources",
                    {
                        "type": int,
                        "default": 15,
                        "help": "Maximum sources to extract (default: 15)",
                    },
                ),
                (
                    "--strategy",
                    {
                        "choices": ["default", "auto", "explore", "decompose"],
                        "default": "default",
                        "help": "Search strategy (default: default)",
                    },
                ),
                (
                    "--reliability",
                    {
                        "choices": ["standard", "high", "maximum"],
                        "default": "standard",
                        "help": "Reliability mode (default: standard)",
                    },
                ),
                format_option,
            ),
        ),
        (
            "extract",
            "Extract content from a URL",
            ("url", "URL to extract content from"),
            (
                ("--searxng-url", {"default": default_url, "help": "SearXNG instance URL"}),
                format_option,
            ),
        ),
        (
            "verify",
            "Verify an answer via multi-rollout voting",
            ("query", "The query to verify"),
            (
                ("--searxng-url", {"default": default_url, "help": "SearXNG instance URL"}),
                (
                    "--rollouts",
                    {
                        "type": int,
                        "default": 5,
                        "help": "Number of parallel rollouts (default: 5)",
                    },
                ),
                format_option,
            ),
        ),
        (
            "serve",
            "Run browser-goat as an HTTP JSON API server",
            None,
            (
                ("--host", {"default": "0.0.0.0", "help": "Host to bind to (default: 0.0.0.0)"}),
                (
                    "--port",
                    {"type": int, "default": 8000, "help": "Port to listen on (default: 8000)"},
                ),
                (
                    "--searxng-url",
                    {
                        "default": default_url,
                        "help": "SearXNG instance URL (default: http://localhost:8080)",
                    },
                ),
            ),
        ),
    )

    for name, summary, positional, options in layout:
        command = commands.add_parser(name, help=summary)
        if positional is not None:
            arg_name, arg_help = positional
            command.add_argument(arg_name, help=arg_help)
        for flag, spec in options:
            command.add_argument(flag, **spec)

    return root
134
+
135
+
136
def format_output(data: Any, fmt: str) -> str:
    """Serialize *data* to a JSON string.

    Args:
        data: Either a pydantic-style model (any object exposing
            ``model_dump``) or a plain JSON-serializable value.
        fmt: ``"pretty"`` for 2-space indented output; any other value
            produces compact single-line output.

    Returns:
        The JSON text. Output produced through ``json.dumps`` keeps
        non-ASCII characters unescaped.
    """
    is_model = hasattr(data, "model_dump")
    if fmt == "pretty":
        # mode="json" asks the model to convert datetimes, URLs, enums and the
        # like into JSON-native values; the default python mode can hand
        # json.dumps objects it cannot serialize, raising TypeError.
        payload = data.model_dump(mode="json") if is_model else data
        return json.dumps(payload, indent=2, ensure_ascii=False)
    if is_model:
        return str(data.model_dump_json())
    return json.dumps(data, ensure_ascii=False)
145
+
146
+
147
async def cmd_search(args: argparse.Namespace) -> None:
    """Run the ``search`` subcommand and print the formatted result to stdout.

    Args:
        args: Parsed namespace of the ``search`` subparser.
    """
    client = BrowserGoat(searxng_url=args.searxng_url)
    search_kwargs: dict[str, Any] = {
        "query": args.query,
        "engines": args.engines,
        "time_range": args.time_range,
        "language": args.language,
        "max_sources": args.max_sources,
        "strategy": args.strategy,
        "reliability_mode": args.reliability,
    }
    outcome = await client.search(**search_kwargs)
    print(format_output(outcome, args.format))
160
+
161
+
162
async def cmd_extract(args: argparse.Namespace) -> None:
    """Fetch ``args.url``, extract its content and print a summary to stdout.

    The summary holds the URL, title, extraction tier, full text length and
    the text itself, cut to 1000 characters plus ``"..."`` when it is longer.
    If the fetch fails, a JSON error object is written to stderr and the
    process exits with status 1.

    Args:
        args: Parsed namespace of the ``extract`` subparser.
    """
    goat = BrowserGoat(searxng_url=args.searxng_url)
    page_fetcher = goat.scrapling
    page_extractor = goat.content_extractor
    browser_profile = goat.browser_profiles.get_random_profile()

    fetched = await page_fetcher.fetch(args.url, browser_profile)
    if not fetched.success:
        failure = {"error": fetched.error or "fetch failed"}
        print(json.dumps(failure), file=sys.stderr)
        sys.exit(1)

    extracted = page_extractor.extract(fetched.html, args.url)
    full_text = extracted.text
    if len(full_text) > 1000:
        preview = full_text[:1000] + "..."
    else:
        preview = full_text

    summary = {
        "url": args.url,
        "title": extracted.title,
        "text": preview,
        "extraction_tier": extracted.extraction_tier,
        "text_length": len(full_text),
    }
    print(format_output(summary, args.format))
186
+
187
+
188
async def cmd_verify(args: argparse.Namespace) -> None:
    """Run the ``verify`` subcommand and print the formatted result to stdout.

    ``args.rollouts`` is not forwarded as a count; it only selects the
    reliability mode handed to the search: ``"high"`` for five or fewer
    rollouts, ``"maximum"`` otherwise.

    Args:
        args: Parsed namespace of the ``verify`` subparser.
    """
    goat = BrowserGoat(searxng_url=args.searxng_url)
    if args.rollouts <= 5:
        mode = "high"
    else:
        mode = "maximum"
    outcome = await goat.search(query=args.query, reliability_mode=mode)
    print(format_output(outcome, args.format))
196
+
197
+
198
async def cmd_serve(args: argparse.Namespace) -> None:
    """Run browser-goat as a minimal HTTP JSON API server (zero extra deps).

    Routes:
        GET /health, GET /  -> ``{"status":"ok"}``
        POST /search        -> JSON object body with ``query`` and optional
                               ``time_range``, ``max_sources``, ``strategy``
                               and ``reliability``; answers with the
                               serialized search result
        anything else       -> 404

    Malformed requests receive a 4xx JSON error and failed searches a 500,
    rather than a silently dropped connection. Exactly one request is served
    per connection. Listens on ``args.host``:``args.port`` until cancelled.

    Args:
        args: Parsed namespace of the ``serve`` subparser.
    """
    meta = BrowserGoat(searxng_url=args.searxng_url)
    # Cap POST bodies so a bogus Content-Length cannot exhaust memory.
    max_body_bytes = 1024 * 1024

    def error_body(message: str) -> bytes:
        """Encode *message* as a JSON error payload."""
        return json.dumps({"error": message}).encode()

    def send(writer: asyncio.StreamWriter, status: str, body: bytes) -> None:
        """Queue a complete JSON response on *writer*."""
        head = (
            f"HTTP/1.1 {status}\r\n"
            "Content-Type: application/json\r\n"
            f"Content-Length: {len(body)}\r\n"
            # The socket is closed after every response, so say so.
            "Connection: close\r\n\r\n"
        )
        writer.write(head.encode("ascii") + body)

    async def run_search(
        header_lines: list[str], reader: asyncio.StreamReader
    ) -> tuple[str, bytes]:
        """Read the POST body, run the search and return (status, body)."""
        content_length = 0
        for line in header_lines[1:]:
            name, _, value = line.partition(":")
            if name.strip().lower() == "content-length":
                try:
                    content_length = int(value.strip())
                except ValueError:
                    return "400 Bad Request", error_body("invalid Content-Length")
        if content_length < 0:
            return "400 Bad Request", error_body("invalid Content-Length")
        if content_length > max_body_bytes:
            return "413 Payload Too Large", error_body("request body too large")

        try:
            body_raw = await asyncio.wait_for(reader.readexactly(content_length), timeout=5)
        except (TimeoutError, asyncio.IncompleteReadError):
            return "400 Bad Request", error_body("incomplete request body")

        try:
            params = json.loads(body_raw)
        except ValueError:  # JSONDecodeError and UnicodeDecodeError are both ValueErrors
            return "400 Bad Request", error_body("request body is not valid JSON")
        if not isinstance(params, dict):
            return "400 Bad Request", error_body("request body must be a JSON object")

        try:
            result = await meta.search(
                query=params.get("query", ""),
                time_range=params.get("time_range"),
                max_sources=params.get("max_sources", 15),
                strategy=params.get("strategy", "default"),
                reliability_mode=params.get("reliability", "standard"),
            )
        except Exception as exc:  # request boundary: report the failure, keep serving
            print(json.dumps({"error": str(exc)}), file=sys.stderr)
            return "500 Internal Server Error", error_body(str(exc))
        return "200 OK", result.model_dump_json().encode()

    async def route(raw: bytes, reader: asyncio.StreamReader) -> tuple[str, bytes]:
        """Dispatch on the request line and return (status, body)."""
        header_lines = raw.decode("utf-8", errors="replace").split("\r\n")
        # Padding guarantees the unpacking works for a short or empty request line.
        method, path, *_ = header_lines[0].split(" ") + ["", ""]
        if method == "GET" and path in ("/health", "/"):
            return "200 OK", b'{"status":"ok"}'
        if method == "POST" and path == "/search":
            return await run_search(header_lines, reader)
        return "404 Not Found", b'{"error":"not found"}'

    async def handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
        """Serve a single request on one connection, then close it."""
        try:
            try:
                raw = await asyncio.wait_for(reader.readuntil(b"\r\n\r\n"), timeout=30)
            except asyncio.LimitOverrunError:
                status = "431 Request Header Fields Too Large"
                body = error_body("request headers too large")
            except (TimeoutError, asyncio.IncompleteReadError):
                return  # no complete header block arrived; nothing to answer
            else:
                status, body = await route(raw, reader)
            send(writer, status, body)
            await writer.drain()
        except OSError:
            pass  # peer disconnected mid-exchange; nothing left to do
        except Exception as exc:  # last resort: never let one connection hurt the server
            print(json.dumps({"error": str(exc)}), file=sys.stderr)
        finally:
            writer.close()
            try:
                await writer.wait_closed()
            except OSError:
                pass  # connection was already reset by the peer

    server = await asyncio.start_server(handle, host=args.host, port=args.port)
    print(f"browser-goat API listening on http://{args.host}:{args.port}", file=sys.stderr)
    async with server:
        await server.serve_forever()
256
+
257
+
258
def main() -> None:
    """Parse the command line and run the selected subcommand.

    Any ``Exception`` raised while running the subcommand is reported as a
    JSON error object on stderr, and the process exits with status 1.
    """
    args = build_parser().parse_args()
    handlers = {
        "search": cmd_search,
        "extract": cmd_extract,
        "verify": cmd_verify,
        "serve": cmd_serve,
    }
    handler = handlers.get(args.command)

    try:
        if handler is not None:
            asyncio.run(handler(args))
    except Exception as exc:
        print(json.dumps({"error": str(exc)}), file=sys.stderr)
        sys.exit(1)
274
+
275
+
276
+ if __name__ == "__main__":
277
+ main()
@@ -0,0 +1,9 @@
1
+ """Extraction layer: 7-tier content extraction, goal-oriented extraction, Scrapling anti-bot."""
2
+
3
+ from browser_goat.extraction.content_extractor import ContentExtractor
4
+ from browser_goat.extraction.goal_oriented import GoalOrientedExtractor
5
+ from browser_goat.extraction.scrapling_fetcher import ScraplingFetcher
6
+
7
# Public API of the extraction layer (the original assigned this list twice).
__all__ = ["ContentExtractor", "GoalOrientedExtractor", "ScraplingFetcher"]