agentsearchcli 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentsearchcli-1.0.0/LICENSE +21 -0
- agentsearchcli-1.0.0/PKG-INFO +423 -0
- agentsearchcli-1.0.0/README.md +386 -0
- agentsearchcli-1.0.0/pyproject.toml +54 -0
- agentsearchcli-1.0.0/setup.cfg +4 -0
- agentsearchcli-1.0.0/src/agent_search/__init__.py +21 -0
- agentsearchcli-1.0.0/src/agent_search/__main__.py +9 -0
- agentsearchcli-1.0.0/src/agent_search/aws_ip_rotator.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/batch_processor.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/captcha_detector.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/change_detector.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/cli/__init__.py +1 -0
- agentsearchcli-1.0.0/src/agent_search/cli/commands/auth.py +156 -0
- agentsearchcli-1.0.0/src/agent_search/cli/commands/crawl.py +44 -0
- agentsearchcli-1.0.0/src/agent_search/cli/commands/extract.py +63 -0
- agentsearchcli-1.0.0/src/agent_search/cli/commands/monitor.py +260 -0
- agentsearchcli-1.0.0/src/agent_search/cli/commands/pool.py +405 -0
- agentsearchcli-1.0.0/src/agent_search/cli/commands/query.py +121 -0
- agentsearchcli-1.0.0/src/agent_search/cli/main.py +292 -0
- agentsearchcli-1.0.0/src/agent_search/cli/onboarding.py +303 -0
- agentsearchcli-1.0.0/src/agent_search/core/__init__.py +1 -0
- agentsearchcli-1.0.0/src/agent_search/core/api_server.py +453 -0
- agentsearchcli-1.0.0/src/agent_search/core/aws_gateway_session.py +130 -0
- agentsearchcli-1.0.0/src/agent_search/core/aws_ip_rotator.py +195 -0
- agentsearchcli-1.0.0/src/agent_search/core/batch_processor.py +357 -0
- agentsearchcli-1.0.0/src/agent_search/core/captcha_detector.py +398 -0
- agentsearchcli-1.0.0/src/agent_search/core/change_detector.py +381 -0
- agentsearchcli-1.0.0/src/agent_search/core/data_extraction.py +621 -0
- agentsearchcli-1.0.0/src/agent_search/core/demo.py +103 -0
- agentsearchcli-1.0.0/src/agent_search/core/html_to_markdown.py +566 -0
- agentsearchcli-1.0.0/src/agent_search/core/llm_extractor.py +445 -0
- agentsearchcli-1.0.0/src/agent_search/core/macbook_server.py +220 -0
- agentsearchcli-1.0.0/src/agent_search/core/multi_search.py +319 -0
- agentsearchcli-1.0.0/src/agent_search/core/nordvpn_proxy.py +128 -0
- agentsearchcli-1.0.0/src/agent_search/core/playwright_browser.py +450 -0
- agentsearchcli-1.0.0/src/agent_search/core/proxy_chain.py +334 -0
- agentsearchcli-1.0.0/src/agent_search/core/rate_limiter.py +242 -0
- agentsearchcli-1.0.0/src/agent_search/core/retry_handler.py +376 -0
- agentsearchcli-1.0.0/src/agent_search/core/session_manager.py +296 -0
- agentsearchcli-1.0.0/src/agent_search/core/sitemap_crawler.py +401 -0
- agentsearchcli-1.0.0/src/agent_search/core/user_agents.py +146 -0
- agentsearchcli-1.0.0/src/agent_search/data_extraction.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/html_to_markdown.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/nordvpn_proxy.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/pool/__init__.py +41 -0
- agentsearchcli-1.0.0/src/agent_search/proxy_chain.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/rate_limiter.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/retry_handler.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/user_agents.py +3 -0
- agentsearchcli-1.0.0/src/agent_search/utils/logger.py +17 -0
- agentsearchcli-1.0.0/src/agent_search/utils/version.py +10 -0
- agentsearchcli-1.0.0/src/agentsearchcli.egg-info/PKG-INFO +423 -0
- agentsearchcli-1.0.0/src/agentsearchcli.egg-info/SOURCES.txt +64 -0
- agentsearchcli-1.0.0/src/agentsearchcli.egg-info/dependency_links.txt +1 -0
- agentsearchcli-1.0.0/src/agentsearchcli.egg-info/entry_points.txt +3 -0
- agentsearchcli-1.0.0/src/agentsearchcli.egg-info/requires.txt +13 -0
- agentsearchcli-1.0.0/src/agentsearchcli.egg-info/top_level.txt +1 -0
- agentsearchcli-1.0.0/tests/test_batch_processor.py +236 -0
- agentsearchcli-1.0.0/tests/test_change_detector.py +236 -0
- agentsearchcli-1.0.0/tests/test_data_extraction.py +246 -0
- agentsearchcli-1.0.0/tests/test_html_to_markdown.py +238 -0
- agentsearchcli-1.0.0/tests/test_integration.py +302 -0
- agentsearchcli-1.0.0/tests/test_proxy_chain.py +198 -0
- agentsearchcli-1.0.0/tests/test_rate_limiter.py +174 -0
- agentsearchcli-1.0.0/tests/test_retry_handler.py +228 -0
- agentsearchcli-1.0.0/tests/test_user_agents.py +191 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Qwert
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentsearchcli
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Give any AI agent the ability to search, crawl, and extract the web.
|
|
5
|
+
Author-email: Qwert <hello@qwert.ai>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://qwert.ai
|
|
8
|
+
Project-URL: Documentation, https://qwert.ai/docs
|
|
9
|
+
Project-URL: Repository, https://github.com/r0botsorg/agent-search-cli
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/r0botsorg/agent-search-cli/issues
|
|
11
|
+
Keywords: web-search,ai-agents,web-scraping,proxy,cli
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: >=3.9
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: click>=8.0.0
|
|
26
|
+
Requires-Dist: requests>=2.28.0
|
|
27
|
+
Requires-Dist: rich>=13.0.0
|
|
28
|
+
Requires-Dist: pyyaml>=6.0
|
|
29
|
+
Provides-Extra: browser
|
|
30
|
+
Requires-Dist: playwright>=1.30.0; extra == "browser"
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
<p align="center">
|
|
39
|
+
<h1 align="center">Agent Search</h1>
|
|
40
|
+
<p align="center"><strong>Give any AI agent the ability to search, crawl, and extract the web.</strong></p>
|
|
41
|
+
</p>
|
|
42
|
+
|
|
43
|
+
<p align="center">
|
|
44
|
+
<a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="MIT License"></a>
|
|
45
|
+
<a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.9+-blue.svg" alt="Python 3.9+"></a>
|
|
46
|
+
  <a href="https://pypi.org/project/agentsearchcli/"><img src="https://img.shields.io/pypi/v/agentsearchcli.svg" alt="PyPI"></a>
|
|
47
|
+
</p>
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
Agent Search is a CLI and Python library that gives AI agents reliable web access. One command to search, crawl websites, extract structured data, and monitor pages for changes — all routed through a 4-layer proxy chain that automatically handles IP rotation, CAPTCHA detection, and rate limiting.
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install agentsearchcli
|
|
55
|
+
search "latest NVIDIA earnings" --format json
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Why Agent Search?
|
|
59
|
+
|
|
60
|
+
Most AI agents can't reliably access the web. Search APIs are expensive, direct requests get blocked, and scraping requires infrastructure. Agent Search solves this:
|
|
61
|
+
|
|
62
|
+
- **Multi-engine search** — Aggregates results from Google, DuckDuckGo, Bing, and Wikipedia. Deduplicates and ranks by relevance.
|
|
63
|
+
- **4-layer proxy chain** — Automatic failover: MacBook relay -> NordVPN SOCKS5 -> AWS API Gateway IP rotation -> direct. Never get blocked.
|
|
64
|
+
- **Headless browsing** — Playwright with stealth mode for JavaScript-rendered pages.
|
|
65
|
+
- **Structured extraction** — Pull data from any page using CSS selectors, XPath, or LLM-powered extraction.
|
|
66
|
+
- **Change monitoring** — Watch any URL for content changes with configurable intervals.
|
|
67
|
+
- **Community proxy pool** — Earn credits by sharing bandwidth. Spend credits to use the network.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Quick Start
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Install
|
|
75
|
+
pip install agentsearchcli
|
|
76
|
+
|
|
77
|
+
# First run — creates account and gets API key
|
|
78
|
+
search
|
|
79
|
+
|
|
80
|
+
# Search the web
|
|
81
|
+
search "Python asyncio documentation"
|
|
82
|
+
|
|
83
|
+
# Output as JSON (for agents)
|
|
84
|
+
search query "React hooks tutorial" --format json
|
|
85
|
+
|
|
86
|
+
# Use headless browser for JS-heavy sites
|
|
87
|
+
search query "site:twitter.com AI news" --browser
|
|
88
|
+
|
|
89
|
+
# Crawl a docs site
|
|
90
|
+
search crawl https://docs.python.org --depth 3 --max-pages 100
|
|
91
|
+
|
|
92
|
+
# Extract structured data
|
|
93
|
+
search extract https://shop.com/products --schema schema.json --format json
|
|
94
|
+
|
|
95
|
+
# Monitor a page for changes (check every 30 min)
|
|
96
|
+
search monitor https://example.com/pricing --interval 1800
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Installation
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Core (requests-based, no browser)
|
|
105
|
+
pip install agentsearchcli
|
|
106
|
+
|
|
107
|
+
# With headless browser support
|
|
108
|
+
pip install "agentsearchcli[browser]"
|
|
109
|
+
|
|
110
|
+
# From source
|
|
111
|
+
git clone https://github.com/r0botsorg/agent-search-cli.git
|
|
112
|
+
cd agent-search-cli
|
|
113
|
+
pip install -e ".[dev]"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Requirements:** Python 3.9+ and an internet connection. Everything else is optional.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Architecture
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
┌─────────────────────────────────────────────────────┐
|
|
124
|
+
│ CLI / Library │
|
|
125
|
+
│ search query | crawl | extract | monitor │
|
|
126
|
+
└──────────────────────┬──────────────────────────────┘
|
|
127
|
+
│
|
|
128
|
+
┌──────────────────────▼──────────────────────────────┐
|
|
129
|
+
│ Multi-Engine Search │
|
|
130
|
+
│ Google + DuckDuckGo + Bing + Wikipedia │
|
|
131
|
+
└──────────────────────┬──────────────────────────────┘
|
|
132
|
+
│
|
|
133
|
+
┌──────────────────────▼──────────────────────────────┐
|
|
134
|
+
│ 4-Layer Proxy Chain │
|
|
135
|
+
│ │
|
|
136
|
+
│ 1. MacBook Relay (residential IP) │
|
|
137
|
+
│ 2. NordVPN SOCKS5 (residential IP) │
|
|
138
|
+
│ 3. AWS API Gateway (rotating datacenter IPs) │
|
|
139
|
+
│ 4. Direct (fallback) │
|
|
140
|
+
│ │
|
|
141
|
+
│ Auto-failover · CAPTCHA detection · Rate limiting │
|
|
142
|
+
└──────────────────────┬──────────────────────────────┘
|
|
143
|
+
│
|
|
144
|
+
┌──────────────────────▼──────────────────────────────┐
|
|
145
|
+
│ Content Processing │
|
|
146
|
+
│ │
|
|
147
|
+
│ HTML → Markdown · CSS/XPath extraction │
|
|
148
|
+
│ LLM extraction · Change detection │
|
|
149
|
+
│ Playwright stealth · Session management │
|
|
150
|
+
└─────────────────────────────────────────────────────┘
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Modes
|
|
156
|
+
|
|
157
|
+
| Mode | Cost | Proxies | Best For |
|
|
158
|
+
|------|------|---------|----------|
|
|
159
|
+
| **Lite** | Free | Self-managed (your proxies) | Developers with existing infrastructure |
|
|
160
|
+
| **Pro** | Paid | Fully managed | Teams who want zero setup |
|
|
161
|
+
| **Pool** | Free | Community-powered | Everyone — share bandwidth, earn credits |
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## CLI Reference
|
|
166
|
+
|
|
167
|
+
### Global Options
|
|
168
|
+
|
|
169
|
+
| Option | Description |
|
|
170
|
+
|--------|-------------|
|
|
171
|
+
| `--version` | Show version and exit |
|
|
172
|
+
| `--verbose` / `-v` | Enable debug logging |
|
|
173
|
+
| `--config PATH` | Path to custom config file |
|
|
174
|
+
| `--skip-onboarding` | Skip the first-run setup wizard |
|
|
175
|
+
|
|
176
|
+
### Search
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
search "your query" # quick search
|
|
180
|
+
search query "your query" --format json # JSON output
|
|
181
|
+
search query "your query" --browser # JS rendering
|
|
182
|
+
search query "your query" --extract "h1, .price" # CSS extraction
|
|
183
|
+
search query "your query" --pro # hosted mode
|
|
184
|
+
search query "your query" -o results.json # save to file
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### Crawl
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
search crawl https://docs.example.com --depth 3 --max-pages 100
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Extract
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
search extract https://shop.com/product --schema schema.json --format json
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Monitor
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
search monitor https://example.com/pricing --interval 1800
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Proxy Pool
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
search pool join # contribute bandwidth, earn credits
|
|
209
|
+
search pool leave # stop participating
|
|
210
|
+
search pool status # your node status
|
|
211
|
+
search pool stats # global network stats
|
|
212
|
+
search pool credits # your balance
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Auth
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
search auth login # authenticate for Pro mode
|
|
219
|
+
search auth logout # remove stored credentials
|
|
220
|
+
search auth status # check auth state
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Command Tree
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
search [QUERY]
|
|
227
|
+
├── query QUERY [--pro] [-f markdown|html|json] [-o PATH] [--extract CSS] [--browser]
|
|
228
|
+
├── crawl URL [--pro] [--depth N] [--max-pages N]
|
|
229
|
+
├── extract URL [--pro] [--schema PATH] [-f markdown|json]
|
|
230
|
+
├── monitor URL [--pro] [--interval N]
|
|
231
|
+
├── onboard
|
|
232
|
+
├── auth
|
|
233
|
+
│ ├── login
|
|
234
|
+
│ ├── logout
|
|
235
|
+
│ └── status
|
|
236
|
+
└── pool
|
|
237
|
+
├── join
|
|
238
|
+
├── leave
|
|
239
|
+
├── status
|
|
240
|
+
├── stats
|
|
241
|
+
└── credits
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
**13 commands** total.
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## Python Library
|
|
249
|
+
|
|
250
|
+
Use Agent Search as a library in your own code:
|
|
251
|
+
|
|
252
|
+
```python
|
|
253
|
+
from agent_search.core.proxy_chain import ProxyChain
|
|
254
|
+
from agent_search.core.multi_search import MultiEngineSearch
|
|
255
|
+
from agent_search.core.html_to_markdown import HTMLToMarkdown
|
|
256
|
+
from agent_search.core.data_extraction import DataExtractor
|
|
257
|
+
from agent_search.core.change_detector import ChangeDetector
|
|
258
|
+
|
|
259
|
+
# Proxy-aware HTTP requests with automatic failover
|
|
260
|
+
proxy = ProxyChain()
|
|
261
|
+
response = proxy.get("https://example.com")
|
|
262
|
+
data = await proxy.async_get("https://api.example.com/data")
|
|
263
|
+
proxies = proxy.get_best_proxies_dict() # for use with requests
|
|
264
|
+
|
|
265
|
+
# Multi-engine search with dedup + ranking
|
|
266
|
+
engine = MultiEngineSearch()
|
|
267
|
+
results = engine.search("latest AI research", max_results=10)
|
|
268
|
+
|
|
269
|
+
# HTML to clean Markdown
|
|
270
|
+
converter = HTMLToMarkdown()
|
|
271
|
+
markdown = converter.convert(html, base_url="https://example.com")
|
|
272
|
+
|
|
273
|
+
# Structured data extraction
|
|
274
|
+
extractor = DataExtractor()
|
|
275
|
+
data = extractor.extract(url, selectors=["h1", ".price", ".description"])
|
|
276
|
+
|
|
277
|
+
# Change monitoring
|
|
278
|
+
detector = ChangeDetector()
|
|
279
|
+
changed = detector.check(url) # returns True if content changed
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## Core Modules
|
|
285
|
+
|
|
286
|
+
| Module | Description |
|
|
287
|
+
|--------|-------------|
|
|
288
|
+
| `proxy_chain` | 4-layer proxy with automatic failover |
|
|
289
|
+
| `multi_search` | Multi-engine search aggregation with dedup + ranking |
|
|
290
|
+
| `html_to_markdown` | Clean HTML-to-Markdown conversion |
|
|
291
|
+
| `data_extraction` | CSS, XPath, and LLM-powered structured extraction |
|
|
292
|
+
| `playwright_browser` | Headless Chrome with stealth mode |
|
|
293
|
+
| `batch_processor` | Async batch URL processing with concurrency control |
|
|
294
|
+
| `change_detector` | Content change monitoring via SHA-256 snapshots |
|
|
295
|
+
| `captcha_detector` | CAPTCHA and anti-bot block detection |
|
|
296
|
+
| `rate_limiter` | Thread-safe rate limiting with adaptive backoff |
|
|
297
|
+
| `retry_handler` | Exponential backoff with circuit breaker pattern |
|
|
298
|
+
| `sitemap_crawler` | URL discovery via sitemap.xml and robots.txt |
|
|
299
|
+
| `aws_ip_rotator` | AWS API Gateway IP rotation (new IP per request) |
|
|
300
|
+
| `nordvpn_proxy` | NordVPN SOCKS5 residential proxy support |
|
|
301
|
+
| `session_manager` | Persistent session and cookie storage |
|
|
302
|
+
| `user_agents` | 27 real browser User-Agent strings with rotation |
|
|
303
|
+
| `llm_extractor` | LLM-powered intelligent data extraction |
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
## Configuration
|
|
308
|
+
|
|
309
|
+
Config is stored at `~/.config/agent-search/config.json` (created on first run via onboarding wizard).
|
|
310
|
+
|
|
311
|
+
### Environment Variables
|
|
312
|
+
|
|
313
|
+
| Variable | Description |
|
|
314
|
+
|----------|-------------|
|
|
315
|
+
| `AGENT_SEARCH_ENDPOINT` | Search endpoint URL (default: `http://localhost:15000`) |
|
|
316
|
+
| `AGENT_SEARCH_API_KEY` | Pro mode API key |
|
|
317
|
+
| `NORDVPN_SERVICE_USER` | NordVPN SOCKS5 username |
|
|
318
|
+
| `NORDVPN_SERVICE_PASS` | NordVPN SOCKS5 password |
|
|
319
|
+
| `AWS_API_GATEWAY_ID` | AWS API Gateway ID for IP rotation |
|
|
320
|
+
| `AWS_REGION` | AWS region (default: `us-east-1`) |
|
|
321
|
+
| `MACBOOK_PROXY_URL` | MacBook relay proxy URL |
|
|
322
|
+
| `MACBOOK_API_KEY` | MacBook relay auth key |
|
|
323
|
+
| `OPENAI_API_KEY` | For LLM-powered extraction |
|
|
324
|
+
| `BING_SEARCH_API_KEY` | Bing Search API key (optional engine) |
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## Project Structure
|
|
329
|
+
|
|
330
|
+
```
|
|
331
|
+
agent-search-cli/
|
|
332
|
+
├── pyproject.toml # Package config + entry points
|
|
333
|
+
├── src/agent_search/
|
|
334
|
+
│ ├── cli/ # CLI layer (Click)
|
|
335
|
+
│ │ ├── main.py # Command routing
|
|
336
|
+
│ │ ├── onboarding.py # First-run setup wizard
|
|
337
|
+
│ │ └── commands/
|
|
338
|
+
│ │ ├── query.py # Web search
|
|
339
|
+
│ │ ├── crawl.py # Website crawling
|
|
340
|
+
│ │ ├── extract.py # Data extraction
|
|
341
|
+
│ │ ├── monitor.py # Change monitoring
|
|
342
|
+
│ │ ├── auth.py # Authentication
|
|
343
|
+
│ │ └── pool.py # Proxy pool management
|
|
344
|
+
│ ├── core/ # Core library (usable independently)
|
|
345
|
+
│ │ ├── proxy_chain.py # 4-layer proxy failover
|
|
346
|
+
│ │ ├── multi_search.py # Multi-engine search
|
|
347
|
+
│ │ ├── html_to_markdown.py # HTML → Markdown
|
|
348
|
+
│ │ ├── data_extraction.py # Structured extraction
|
|
349
|
+
│ │ ├── playwright_browser.py # Headless browser
|
|
350
|
+
│ │ ├── batch_processor.py # Async batch processing
|
|
351
|
+
│ │ ├── change_detector.py # Change monitoring
|
|
352
|
+
│ │ ├── captcha_detector.py # Anti-bot detection
|
|
353
|
+
│ │ ├── rate_limiter.py # Rate limiting
|
|
354
|
+
│ │ ├── retry_handler.py # Retry + circuit breaker
|
|
355
|
+
│ │ ├── sitemap_crawler.py # Sitemap discovery
|
|
356
|
+
│ │ ├── aws_ip_rotator.py # AWS IP rotation
|
|
357
|
+
│ │ ├── nordvpn_proxy.py # NordVPN SOCKS5
|
|
358
|
+
│ │ ├── session_manager.py # Session persistence
|
|
359
|
+
│ │ ├── llm_extractor.py # LLM extraction
|
|
360
|
+
│ │ └── user_agents.py # UA rotation
|
|
361
|
+
│ ├── pool/ # Proxy pool network
|
|
362
|
+
│ └── utils/
|
|
363
|
+
│ ├── logger.py
|
|
364
|
+
│ └── version.py
|
|
365
|
+
└── tests/
|
|
366
|
+
├── test_*.py
|
|
367
|
+
└── unit/
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
---
|
|
371
|
+
|
|
372
|
+
## Development
|
|
373
|
+
|
|
374
|
+
```bash
|
|
375
|
+
git clone https://github.com/r0botsorg/agent-search-cli.git
|
|
376
|
+
cd agent-search-cli
|
|
377
|
+
pip install -e ".[dev]"
|
|
378
|
+
python -m pytest tests/ -v
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
---
|
|
382
|
+
|
|
383
|
+
## About Qwerty
|
|
384
|
+
|
|
385
|
+
Agent Search is built by **Qwerty** ([qwert.ai](https://qwert.ai)) — an AI-powered search platform designed specifically for agents and autonomous systems.
|
|
386
|
+
|
|
387
|
+
Traditional search wasn't built for the agent era. It was built for humans typing queries into search boxes. Qwerty is different: an agent-first search infrastructure built from the ground up for the software that's replacing manual workflows.
|
|
388
|
+
|
|
389
|
+
### The Platform
|
|
390
|
+
|
|
391
|
+
Agent Search CLI is the open-source core of the Qwerty platform. The full stack includes:
|
|
392
|
+
|
|
393
|
+
| Component | Description |
|
|
394
|
+
|-----------|-------------|
|
|
395
|
+
| **[Agent Search CLI](https://github.com/r0botsorg/agent-search-cli)** | Open-source CLI and Python library (this repo) |
|
|
396
|
+
| **Qwerty API** | Hosted search API at `api.qwert.ai` — managed proxy infrastructure, no setup required |
|
|
397
|
+
| **Proxy Pool** | Community-powered proxy network — share bandwidth, earn credits |
|
|
398
|
+
|
|
399
|
+
### Pricing
|
|
400
|
+
|
|
401
|
+
| Plan | Price | Requests | What You Get |
|
|
402
|
+
|------|-------|----------|--------------|
|
|
403
|
+
| **Lite** | Free | 1,000/mo | Basic search, API access, community support |
|
|
404
|
+
| **Pro** | $49/mo | 50,000/mo | Managed proxies, semantic search, priority support, analytics |
|
|
405
|
+
| **Enterprise** | $999/mo | Unlimited | Dedicated infrastructure, SLA, SSO, custom integrations |
|
|
406
|
+
|
|
407
|
+
Start free at [qwert.ai](https://qwert.ai) or self-host the entire stack with the open-source repos.
|
|
408
|
+
|
|
409
|
+
### Contact
|
|
410
|
+
|
|
411
|
+
- **Email**: [hello@qwert.ai](mailto:hello@qwert.ai)
|
|
412
|
+
- **Website**: [qwert.ai](https://qwert.ai)
|
|
413
|
+
- **Docs**: [qwert.ai/docs](https://qwert.ai/docs)
|
|
414
|
+
|
|
415
|
+
---
|
|
416
|
+
|
|
417
|
+
## License
|
|
418
|
+
|
|
419
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
420
|
+
|
|
421
|
+
---
|
|
422
|
+
|
|
423
|
+
<p align="center">Built by <a href="https://qwert.ai">Qwerty</a></p>
|