searchts 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- searchts-0.1.0/.env.example +18 -0
- searchts-0.1.0/.github/workflows/pytest.yml +70 -0
- searchts-0.1.0/.gitignore +18 -0
- searchts-0.1.0/CHANGELOG.md +33 -0
- searchts-0.1.0/CLAUDE.md +43 -0
- searchts-0.1.0/CONTRIBUTING.md +80 -0
- searchts-0.1.0/LICENSE +22 -0
- searchts-0.1.0/PKG-INFO +140 -0
- searchts-0.1.0/README.md +94 -0
- searchts-0.1.0/SECURITY.md +51 -0
- searchts-0.1.0/config/mcporter.json +8 -0
- searchts-0.1.0/constraints.txt +17 -0
- searchts-0.1.0/docs/assets/logo-1.png +0 -0
- searchts-0.1.0/docs/assets/logo-1.svg +34 -0
- searchts-0.1.0/docs/assets/logo-2.png +0 -0
- searchts-0.1.0/docs/assets/logo-2.svg +51 -0
- searchts-0.1.0/docs/assets/logo-3.png +0 -0
- searchts-0.1.0/docs/assets/logo-3.svg +34 -0
- searchts-0.1.0/docs/cookie-export.md +40 -0
- searchts-0.1.0/docs/dependency-locking.md +30 -0
- searchts-0.1.0/docs/install.md +263 -0
- searchts-0.1.0/docs/troubleshooting.md +45 -0
- searchts-0.1.0/docs/update.md +92 -0
- searchts-0.1.0/docs/wechat-group-qr.jpg +0 -0
- searchts-0.1.0/llms.txt +31 -0
- searchts-0.1.0/pyproject.toml +83 -0
- searchts-0.1.0/scripts/sync-upstream.sh +72 -0
- searchts-0.1.0/searchts/__init__.py +9 -0
- searchts-0.1.0/searchts/backends/__init__.py +15 -0
- searchts-0.1.0/searchts/backends/opencli.py +136 -0
- searchts-0.1.0/searchts/channels/__init__.py +49 -0
- searchts-0.1.0/searchts/channels/base.py +70 -0
- searchts-0.1.0/searchts/channels/exa_search.py +40 -0
- searchts-0.1.0/searchts/channels/github.py +43 -0
- searchts-0.1.0/searchts/channels/linkedin.py +43 -0
- searchts-0.1.0/searchts/channels/reddit.py +166 -0
- searchts-0.1.0/searchts/channels/rss.py +27 -0
- searchts-0.1.0/searchts/channels/twitter.py +146 -0
- searchts-0.1.0/searchts/channels/web.py +32 -0
- searchts-0.1.0/searchts/channels/youtube.py +91 -0
- searchts-0.1.0/searchts/cli.py +1525 -0
- searchts-0.1.0/searchts/config.py +110 -0
- searchts-0.1.0/searchts/cookie_extract.py +239 -0
- searchts-0.1.0/searchts/core.py +42 -0
- searchts-0.1.0/searchts/doctor.py +127 -0
- searchts-0.1.0/searchts/guides/setup-exa.md +41 -0
- searchts-0.1.0/searchts/guides/setup-groq.md +47 -0
- searchts-0.1.0/searchts/guides/setup-reddit.md +54 -0
- searchts-0.1.0/searchts/guides/setup-twitter.md +84 -0
- searchts-0.1.0/searchts/integrations/__init__.py +1 -0
- searchts-0.1.0/searchts/integrations/mcp_server.py +96 -0
- searchts-0.1.0/searchts/probe.py +103 -0
- searchts-0.1.0/searchts/skill/SKILL.md +129 -0
- searchts-0.1.0/searchts/skill/SKILL_en.md +114 -0
- searchts-0.1.0/searchts/skill/references/career.md +29 -0
- searchts-0.1.0/searchts/skill/references/dev.md +62 -0
- searchts-0.1.0/searchts/skill/references/search.md +33 -0
- searchts-0.1.0/searchts/skill/references/social.md +94 -0
- searchts-0.1.0/searchts/skill/references/video.md +58 -0
- searchts-0.1.0/searchts/skill/references/web.md +50 -0
- searchts-0.1.0/searchts/transcribe.py +261 -0
- searchts-0.1.0/searchts/unlocker.py +435 -0
- searchts-0.1.0/searchts/utils/paths.py +45 -0
- searchts-0.1.0/searchts/utils/process.py +26 -0
- searchts-0.1.0/searchts/utils/text.py +14 -0
- searchts-0.1.0/test.sh +89 -0
- searchts-0.1.0/tests/test_channel_contracts.py +177 -0
- searchts-0.1.0/tests/test_channels.py +294 -0
- searchts-0.1.0/tests/test_cli.py +281 -0
- searchts-0.1.0/tests/test_config.py +88 -0
- searchts-0.1.0/tests/test_cookie_extract_perms.py +89 -0
- searchts-0.1.0/tests/test_core.py +29 -0
- searchts-0.1.0/tests/test_doctor.py +126 -0
- searchts-0.1.0/tests/test_mcp_server.py +32 -0
- searchts-0.1.0/tests/test_opencli_backend.py +100 -0
- searchts-0.1.0/tests/test_probe.py +90 -0
- searchts-0.1.0/tests/test_process.py +18 -0
- searchts-0.1.0/tests/test_skill_command.py +125 -0
- searchts-0.1.0/tests/test_transcribe.py +262 -0
- searchts-0.1.0/tests/test_twitter_channel.py +184 -0
- searchts-0.1.0/tests/test_unlocker.py +287 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# searchts Configuration
|
|
2
|
+
# Copy to .env and fill in your values
|
|
3
|
+
# Or use: searchts configure
|
|
4
|
+
|
|
5
|
+
# Exa Search (free 1000/month) — https://exa.ai
|
|
6
|
+
# EXA_API_KEY=exa-your-key-here
|
|
7
|
+
|
|
8
|
+
# GitHub Token (optional, for higher rate limits) — https://github.com/settings/tokens
|
|
9
|
+
# GITHUB_TOKEN=ghp_your_token_here
|
|
10
|
+
|
|
11
|
+
# Reddit ISP Proxy (optional, for full Reddit access)
|
|
12
|
+
# REDDIT_PROXY=http://user:pass@ip:port
|
|
13
|
+
|
|
14
|
+
# Groq Whisper (optional, for video transcription) — https://console.groq.com
|
|
15
|
+
# GROQ_API_KEY=gsk_your_key_here
|
|
16
|
+
|
|
17
|
+
# OpenAI Whisper (optional fallback when Groq is rate-limited) — https://platform.openai.com
|
|
18
|
+
# OPENAI_API_KEY=sk-your_key_here
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
name: ci
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
test:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
strategy:
|
|
11
|
+
fail-fast: false
|
|
12
|
+
matrix:
|
|
13
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout
|
|
16
|
+
uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Setup Python
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python-version }}
|
|
22
|
+
|
|
23
|
+
- name: Install package and test deps
|
|
24
|
+
run: |
|
|
25
|
+
python -m pip install --upgrade pip
|
|
26
|
+
pip install -c constraints.txt -e .[dev]
|
|
27
|
+
|
|
28
|
+
- name: Run tests
|
|
29
|
+
run: |
|
|
30
|
+
pytest -q
|
|
31
|
+
|
|
32
|
+
# Editable installs (-e) never exercise wheel packaging, so a broken wheel
|
|
33
|
+
# can pass tests and still fail every real `pip install` from source.
|
|
34
|
+
# This job builds the actual wheel and installs it into a clean venv.
|
|
35
|
+
wheel-gate:
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
steps:
|
|
38
|
+
- name: Checkout
|
|
39
|
+
uses: actions/checkout@v4
|
|
40
|
+
|
|
41
|
+
- name: Setup Python
|
|
42
|
+
uses: actions/setup-python@v5
|
|
43
|
+
with:
|
|
44
|
+
python-version: "3.12"
|
|
45
|
+
|
|
46
|
+
- name: Build wheel
|
|
47
|
+
run: |
|
|
48
|
+
python -m pip install --upgrade pip build
|
|
49
|
+
python -m build
|
|
50
|
+
|
|
51
|
+
- name: Verify wheel has no duplicate entries and ships data files
|
|
52
|
+
run: |
|
|
53
|
+
python - <<'PY'
|
|
54
|
+
import glob, zipfile, collections
|
|
55
|
+
whl = glob.glob("dist/*.whl")[0]
|
|
56
|
+
names = zipfile.ZipFile(whl).namelist()
|
|
57
|
+
dupes = [n for n, c in collections.Counter(names).items() if c > 1]
|
|
58
|
+
assert not dupes, f"duplicate entries in wheel: {dupes}"
|
|
59
|
+
assert "searchts/skill/SKILL.md" in names, "SKILL.md missing from wheel"
|
|
60
|
+
for prefix in ("searchts/guides/", "searchts/scripts/", "searchts/skill/references/"):
|
|
61
|
+
assert any(n.startswith(prefix) for n in names), f"{prefix} missing from wheel"
|
|
62
|
+
print(f"wheel OK: {len(names)} entries, no duplicates, data files present")
|
|
63
|
+
PY
|
|
64
|
+
|
|
65
|
+
- name: Smoke-install wheel into clean venv
|
|
66
|
+
run: |
|
|
67
|
+
python -m venv /tmp/smoke
|
|
68
|
+
/tmp/smoke/bin/pip install --quiet dist/*.whl
|
|
69
|
+
/tmp/smoke/bin/searchts version
|
|
70
|
+
cd /tmp && /tmp/smoke/bin/python -c "import searchts; from importlib.resources import files; assert (files('searchts')/'skill'/'SKILL.md').is_file(); print('SKILL.md ships in site-packages OK')"
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.pyc
|
|
3
|
+
*.pyo
|
|
4
|
+
.pytest_cache/
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
.env
|
|
9
|
+
.searchts/
|
|
10
|
+
*.log
|
|
11
|
+
|
|
12
|
+
# Claude Code personal permission settings — local only, never commit
|
|
13
|
+
.claude/settings.local.json
|
|
14
|
+
uv.lock
|
|
15
|
+
|
|
16
|
+
# Local dev (this fork)
|
|
17
|
+
.venv/
|
|
18
|
+
scratch/
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## [1.1.0] - 2025-02-25
|
|
8
|
+
|
|
9
|
+
### New Channels
|
|
10
|
+
|
|
11
|
+
#### LinkedIn
|
|
12
|
+
- Read person profiles, company pages, and job details via [linkedin-scraper-mcp](https://github.com/stickerdaniel/linkedin-mcp-server)
|
|
13
|
+
- Search people and jobs via MCP, with Exa fallback
|
|
14
|
+
- Fallback to Jina Reader when MCP is not configured
|
|
15
|
+
|
|
16
|
+
### Improvements
|
|
17
|
+
|
|
18
|
+
- `searchts doctor` now detects the LinkedIn channel
|
|
19
|
+
- CLI: added `search-linkedin` subcommand
|
|
20
|
+
- Updated install guide with setup instructions for the new channel
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## [1.0.0] - 2025-02-24
|
|
25
|
+
|
|
26
|
+
### Initial Release
|
|
27
|
+
|
|
28
|
+
- 8 channels: Web (Jina Reader), Search (Exa), GitHub, YouTube, Reddit, Twitter/X, LinkedIn, RSS
|
|
29
|
+
- CLI with `read`, `search`, `doctor`, `install` commands
|
|
30
|
+
- Unified channel interface — each platform is a single pluggable Python file
|
|
31
|
+
- Auto-detection of local vs server environments
|
|
32
|
+
- Built-in diagnostics via `searchts doctor`
|
|
33
|
+
- Skill registration for Claude Code / OpenClaw / Cursor
|
searchts-0.1.0/CLAUDE.md
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
## Project
|
|
4
|
+
searchts — Python CLI + library that gives AI agents read/search access to 8 platforms.
|
|
5
|
+
Positioning: installer + doctor + config tool. NOT a wrapper — after install, agents call upstream tools directly.
|
|
6
|
+
Repo: github.com/capad-xyz/searchts | License: MIT | Version: 0.1.0
|
|
7
|
+
|
|
8
|
+
## Commands
|
|
9
|
+
- `pip install -e .` — Dev install
|
|
10
|
+
- `pytest tests/ -v` — All tests
|
|
11
|
+
- `pytest tests/test_cli.py -v` — CLI tests only
|
|
12
|
+
- `bash test.sh` — Full integration test (creates venv, installs, runs doctor + channel tests)
|
|
13
|
+
- `python -m searchts.cli doctor` — Run diagnostics
|
|
14
|
+
- `python -m searchts.cli install --env=auto` — Auto-configure
|
|
15
|
+
|
|
16
|
+
## Structure
|
|
17
|
+
- `searchts/cli.py` — CLI entry point (argparse)
|
|
18
|
+
- `searchts/core.py` — Core read/search routing logic
|
|
19
|
+
- `searchts/config.py` — Config management (YAML, env vars)
|
|
20
|
+
- `searchts/doctor.py` — Diagnostics engine
|
|
21
|
+
- `searchts/channels/` — One file per platform (twitter.py, reddit.py, youtube.py, etc.)
|
|
22
|
+
- `searchts/channels/base.py` — Base channel class (all channels inherit from this)
|
|
23
|
+
- `searchts/integrations/mcp_server.py` — MCP server integration
|
|
24
|
+
- `searchts/skill/` — OpenClaw skill files
|
|
25
|
+
- `searchts/guides/` — Usage guides
|
|
26
|
+
- `tests/` — pytest tests
|
|
27
|
+
- `config/mcporter.json` — MCP tool config
|
|
28
|
+
|
|
29
|
+
## Conventions
|
|
30
|
+
- Python 3.10+ with type hints
|
|
31
|
+
- Each channel is a single file in `channels/`, inherits from `BaseChannel`
|
|
32
|
+
- Channel contract: must implement `can_handle(url)`, `read(url)`, `search(query)`, `check()` methods
|
|
33
|
+
- Use `loguru` for logging, `rich` for CLI output
|
|
34
|
+
- Commit format: `type(scope): message` (one commit = one thing)
|
|
35
|
+
- All upstream tool calls go through public API/CLI, never hack internals
|
|
36
|
+
|
|
37
|
+
## Rules
|
|
38
|
+
- NEVER modify upstream open source projects' source code
|
|
39
|
+
- searchts is a "glue layer" — only route and call, don't reimplement
|
|
40
|
+
- Version in THREE places must match: `pyproject.toml`, `__init__.py`, `tests/test_cli.py`
|
|
41
|
+
- Always create a new branch for changes and open a PR to main; never push to main directly
|
|
42
|
+
- Run `pytest tests/ -v` before committing — all tests must pass
|
|
43
|
+
- Cookie-based auth (Twitter): use Cookie-Editor export method only, no QR scan
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Contributing to searchts
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing to searchts! This document provides guidelines and instructions for contributing.
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
1. Fork the repository on GitHub
|
|
8
|
+
2. Clone your fork locally
|
|
9
|
+
3. Create a new branch for your contribution
|
|
10
|
+
4. Make your changes
|
|
11
|
+
5. Run tests and linting
|
|
12
|
+
6. Submit a pull request
|
|
13
|
+
|
|
14
|
+
## Development Setup
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Clone your fork
|
|
18
|
+
git clone https://github.com/YOUR_USERNAME/searchts.git
|
|
19
|
+
cd searchts
|
|
20
|
+
|
|
21
|
+
# Install in development mode
|
|
22
|
+
pip install -e ".[dev]"
|
|
23
|
+
|
|
24
|
+
# Install pre-commit hooks (optional but recommended; pre-commit is not in the dev extras, so run "pip install pre-commit" first)
|
|
25
|
+
pre-commit install
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Code Style
|
|
29
|
+
|
|
30
|
+
We use the following tools to maintain code quality:
|
|
31
|
+
|
|
32
|
+
- **ruff**: Linting and import sorting
|
|
33
|
+
- **mypy**: Type checking
|
|
34
|
+
- **pytest**: Testing
|
|
35
|
+
|
|
36
|
+
Run all checks before submitting a PR:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Linting
|
|
40
|
+
ruff check searchts tests
|
|
41
|
+
ruff format searchts tests
|
|
42
|
+
|
|
43
|
+
# Type checking
|
|
44
|
+
mypy searchts
|
|
45
|
+
|
|
46
|
+
# Tests
|
|
47
|
+
pytest
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Adding New Channels
|
|
51
|
+
|
|
52
|
+
searchts uses a unified channel interface. To add a new platform:
|
|
53
|
+
|
|
54
|
+
1. Create a new file in `searchts/channels/`
|
|
55
|
+
2. Implement the channel contract (see existing channels for examples)
|
|
56
|
+
3. Add tests in `tests/test_channels.py`
|
|
57
|
+
4. Update `searchts/doctor.py` to include the new channel
|
|
58
|
+
5. Update documentation
|
|
59
|
+
|
|
60
|
+
## Pull Request Guidelines
|
|
61
|
+
|
|
62
|
+
- **Small, focused changes** are preferred over large refactors
|
|
63
|
+
- Include tests for new functionality
|
|
64
|
+
- Update documentation if needed
|
|
65
|
+
- Follow existing code style
|
|
66
|
+
- Reference any related issues
|
|
67
|
+
|
|
68
|
+
## Reporting Issues
|
|
69
|
+
|
|
70
|
+
When reporting bugs, please include:
|
|
71
|
+
|
|
72
|
+
- Python version
|
|
73
|
+
- Operating system
|
|
74
|
+
- Steps to reproduce
|
|
75
|
+
- Expected vs actual behavior
|
|
76
|
+
- Any error messages
|
|
77
|
+
|
|
78
|
+
## Questions?
|
|
79
|
+
|
|
80
|
+
Feel free to open an issue for questions or join discussions.
|
searchts-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Agent Eyes (original Agent-Reach project)
|
|
4
|
+
Copyright (c) 2026 capad-xyz (searchts modifications and additions)
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in all
|
|
14
|
+
copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
22
|
+
SOFTWARE.
|
searchts-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: searchts
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Give your AI agent eyes on the open web: an escalating open-source unlocker (browser-fingerprinted fetch -> JS-render relay -> stealth browser) plus read/search across web, search, GitHub, YouTube, Reddit, Twitter, LinkedIn, and RSS.
|
|
5
|
+
Project-URL: Homepage, https://github.com/capad-xyz/searchts
|
|
6
|
+
Project-URL: Repository, https://github.com/capad-xyz/searchts
|
|
7
|
+
Project-URL: Issues, https://github.com/capad-xyz/searchts/issues
|
|
8
|
+
Author: capad-xyz
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agent-infrastructure,ai-agent,ai-search,automation,claude-code,cli,cursor,free-api,llm-tools,mcp,no-api-key,openai,reddit-scraper,search,twitter-scraper,unlocker,web-reader,web-scraper,youtube-transcript
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: curl-cffi>=0.7
|
|
23
|
+
Requires-Dist: feedparser>=6.0
|
|
24
|
+
Requires-Dist: loguru>=0.7
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0
|
|
26
|
+
Requires-Dist: pyyaml>=6.0
|
|
27
|
+
Requires-Dist: requests>=2.28
|
|
28
|
+
Requires-Dist: rich>=13.0
|
|
29
|
+
Requires-Dist: trafilatura>=1.8
|
|
30
|
+
Requires-Dist: yt-dlp>=2024.0
|
|
31
|
+
Provides-Extra: all
|
|
32
|
+
Requires-Dist: browser-cookie3>=0.19; extra == 'all'
|
|
33
|
+
Requires-Dist: mcp[cli]>=1.0; extra == 'all'
|
|
34
|
+
Requires-Dist: patchright>=1.50; extra == 'all'
|
|
35
|
+
Provides-Extra: browser
|
|
36
|
+
Requires-Dist: patchright>=1.50; extra == 'browser'
|
|
37
|
+
Provides-Extra: cookies
|
|
38
|
+
Requires-Dist: browser-cookie3>=0.19; extra == 'cookies'
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: mypy>=1.12; extra == 'dev'
|
|
41
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
42
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
43
|
+
Requires-Dist: types-pyyaml>=6.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: types-requests>=2.32; extra == 'dev'
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# searchts
|
|
48
|
+
|
|
49
|
+
**Give your AI agent eyes on the open web.** `searchts` is a Python CLI and library that lets an AI agent read and search the internet, fronted by a fully open-source "unlocker" that gets through common bot-walls without any paid proxy or unlocker service.
|
|
50
|
+
|
|
51
|
+
License: MIT. Python 3.10+.
|
|
52
|
+
|
|
53
|
+
## Why
|
|
54
|
+
|
|
55
|
+
AI agents constantly need to read web pages, but the naive way they fetch is trivially blocked by modern anti-bot systems (Cloudflare, PerimeterX, DataDome). Paid unlocker services solve this, but the thing they really charge for is a large pool of clean residential IP addresses. `searchts` runs on your own machine, from your own connection, at personal volume, so it sidesteps that cost and gets through most of those walls for free.
|
|
56
|
+
|
|
57
|
+
## The unlocker
|
|
58
|
+
|
|
59
|
+
`searchts` reads any URL through an escalating ladder and stops at the first tier that returns real content:
|
|
60
|
+
|
|
61
|
+
1. **curl_cffi** : a fetch that impersonates a real Chrome's TLS/JA3 and HTTP2 fingerprint. Beats user-agent and fingerprint filters. Fast, local, private.
|
|
62
|
+
2. **Jina Reader** : a JavaScript-rendering relay, for pages that only fill in content after running JS.
|
|
63
|
+
3. **stealth browser** : an undetected headless Chromium (patchright), launched lazily only when the cheaper tiers fail, for live JS / Cloudflare managed challenges.
|
|
64
|
+
|
|
65
|
+
If every tier is defeated by an interactive CAPTCHA, an optional human-in-the-loop step opens a real browser so you can solve it once and continue.
|
|
66
|
+
|
|
67
|
+
Block detection is phrase-based, not vendor-name based, so legitimate pages that merely embed a bot-sensor script are not falsely rejected. Content is extracted to clean Markdown with `trafilatura`.
|
|
68
|
+
|
|
69
|
+
## Install
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install searchts
|
|
73
|
+
# optional: the stealth-browser tier
|
|
74
|
+
pip install "searchts[browser]" && patchright install chromium
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
For development:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install -e . --no-build-isolation
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Quickstart
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
searchts read https://example.com # clean Markdown to stdout
|
|
87
|
+
searchts read https://news.ycombinator.com --json # structured: backend, status, chars, text
|
|
88
|
+
searchts read https://example.com --backend curl_cffi # force a single tier
|
|
89
|
+
searchts read https://example.com --human # human-in-the-loop CAPTCHA fallback
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Content goes to stdout (pipeable); status goes to stderr.
|
|
93
|
+
|
|
94
|
+
## Features
|
|
95
|
+
|
|
96
|
+
- **Escalating open-source unlocker**: curl_cffi, then Jina Reader, then a stealth browser.
|
|
97
|
+
- **`searchts read <url>`**: run the unlocker from the command line and print clean Markdown.
|
|
98
|
+
- **MCP tool `read_url(url)`**: expose the unlocker to agents (Claude, Cursor, and others) so they can read any page directly.
|
|
99
|
+
- **Per-domain backend memory**: remembers which tier worked for each domain and tries it first; disable with `SEARCHTS_NO_MEMORY=1`.
|
|
100
|
+
- **Human-in-the-loop CAPTCHA**: on an interactive challenge, hand off to your real browser to solve once.
|
|
101
|
+
- **Read and search across sources**: web (any URL), search (Exa), GitHub, YouTube, Reddit, Twitter/X, LinkedIn, and RSS.
|
|
102
|
+
|
|
103
|
+
## Use as a library
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from searchts import unlocker
|
|
107
|
+
|
|
108
|
+
r = unlocker.fetch("https://example.com")
|
|
109
|
+
print(r.backend, r.status, len(r.text))
|
|
110
|
+
print(r.text)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## MCP
|
|
114
|
+
|
|
115
|
+
`searchts` ships an MCP server (`searchts/integrations/mcp_server.py`) that exposes `read_url(url)`. Point your MCP-capable client at it to give the agent a one-call web reader backed by the full unlocker ladder.
|
|
116
|
+
|
|
117
|
+
## How it works, and its limits
|
|
118
|
+
|
|
119
|
+
- It runs from your own residential IP at personal volume, which is why it needs no paid proxy pool. It is a personal-grade research tool, not a mass-scraping system.
|
|
120
|
+
- Interactive CAPTCHAs (DataDome / Turnstile press-and-hold) are the honest ceiling. Use `--human` for those.
|
|
121
|
+
- Anti-bot systems evolve; this is an arms race and the techniques may need occasional updates.
|
|
122
|
+
- Respect each site's terms of service and use responsibly.
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
|
|
126
|
+
Optional API keys, via `searchts configure` or a `.env` file (see `.env.example`):
|
|
127
|
+
|
|
128
|
+
- **Exa** for web search (free tier available)
|
|
129
|
+
- **GitHub token** for higher rate limits
|
|
130
|
+
- **Groq / OpenAI** for video transcription
|
|
131
|
+
|
|
132
|
+
Run `searchts doctor` to check what is configured and working.
|
|
133
|
+
|
|
134
|
+
## Credits
|
|
135
|
+
|
|
136
|
+
`searchts` builds on and extends [Agent-Reach](https://github.com/Panniantong/Agent-Reach) (MIT), reusing its channel, installer, and diagnostics architecture. The escalating open-source unlocker, per-domain backend memory, human-in-the-loop CAPTCHA flow, the `read_url` MCP tool, and the `read` CLI command are additions in `searchts`. Thanks to the original authors.
|
|
137
|
+
|
|
138
|
+
## License
|
|
139
|
+
|
|
140
|
+
MIT. See [LICENSE](LICENSE). Original portions Copyright (c) 2025 Agent Eyes; modifications and additions Copyright (c) 2026 capad-xyz.
|
searchts-0.1.0/README.md
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# searchts
|
|
2
|
+
|
|
3
|
+
**Give your AI agent eyes on the open web.** `searchts` is a Python CLI and library that lets an AI agent read and search the internet, fronted by a fully open-source "unlocker" that gets through common bot-walls without any paid proxy or unlocker service.
|
|
4
|
+
|
|
5
|
+
License: MIT. Python 3.10+.
|
|
6
|
+
|
|
7
|
+
## Why
|
|
8
|
+
|
|
9
|
+
AI agents constantly need to read web pages, but the naive way they fetch is trivially blocked by modern anti-bot systems (Cloudflare, PerimeterX, DataDome). Paid unlocker services solve this, but the thing they really charge for is a large pool of clean residential IP addresses. `searchts` runs on your own machine, from your own connection, at personal volume, so it sidesteps that cost and gets through most of those walls for free.
|
|
10
|
+
|
|
11
|
+
## The unlocker
|
|
12
|
+
|
|
13
|
+
`searchts` reads any URL through an escalating ladder and stops at the first tier that returns real content:
|
|
14
|
+
|
|
15
|
+
1. **curl_cffi**: a fetch that impersonates a real Chrome's TLS/JA3 and HTTP/2 fingerprint. Beats user-agent and fingerprint filters. Fast, local, private.
|
|
16
|
+
2. **Jina Reader**: a JavaScript-rendering relay, for pages that only fill in content after running JS.
|
|
17
|
+
3. **stealth browser**: an undetected headless Chromium (patchright), launched lazily only when the cheaper tiers fail, for live JS / Cloudflare managed challenges.
|
|
18
|
+
|
|
19
|
+
If every tier is defeated by an interactive CAPTCHA, an optional human-in-the-loop step opens a real browser so you can solve it once and continue.
|
|
20
|
+
|
|
21
|
+
Block detection is phrase-based, not vendor-name based, so legitimate pages that merely embed a bot-sensor script are not falsely rejected. Content is extracted to clean Markdown with `trafilatura`.
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install searchts
|
|
27
|
+
# optional: the stealth-browser tier
|
|
28
|
+
pip install "searchts[browser]" && patchright install chromium
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
For development:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install -e . --no-build-isolation
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quickstart
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
searchts read https://example.com # clean Markdown to stdout
|
|
41
|
+
searchts read https://news.ycombinator.com --json # structured: backend, status, chars, text
|
|
42
|
+
searchts read https://example.com --backend curl_cffi # force a single tier
|
|
43
|
+
searchts read https://example.com --human # human-in-the-loop CAPTCHA fallback
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Content goes to stdout (pipeable); status goes to stderr.
|
|
47
|
+
|
|
48
|
+
## Features
|
|
49
|
+
|
|
50
|
+
- **Escalating open-source unlocker**: curl_cffi, then Jina Reader, then a stealth browser.
|
|
51
|
+
- **`searchts read <url>`**: run the unlocker from the command line and print clean Markdown.
|
|
52
|
+
- **MCP tool `read_url(url)`**: expose the unlocker to agents (Claude, Cursor, and others) so they can read any page directly.
|
|
53
|
+
- **Per-domain backend memory**: remembers which tier worked for each domain and tries it first; disable with `SEARCHTS_NO_MEMORY=1`.
|
|
54
|
+
- **Human-in-the-loop CAPTCHA**: on an interactive challenge, hand off to your real browser to solve once.
|
|
55
|
+
- **Read and search across sources**: web (any URL), search (Exa), GitHub, YouTube, Reddit, Twitter/X, LinkedIn, and RSS.
|
|
56
|
+
|
|
57
|
+
## Use as a library
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from searchts import unlocker
|
|
61
|
+
|
|
62
|
+
r = unlocker.fetch("https://example.com")
|
|
63
|
+
print(r.backend, r.status, len(r.text))
|
|
64
|
+
print(r.text)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## MCP
|
|
68
|
+
|
|
69
|
+
`searchts` ships an MCP server (`searchts/integrations/mcp_server.py`) that exposes `read_url(url)`. Point your MCP-capable client at it to give the agent a one-call web reader backed by the full unlocker ladder.
|
|
70
|
+
|
|
71
|
+
## How it works, and its limits
|
|
72
|
+
|
|
73
|
+
- It runs from your own residential IP at personal volume, which is why it needs no paid proxy pool. It is a personal-grade research tool, not a mass-scraping system.
|
|
74
|
+
- Interactive CAPTCHAs (DataDome / Turnstile press-and-hold) are the honest ceiling. Use `--human` for those.
|
|
75
|
+
- Anti-bot systems evolve; this is an arms race and the techniques may need occasional updates.
|
|
76
|
+
- Respect each site's terms of service and use responsibly.
|
|
77
|
+
|
|
78
|
+
## Configuration
|
|
79
|
+
|
|
80
|
+
Optional API keys, via `searchts configure` or a `.env` file (see `.env.example`):
|
|
81
|
+
|
|
82
|
+
- **Exa** for web search (free tier available)
|
|
83
|
+
- **GitHub token** for higher rate limits
|
|
84
|
+
- **Groq / OpenAI** for video transcription
|
|
85
|
+
|
|
86
|
+
Run `searchts doctor` to check what is configured and working.
|
|
87
|
+
|
|
88
|
+
## Credits
|
|
89
|
+
|
|
90
|
+
`searchts` builds on and extends [Agent-Reach](https://github.com/Panniantong/Agent-Reach) (MIT), reusing its channel, installer, and diagnostics architecture. The escalating open-source unlocker, per-domain backend memory, human-in-the-loop CAPTCHA flow, the `read_url` MCP tool, and the `read` CLI command are additions in `searchts`. Thanks to the original authors.
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
MIT. See [LICENSE](LICENSE). Original portions Copyright (c) 2025 Agent Eyes; modifications and additions Copyright (c) 2026 capad-xyz.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
| Version | Supported |
|
|
6
|
+
|---------|-----------|
|
|
7
|
+
| Latest | ✅ Yes |
|
|
8
|
+
|
|
9
|
+
## Reporting a Vulnerability
|
|
10
|
+
|
|
11
|
+
If you discover a security vulnerability in searchts, please report
|
|
12
|
+
it responsibly by using GitHub's private security advisory feature:
|
|
13
|
+
|
|
14
|
+
👉 **[Report a vulnerability](https://github.com/capad-xyz/searchts/security/advisories/new)**
|
|
15
|
+
|
|
16
|
+
Please do NOT open a public GitHub issue for security vulnerabilities.
|
|
17
|
+
|
|
18
|
+
## What to Include
|
|
19
|
+
|
|
20
|
+
- Description of the vulnerability
|
|
21
|
+
- Steps to reproduce
|
|
22
|
+
- Affected versions
|
|
23
|
+
- Potential impact
|
|
24
|
+
- Suggested fix (if any)
|
|
25
|
+
|
|
26
|
+
## Response Timeline
|
|
27
|
+
|
|
28
|
+
- Acknowledgement within **48 hours**
|
|
29
|
+
- Status update within **7 days**
|
|
30
|
+
- Fix timeline communicated within **14 days**
|
|
31
|
+
|
|
32
|
+
## Scope
|
|
33
|
+
|
|
34
|
+
The following are considered in scope:
|
|
35
|
+
- Authentication and authorization bypass
|
|
36
|
+
- Remote code execution
|
|
37
|
+
- Path traversal / arbitrary file read
|
|
38
|
+
- Server-Side Request Forgery (SSRF)
|
|
39
|
+
- Injection vulnerabilities (SQL, command, prompt)
|
|
40
|
+
- Sensitive data exposure
|
|
41
|
+
|
|
42
|
+
## Out of Scope
|
|
43
|
+
|
|
44
|
+
- Vulnerabilities in dependencies (report to the dependency maintainer)
|
|
45
|
+
- Social engineering attacks
|
|
46
|
+
- Denial of service via resource exhaustion
|
|
47
|
+
|
|
48
|
+
## Credits
|
|
49
|
+
|
|
50
|
+
We appreciate responsible disclosure and will credit researchers
|
|
51
|
+
in our release notes unless anonymity is requested.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# searchts tested dependency set
|
|
2
|
+
# Usage:
|
|
3
|
+
# pip install -c constraints.txt -e ".[dev]"
|
|
4
|
+
|
|
5
|
+
requests==2.32.5
|
|
6
|
+
feedparser==6.0.12
|
|
7
|
+
python-dotenv==1.2.1
|
|
8
|
+
loguru==0.7.3
|
|
9
|
+
PyYAML==6.0.3
|
|
10
|
+
rich==14.3.2
|
|
11
|
+
yt-dlp==2025.5.22
|
|
12
|
+
|
|
13
|
+
pytest==8.0.0
|
|
14
|
+
ruff==0.15.1
|
|
15
|
+
mypy==1.19.1
|
|
16
|
+
types-requests==2.32.4.20260107
|
|
17
|
+
types-PyYAML==6.0.12.20250915
|
|
Binary file
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" width="512" height="512">
|
|
3
|
+
<defs>
|
|
4
|
+
<linearGradient id="grad1" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
5
|
+
<stop offset="0%" style="stop-color:#0066FF"/>
|
|
6
|
+
<stop offset="100%" style="stop-color:#00AAFF"/>
|
|
7
|
+
</linearGradient>
|
|
8
|
+
<clipPath id="clip-circle">
|
|
9
|
+
<circle cx="256" cy="256" r="140"/>
|
|
10
|
+
</clipPath>
|
|
11
|
+
</defs>
|
|
12
|
+
|
|
13
|
+
<rect width="512" height="512" fill="#0A0A1A"/>
|
|
14
|
+
|
|
15
|
+
<!-- Outer ring - the "boundary" -->
|
|
16
|
+
<circle cx="256" cy="256" r="160" fill="none" stroke="url(#grad1)" stroke-width="8" opacity="0.3"/>
|
|
17
|
+
<circle cx="256" cy="256" r="140" fill="none" stroke="url(#grad1)" stroke-width="3" opacity="0.6"/>
|
|
18
|
+
|
|
19
|
+
<!-- Inner: agent core point -->
|
|
20
|
+
<circle cx="220" cy="256" r="24" fill="url(#grad1)"/>
|
|
21
|
+
|
|
22
|
+
<!-- "Reach" arcs - starting from the core, crossing the ring boundary, extending outward -->
|
|
23
|
+
<path d="M 230 256 Q 320 180, 420 200"
|
|
24
|
+
fill="none" stroke="url(#grad1)" stroke-width="6" stroke-linecap="round" opacity="0.9"/>
|
|
25
|
+
<path d="M 230 256 Q 320 256, 420 256"
|
|
26
|
+
fill="none" stroke="url(#grad1)" stroke-width="6" stroke-linecap="round" opacity="0.7"/>
|
|
27
|
+
<path d="M 230 256 Q 320 330, 420 312"
|
|
28
|
+
fill="none" stroke="url(#grad1)" stroke-width="6" stroke-linecap="round" opacity="0.5"/>
|
|
29
|
+
|
|
30
|
+
<!-- Target points: where the reach lands -->
|
|
31
|
+
<circle cx="420" cy="200" r="8" fill="#00AAFF" opacity="0.9"/>
|
|
32
|
+
<circle cx="420" cy="256" r="8" fill="#00AAFF" opacity="0.7"/>
|
|
33
|
+
<circle cx="420" cy="312" r="8" fill="#00AAFF" opacity="0.5"/>
|
|
34
|
+
</svg>
|
|
Binary file
|