site2cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ .env
12
+ .mypy_cache/
13
+ .ruff_cache/
14
+ .pytest_cache/
15
+ *.db
16
+ *.sqlite3
@@ -0,0 +1,209 @@
1
+ Metadata-Version: 2.4
2
+ Name: site2cli
3
+ Version: 0.1.0
4
+ Summary: Turn any website into a CLI/API for AI agents
5
+ Project-URL: Homepage, https://github.com/lonexreb/webcli
6
+ Project-URL: Repository, https://github.com/lonexreb/webcli
7
+ Project-URL: Issues, https://github.com/lonexreb/webcli/issues
8
+ License-Expression: MIT
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Internet :: WWW/HTTP
19
+ Classifier: Topic :: Software Development :: Code Generators
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: httpx>=0.27.0
23
+ Requires-Dist: keyring>=25.0.0
24
+ Requires-Dist: openapi-spec-validator>=0.7.0
25
+ Requires-Dist: pydantic>=2.0.0
26
+ Requires-Dist: pyyaml>=6.0
27
+ Requires-Dist: rich>=13.0.0
28
+ Requires-Dist: typer>=0.12.0
29
+ Provides-Extra: all
30
+ Requires-Dist: site2cli[browser,llm,mcp]; extra == 'all'
31
+ Provides-Extra: browser
32
+ Requires-Dist: browser-cookie3>=0.19.0; extra == 'browser'
33
+ Requires-Dist: playwright>=1.40.0; extra == 'browser'
34
+ Provides-Extra: cookies
35
+ Requires-Dist: browser-cookie3>=0.19.0; extra == 'cookies'
36
+ Provides-Extra: dev
37
+ Requires-Dist: mypy>=1.10.0; extra == 'dev'
38
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
39
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
40
+ Requires-Dist: ruff>=0.5.0; extra == 'dev'
41
+ Requires-Dist: site2cli[all]; extra == 'dev'
42
+ Provides-Extra: llm
43
+ Requires-Dist: anthropic>=0.40.0; extra == 'llm'
44
+ Provides-Extra: mcp
45
+ Requires-Dist: mcp>=1.0.0; extra == 'mcp'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # WebCLI
49
+
50
+ Turn any website into a CLI/API for AI agents.
51
+
52
+ WebCLI captures browser network traffic, discovers API patterns, and auto-generates structured interfaces (CLI commands, MCP servers, Python clients) so AI agents can interact with any web service as fast function calls instead of slow browser automation.
53
+
54
+ ## The Problem
55
+
56
+ AI agents interact with websites through browser automation (Playwright, Puppeteer, Computer Use), which is:
57
+ - **10-100x slower** than direct API calls
58
+ - **10-100x more expensive** in LLM tokens
59
+ - **~15-35% reliable** on academic benchmarks
60
+
61
+ ## The Solution: Progressive Formalization
62
+
63
+ WebCLI uses a 3-tier system that automatically graduates interactions from slow-but-universal to fast-but-specific:
64
+
65
+ ```
66
+ Tier 3: Direct API Calls (fastest, most reliable)
67
+ ^ Auto-generated from discovered API patterns
68
+ Tier 2: Cached Workflows (medium speed)
69
+ ^ Recorded browser workflows, parameterized + replayed
70
+ Tier 1: Browser Exploration (slowest, universal fallback)
71
+ ^ LLM-driven browser automation for unknown sites
72
+ ```
73
+
74
+ ## Quick Start
75
+
76
+ ```bash
77
+ # Install (lightweight - no browser deps by default)
78
+ pip install webcli
79
+
80
+ # Install with all features
81
+ pip install webcli[all]
82
+
83
+ # Or pick what you need
84
+ pip install webcli[browser] # Playwright for traffic capture
85
+ pip install webcli[llm] # Claude API for smart analysis
86
+ pip install webcli[mcp] # MCP server generation
87
+ ```
88
+
89
+ ### Discover a Site's API
90
+
91
+ ```bash
92
+ # Capture traffic and discover API endpoints
93
+ webcli discover kayak.com --action "search flights"
94
+
95
+ # WebCLI launches a browser, captures network traffic,
96
+ # and generates: OpenAPI spec + CLI commands + MCP tools
97
+ ```
98
+
99
+ ### Use the Generated Interface
100
+
101
+ ```bash
102
+ # CLI
103
+ webcli run kayak.com search_flights --from SFO --to JFK --date 2025-04-01
104
+
105
+ # Or as MCP tools for AI agents
106
+ webcli mcp generate kayak.com
107
+ webcli mcp serve kayak.com
108
+ ```
109
+
110
+ ### As a Python Library
111
+
112
+ ```python
113
+ from webcli.discovery.analyzer import TrafficAnalyzer
114
+ from webcli.discovery.spec_generator import generate_openapi_spec
115
+ from webcli.generators.mcp_gen import generate_mcp_server_code
116
+
117
+ # Analyze captured traffic
118
+ analyzer = TrafficAnalyzer(exchanges)
119
+ endpoints = analyzer.extract_endpoints()
120
+
121
+ # Generate OpenAPI spec
122
+ spec = generate_openapi_spec(api)
123
+
124
+ # Generate MCP server
125
+ mcp_code = generate_mcp_server_code(site, spec)
126
+ ```
127
+
128
+ ## What Gets Generated
129
+
130
+ From a single discovery session, WebCLI produces:
131
+
132
+ | Output | Description |
133
+ |--------|-------------|
134
+ | **OpenAPI 3.1 Spec** | Full API specification with schemas, parameters, auth |
135
+ | **Python Client** | Typed httpx client with methods for each endpoint |
136
+ | **CLI Commands** | Typer commands you can run from terminal |
137
+ | **MCP Server** | Tools that AI agents (Claude, etc.) can call directly |
138
+
139
+ ## Architecture
140
+
141
+ ```
142
+ WebCLI Core
143
+ +----------+--------------+--------------+------------+
144
+ | CLI | MCP Server | Python SDK | REST API |
145
+ +----------+--------------+--------------+------------+
146
+ | Router / Resolver |
147
+ | (Picks best available tier for a given site+action) |
148
+ +------------------------------------------------------+
149
+ | Tier 1: Browser | Tier 2: Cached | Tier 3: API |
150
+ | Explorer | Workflows | Clients |
151
+ +------------------------------------------------------+
152
+ | API Discovery Engine |
153
+ | Traffic Capture -> Pattern Analysis -> Spec Gen |
154
+ +------------------------------------------------------+
155
+ | Auth Manager | Site Registry | Health Monitor |
156
+ +------------------------------------------------------+
157
+ ```
158
+
159
+ ## Key Features
160
+
161
+ - **Auto-discovery**: Captures browser traffic via CDP and infers API patterns
162
+ - **Smart analysis**: LLM-assisted endpoint description and parameter inference
163
+ - **Progressive promotion**: Actions auto-upgrade from browser -> workflow -> API as patterns stabilize
164
+ - **MCP native**: Generated tools work directly with Claude and other MCP-compatible agents
165
+ - **Self-healing**: Detects when APIs break and attempts automatic repair
166
+ - **Community sharing**: Export/import site specs like yt-dlp extractors
167
+ - **Lightweight core**: Heavy deps (Playwright, Anthropic, MCP) are optional
168
+
169
+ ## Development
170
+
171
+ ```bash
172
+ # Clone and install with dev dependencies
173
+ git clone https://github.com/lonexreb/webcli.git
174
+ cd webcli
175
+ pip install -e ".[dev]"
176
+
177
+ # Run tests
178
+ pytest # Unit + integration tests (no network)
179
+ pytest -m live # Live tests (hits real APIs)
180
+ pytest -v # Verbose output
181
+
182
+ # Lint
183
+ ruff check src/ tests/
184
+ ```
185
+
186
+ ### Test Coverage
187
+
188
+ - **65 unit/integration tests** covering models, registry, analyzer, spec generation, client generation, CLI, MCP generation, tier promotion, and full pipeline
189
+ - **6 live tests** against JSONPlaceholder and httpbin.org
190
+ - All tests pass on Python 3.10+
191
+
192
+ ## API Keys
193
+
194
+ For full functionality:
195
+ - **Anthropic API key** (`ANTHROPIC_API_KEY`): Used for LLM-assisted endpoint analysis. Optional — discovery works without it, just without enhanced descriptions.
196
+ - **No other keys required** for core functionality.
197
+
198
+ ## Roadmap
199
+
200
+ - [ ] Community spec registry (share discovered APIs)
201
+ - [ ] Browser cookie extraction for authenticated sites
202
+ - [ ] OAuth device flow support
203
+ - [ ] PyPI package publication
204
+ - [ ] Workflow recording and replay (Tier 2)
205
+ - [ ] Health monitoring dashboard
206
+
207
+ ## License
208
+
209
+ MIT
@@ -0,0 +1,162 @@
1
+ # WebCLI
2
+
3
+ Turn any website into a CLI/API for AI agents.
4
+
5
+ WebCLI captures browser network traffic, discovers API patterns, and auto-generates structured interfaces (CLI commands, MCP servers, Python clients) so AI agents can interact with any web service as fast function calls instead of slow browser automation.
6
+
7
+ ## The Problem
8
+
9
+ AI agents interact with websites through browser automation (Playwright, Puppeteer, Computer Use), which is:
10
+ - **10-100x slower** than direct API calls
11
+ - **10-100x more expensive** in LLM tokens
12
+ - **~15-35% reliable** on academic benchmarks
13
+
14
+ ## The Solution: Progressive Formalization
15
+
16
+ WebCLI uses a 3-tier system that automatically graduates interactions from slow-but-universal to fast-but-specific:
17
+
18
+ ```
19
+ Tier 3: Direct API Calls (fastest, most reliable)
20
+ ^ Auto-generated from discovered API patterns
21
+ Tier 2: Cached Workflows (medium speed)
22
+ ^ Recorded browser workflows, parameterized + replayed
23
+ Tier 1: Browser Exploration (slowest, universal fallback)
24
+ ^ LLM-driven browser automation for unknown sites
25
+ ```
26
+
27
+ ## Quick Start
28
+
29
+ ```bash
30
+ # Install (lightweight - no browser deps by default)
31
+ pip install webcli
32
+
33
+ # Install with all features
34
+ pip install webcli[all]
35
+
36
+ # Or pick what you need
37
+ pip install webcli[browser] # Playwright for traffic capture
38
+ pip install webcli[llm] # Claude API for smart analysis
39
+ pip install webcli[mcp] # MCP server generation
40
+ ```
41
+
42
+ ### Discover a Site's API
43
+
44
+ ```bash
45
+ # Capture traffic and discover API endpoints
46
+ webcli discover kayak.com --action "search flights"
47
+
48
+ # WebCLI launches a browser, captures network traffic,
49
+ # and generates: OpenAPI spec + CLI commands + MCP tools
50
+ ```
51
+
52
+ ### Use the Generated Interface
53
+
54
+ ```bash
55
+ # CLI
56
+ webcli run kayak.com search_flights --from SFO --to JFK --date 2025-04-01
57
+
58
+ # Or as MCP tools for AI agents
59
+ webcli mcp generate kayak.com
60
+ webcli mcp serve kayak.com
61
+ ```
62
+
63
+ ### As a Python Library
64
+
65
+ ```python
66
+ from webcli.discovery.analyzer import TrafficAnalyzer
67
+ from webcli.discovery.spec_generator import generate_openapi_spec
68
+ from webcli.generators.mcp_gen import generate_mcp_server_code
69
+
70
+ # Analyze captured traffic
71
+ analyzer = TrafficAnalyzer(exchanges)
72
+ endpoints = analyzer.extract_endpoints()
73
+
74
+ # Generate OpenAPI spec
75
+ spec = generate_openapi_spec(api)
76
+
77
+ # Generate MCP server
78
+ mcp_code = generate_mcp_server_code(site, spec)
79
+ ```
80
+
81
+ ## What Gets Generated
82
+
83
+ From a single discovery session, WebCLI produces:
84
+
85
+ | Output | Description |
86
+ |--------|-------------|
87
+ | **OpenAPI 3.1 Spec** | Full API specification with schemas, parameters, auth |
88
+ | **Python Client** | Typed httpx client with methods for each endpoint |
89
+ | **CLI Commands** | Typer commands you can run from terminal |
90
+ | **MCP Server** | Tools that AI agents (Claude, etc.) can call directly |
91
+
92
+ ## Architecture
93
+
94
+ ```
95
+ WebCLI Core
96
+ +----------+--------------+--------------+------------+
97
+ | CLI | MCP Server | Python SDK | REST API |
98
+ +----------+--------------+--------------+------------+
99
+ | Router / Resolver |
100
+ | (Picks best available tier for a given site+action) |
101
+ +------------------------------------------------------+
102
+ | Tier 1: Browser | Tier 2: Cached | Tier 3: API |
103
+ | Explorer | Workflows | Clients |
104
+ +------------------------------------------------------+
105
+ | API Discovery Engine |
106
+ | Traffic Capture -> Pattern Analysis -> Spec Gen |
107
+ +------------------------------------------------------+
108
+ | Auth Manager | Site Registry | Health Monitor |
109
+ +------------------------------------------------------+
110
+ ```
111
+
112
+ ## Key Features
113
+
114
+ - **Auto-discovery**: Captures browser traffic via CDP and infers API patterns
115
+ - **Smart analysis**: LLM-assisted endpoint description and parameter inference
116
+ - **Progressive promotion**: Actions auto-upgrade from browser -> workflow -> API as patterns stabilize
117
+ - **MCP native**: Generated tools work directly with Claude and other MCP-compatible agents
118
+ - **Self-healing**: Detects when APIs break and attempts automatic repair
119
+ - **Community sharing**: Export/import site specs like yt-dlp extractors
120
+ - **Lightweight core**: Heavy deps (Playwright, Anthropic, MCP) are optional
121
+
122
+ ## Development
123
+
124
+ ```bash
125
+ # Clone and install with dev dependencies
126
+ git clone https://github.com/lonexreb/webcli.git
127
+ cd webcli
128
+ pip install -e ".[dev]"
129
+
130
+ # Run tests
131
+ pytest # Unit + integration tests (no network)
132
+ pytest -m live # Live tests (hits real APIs)
133
+ pytest -v # Verbose output
134
+
135
+ # Lint
136
+ ruff check src/ tests/
137
+ ```
138
+
139
+ ### Test Coverage
140
+
141
+ - **65 unit/integration tests** covering models, registry, analyzer, spec generation, client generation, CLI, MCP generation, tier promotion, and full pipeline
142
+ - **6 live tests** against JSONPlaceholder and httpbin.org
143
+ - All tests pass on Python 3.10+
144
+
145
+ ## API Keys
146
+
147
+ For full functionality:
148
+ - **Anthropic API key** (`ANTHROPIC_API_KEY`): Used for LLM-assisted endpoint analysis. Optional — discovery works without it, just without enhanced descriptions.
149
+ - **No other keys required** for core functionality.
150
+
151
+ ## Roadmap
152
+
153
+ - [ ] Community spec registry (share discovered APIs)
154
+ - [ ] Browser cookie extraction for authenticated sites
155
+ - [ ] OAuth device flow support
156
+ - [ ] PyPI package publication
157
+ - [ ] Workflow recording and replay (Tier 2)
158
+ - [ ] Health monitoring dashboard
159
+
160
+ ## License
161
+
162
+ MIT
@@ -0,0 +1,89 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "site2cli"
7
+ version = "0.1.0"
8
+ description = "Turn any website into a CLI/API for AI agents"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Environment :: Console",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Internet :: WWW/HTTP",
24
+ "Topic :: Software Development :: Code Generators",
25
+ "Topic :: Software Development :: Libraries :: Python Modules",
26
+ ]
27
+
28
+ dependencies = [
29
+ "typer>=0.12.0",
30
+ "rich>=13.0.0",
31
+ "pydantic>=2.0.0",
32
+ "httpx>=0.27.0",
33
+ "pyyaml>=6.0",
34
+ "keyring>=25.0.0",
35
+ "openapi-spec-validator>=0.7.0",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ browser = [
40
+ "playwright>=1.40.0",
41
+ "browser-cookie3>=0.19.0",
42
+ ]
43
+ llm = [
44
+ "anthropic>=0.40.0",
45
+ ]
46
+ mcp = [
47
+ "mcp>=1.0.0",
48
+ ]
49
+ cookies = [
50
+ "browser-cookie3>=0.19.0",
51
+ ]
52
+ all = [
53
+ "webcli[browser,llm,mcp]",
54
+ ]
55
+ dev = [
56
+ "webcli[all]",
57
+ "pytest>=8.0.0",
58
+ "pytest-asyncio>=0.24.0",
59
+ "ruff>=0.5.0",
60
+ "mypy>=1.10.0",
61
+ ]
62
+
63
+ [project.urls]
64
+ Homepage = "https://github.com/lonexreb/webcli"
65
+ Repository = "https://github.com/lonexreb/webcli"
66
+ Issues = "https://github.com/lonexreb/webcli/issues"
67
+
68
+ [project.scripts]
69
+ webcli = "webcli.cli:app"
70
+
71
+ [tool.ruff]
72
+ target-version = "py310"
73
+ line-length = 100
74
+
75
+ [tool.ruff.lint]
76
+ select = ["E", "F", "I", "N", "W"]
77
+
78
+ [tool.pytest.ini_options]
79
+ asyncio_mode = "auto"
80
+ testpaths = ["tests"]
81
+ markers = [
82
+ "live: tests that make real network requests (deselect with '-m not live')",
83
+ ]
84
+
85
+ [tool.hatch.build.targets.wheel]
86
+ packages = ["src/webcli"]
87
+
88
+ [tool.hatch.build.targets.sdist]
89
+ include = ["src/webcli"]
@@ -0,0 +1,3 @@
1
"""WebCLI: Turn any website into a CLI/API for AI agents."""

# Package version; keep in sync with the `version` field in pyproject.toml.
__version__ = "0.1.0"
File without changes
@@ -0,0 +1,101 @@
1
+ """Authentication flow management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+
8
+ import keyring
9
+
10
+ from webcli.config import get_config
11
+ from webcli.models import AuthType
12
+
13
+
14
+ KEYRING_SERVICE = "webcli"
15
+
16
+
17
+ class AuthManager:
18
+ """Manages authentication credentials for discovered sites."""
19
+
20
+ def __init__(self) -> None:
21
+ self._config = get_config()
22
+ self._credentials_dir = self._config.data_dir / "auth"
23
+ self._credentials_dir.mkdir(parents=True, exist_ok=True)
24
+
25
+ def store_api_key(self, domain: str, api_key: str) -> None:
26
+ """Store an API key securely using system keyring."""
27
+ keyring.set_password(KEYRING_SERVICE, f"{domain}:api_key", api_key)
28
+
29
+ def get_api_key(self, domain: str) -> str | None:
30
+ """Retrieve a stored API key."""
31
+ return keyring.get_password(KEYRING_SERVICE, f"{domain}:api_key")
32
+
33
+ def store_cookies(self, domain: str, cookies: dict[str, str]) -> None:
34
+ """Store cookies for a domain."""
35
+ cookie_file = self._credentials_dir / f"{domain}.cookies.json"
36
+ with open(cookie_file, "w") as f:
37
+ json.dump(cookies, f)
38
+
39
+ def get_cookies(self, domain: str) -> dict[str, str] | None:
40
+ """Retrieve stored cookies for a domain."""
41
+ cookie_file = self._credentials_dir / f"{domain}.cookies.json"
42
+ if cookie_file.exists():
43
+ with open(cookie_file) as f:
44
+ return json.load(f)
45
+ return None
46
+
47
+ def store_token(self, domain: str, token: str, token_type: str = "bearer") -> None:
48
+ """Store an OAuth/bearer token."""
49
+ keyring.set_password(KEYRING_SERVICE, f"{domain}:token:{token_type}", token)
50
+
51
+ def get_token(self, domain: str, token_type: str = "bearer") -> str | None:
52
+ """Retrieve a stored token."""
53
+ return keyring.get_password(KEYRING_SERVICE, f"{domain}:token:{token_type}")
54
+
55
+ def get_auth_headers(self, domain: str, auth_type: AuthType) -> dict[str, str]:
56
+ """Get authentication headers for a domain based on auth type."""
57
+ if auth_type == AuthType.API_KEY:
58
+ key = self.get_api_key(domain)
59
+ if key:
60
+ return {"X-API-Key": key}
61
+ elif auth_type == AuthType.OAUTH:
62
+ token = self.get_token(domain)
63
+ if token:
64
+ return {"Authorization": f"Bearer {token}"}
65
+ return {}
66
+
67
+ def get_auth_cookies(self, domain: str) -> dict[str, str]:
68
+ """Get authentication cookies for a domain."""
69
+ return self.get_cookies(domain) or {}
70
+
71
+ def extract_browser_cookies(self, domain: str) -> dict[str, str] | None:
72
+ """Extract cookies from the user's real browser for a domain."""
73
+ try:
74
+ import browser_cookie3
75
+
76
+ cookies = {}
77
+ # Try Chrome first, then Firefox
78
+ for loader in [browser_cookie3.chrome, browser_cookie3.firefox]:
79
+ try:
80
+ jar = loader(domain_name=f".{domain}")
81
+ for cookie in jar:
82
+ cookies[cookie.name] = cookie.value
83
+ if cookies:
84
+ self.store_cookies(domain, cookies)
85
+ return cookies
86
+ except Exception:
87
+ continue
88
+ except ImportError:
89
+ pass
90
+ return None
91
+
92
+ def clear_auth(self, domain: str) -> None:
93
+ """Remove all stored credentials for a domain."""
94
+ for suffix in ["api_key", "token:bearer", "token:refresh"]:
95
+ try:
96
+ keyring.delete_password(KEYRING_SERVICE, f"{domain}:{suffix}")
97
+ except keyring.errors.PasswordDeleteError:
98
+ pass
99
+ cookie_file = self._credentials_dir / f"{domain}.cookies.json"
100
+ if cookie_file.exists():
101
+ cookie_file.unlink()