site2cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ .env
12
+ .mypy_cache/
13
+ .ruff_cache/
14
+ .pytest_cache/
15
+ *.db
16
+ *.sqlite3
@@ -0,0 +1,209 @@
1
+ Metadata-Version: 2.4
2
+ Name: site2cli
3
+ Version: 0.1.0
4
+ Summary: Turn any website into a CLI/API for AI agents
5
+ Project-URL: Homepage, https://github.com/lonexreb/webcli
6
+ Project-URL: Repository, https://github.com/lonexreb/webcli
7
+ Project-URL: Issues, https://github.com/lonexreb/webcli/issues
8
+ License-Expression: MIT
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Internet :: WWW/HTTP
19
+ Classifier: Topic :: Software Development :: Code Generators
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: httpx>=0.27.0
23
+ Requires-Dist: keyring>=25.0.0
24
+ Requires-Dist: openapi-spec-validator>=0.7.0
25
+ Requires-Dist: pydantic>=2.0.0
26
+ Requires-Dist: pyyaml>=6.0
27
+ Requires-Dist: rich>=13.0.0
28
+ Requires-Dist: typer>=0.12.0
29
+ Provides-Extra: all
30
+ Requires-Dist: site2cli[browser,llm,mcp]; extra == 'all'
31
+ Provides-Extra: browser
32
+ Requires-Dist: browser-cookie3>=0.19.0; extra == 'browser'
33
+ Requires-Dist: playwright>=1.40.0; extra == 'browser'
34
+ Provides-Extra: cookies
35
+ Requires-Dist: browser-cookie3>=0.19.0; extra == 'cookies'
36
+ Provides-Extra: dev
37
+ Requires-Dist: mypy>=1.10.0; extra == 'dev'
38
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
39
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
40
+ Requires-Dist: ruff>=0.5.0; extra == 'dev'
41
+ Requires-Dist: site2cli[all]; extra == 'dev'
42
+ Provides-Extra: llm
43
+ Requires-Dist: anthropic>=0.40.0; extra == 'llm'
44
+ Provides-Extra: mcp
45
+ Requires-Dist: mcp>=1.0.0; extra == 'mcp'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # WebCLI
49
+
50
+ Turn any website into a CLI/API for AI agents.
51
+
52
+ WebCLI captures browser network traffic, discovers API patterns, and auto-generates structured interfaces (CLI commands, MCP servers, Python clients) so AI agents can interact with any web service as fast function calls instead of slow browser automation.
53
+
54
+ ## The Problem
55
+
56
+ AI agents interact with websites through browser automation (Playwright, Puppeteer, Computer Use), which is:
57
+ - **10-100x slower** than direct API calls
58
+ - **10-100x more expensive** in LLM tokens
59
+ - **~15-35% reliable** on academic benchmarks
60
+
61
+ ## The Solution: Progressive Formalization
62
+
63
+ WebCLI uses a 3-tier system that automatically graduates interactions from slow-but-universal to fast-but-specific:
64
+
65
+ ```
66
+ Tier 3: Direct API Calls (fastest, most reliable)
67
+ ^ Auto-generated from discovered API patterns
68
+ Tier 2: Cached Workflows (medium speed)
69
+ ^ Recorded browser workflows, parameterized + replayed
70
+ Tier 1: Browser Exploration (slowest, universal fallback)
71
+ ^ LLM-driven browser automation for unknown sites
72
+ ```
73
+
74
+ ## Quick Start
75
+
76
+ ```bash
77
+ # Install (lightweight - no browser deps by default)
78
+ pip install webcli
79
+
80
+ # Install with all features
81
+ pip install webcli[all]
82
+
83
+ # Or pick what you need
84
+ pip install webcli[browser] # Playwright for traffic capture
85
+ pip install webcli[llm] # Claude API for smart analysis
86
+ pip install webcli[mcp] # MCP server generation
87
+ ```
88
+
89
+ ### Discover a Site's API
90
+
91
+ ```bash
92
+ # Capture traffic and discover API endpoints
93
+ webcli discover kayak.com --action "search flights"
94
+
95
+ # WebCLI launches a browser, captures network traffic,
96
+ # and generates: OpenAPI spec + CLI commands + MCP tools
97
+ ```
98
+
99
+ ### Use the Generated Interface
100
+
101
+ ```bash
102
+ # CLI
103
+ webcli run kayak.com search_flights --from SFO --to JFK --date 2025-04-01
104
+
105
+ # Or as MCP tools for AI agents
106
+ webcli mcp generate kayak.com
107
+ webcli mcp serve kayak.com
108
+ ```
109
+
110
+ ### As a Python Library
111
+
112
+ ```python
113
+ from webcli.discovery.analyzer import TrafficAnalyzer
114
+ from webcli.discovery.spec_generator import generate_openapi_spec
115
+ from webcli.generators.mcp_gen import generate_mcp_server_code
116
+
117
+ # Analyze captured traffic
118
+ analyzer = TrafficAnalyzer(exchanges)
119
+ endpoints = analyzer.extract_endpoints()
120
+
121
+ # Generate OpenAPI spec
122
+ spec = generate_openapi_spec(api)
123
+
124
+ # Generate MCP server
125
+ mcp_code = generate_mcp_server_code(site, spec)
126
+ ```
127
+
128
+ ## What Gets Generated
129
+
130
+ From a single discovery session, WebCLI produces:
131
+
132
+ | Output | Description |
133
+ |--------|-------------|
134
+ | **OpenAPI 3.1 Spec** | Full API specification with schemas, parameters, auth |
135
+ | **Python Client** | Typed httpx client with methods for each endpoint |
136
+ | **CLI Commands** | Typer commands you can run from terminal |
137
+ | **MCP Server** | Tools that AI agents (Claude, etc.) can call directly |
138
+
139
+ ## Architecture
140
+
141
+ ```
142
+ WebCLI Core
143
+ +----------+--------------+--------------+------------+
144
+ | CLI | MCP Server | Python SDK | REST API |
145
+ +----------+--------------+--------------+------------+
146
+ | Router / Resolver |
147
+ | (Picks best available tier for a given site+action) |
148
+ +------------------------------------------------------+
149
+ | Tier 1: Browser | Tier 2: Cached | Tier 3: API |
150
+ | Explorer | Workflows | Clients |
151
+ +------------------------------------------------------+
152
+ | API Discovery Engine |
153
+ | Traffic Capture -> Pattern Analysis -> Spec Gen |
154
+ +------------------------------------------------------+
155
+ | Auth Manager | Site Registry | Health Monitor |
156
+ +------------------------------------------------------+
157
+ ```
158
+
159
+ ## Key Features
160
+
161
+ - **Auto-discovery**: Captures browser traffic via CDP and infers API patterns
162
+ - **Smart analysis**: LLM-assisted endpoint description and parameter inference
163
+ - **Progressive promotion**: Actions auto-upgrade from browser -> workflow -> API as patterns stabilize
164
+ - **MCP native**: Generated tools work directly with Claude and other MCP-compatible agents
165
+ - **Self-healing**: Detects when APIs break and attempts automatic repair
166
+ - **Community sharing**: Export/import site specs like yt-dlp extractors
167
+ - **Lightweight core**: Heavy deps (Playwright, Anthropic, MCP) are optional
168
+
169
+ ## Development
170
+
171
+ ```bash
172
+ # Clone and install with dev dependencies
173
+ git clone https://github.com/lonexreb/webcli.git
174
+ cd webcli
175
+ pip install -e ".[dev]"
176
+
177
+ # Run tests
178
+ pytest # Unit + integration tests (no network)
179
+ pytest -m live # Live tests (hits real APIs)
180
+ pytest -v # Verbose output
181
+
182
+ # Lint
183
+ ruff check src/ tests/
184
+ ```
185
+
186
+ ### Test Coverage
187
+
188
+ - **65 unit/integration tests** covering models, registry, analyzer, spec generation, client generation, CLI, MCP generation, tier promotion, and full pipeline
189
+ - **6 live tests** against JSONPlaceholder and httpbin.org
190
+ - All tests pass on Python 3.10+
191
+
192
+ ## API Keys
193
+
194
+ For full functionality:
195
+ - **Anthropic API key** (`ANTHROPIC_API_KEY`): Used for LLM-assisted endpoint analysis. Optional — discovery works without it, just without enhanced descriptions.
196
+ - **No other keys required** for core functionality.
197
+
198
+ ## Roadmap
199
+
200
+ - [ ] Community spec registry (share discovered APIs)
201
+ - [ ] Browser cookie extraction for authenticated sites
202
+ - [ ] OAuth device flow support
203
+ - [ ] PyPI package publication
204
+ - [ ] Workflow recording and replay (Tier 2)
205
+ - [ ] Health monitoring dashboard
206
+
207
+ ## License
208
+
209
+ MIT
@@ -0,0 +1,162 @@
1
+ # WebCLI
2
+
3
+ Turn any website into a CLI/API for AI agents.
4
+
5
+ WebCLI captures browser network traffic, discovers API patterns, and auto-generates structured interfaces (CLI commands, MCP servers, Python clients) so AI agents can interact with any web service as fast function calls instead of slow browser automation.
6
+
7
+ ## The Problem
8
+
9
+ AI agents interact with websites through browser automation (Playwright, Puppeteer, Computer Use), which is:
10
+ - **10-100x slower** than direct API calls
11
+ - **10-100x more expensive** in LLM tokens
12
+ - **~15-35% reliable** on academic benchmarks
13
+
14
+ ## The Solution: Progressive Formalization
15
+
16
+ WebCLI uses a 3-tier system that automatically graduates interactions from slow-but-universal to fast-but-specific:
17
+
18
+ ```
19
+ Tier 3: Direct API Calls (fastest, most reliable)
20
+ ^ Auto-generated from discovered API patterns
21
+ Tier 2: Cached Workflows (medium speed)
22
+ ^ Recorded browser workflows, parameterized + replayed
23
+ Tier 1: Browser Exploration (slowest, universal fallback)
24
+ ^ LLM-driven browser automation for unknown sites
25
+ ```
26
+
27
+ ## Quick Start
28
+
29
+ ```bash
30
+ # Install (lightweight - no browser deps by default)
31
+ pip install webcli
32
+
33
+ # Install with all features
34
+ pip install webcli[all]
35
+
36
+ # Or pick what you need
37
+ pip install webcli[browser] # Playwright for traffic capture
38
+ pip install webcli[llm] # Claude API for smart analysis
39
+ pip install webcli[mcp] # MCP server generation
40
+ ```
41
+
42
+ ### Discover a Site's API
43
+
44
+ ```bash
45
+ # Capture traffic and discover API endpoints
46
+ webcli discover kayak.com --action "search flights"
47
+
48
+ # WebCLI launches a browser, captures network traffic,
49
+ # and generates: OpenAPI spec + CLI commands + MCP tools
50
+ ```
51
+
52
+ ### Use the Generated Interface
53
+
54
+ ```bash
55
+ # CLI
56
+ webcli run kayak.com search_flights --from SFO --to JFK --date 2025-04-01
57
+
58
+ # Or as MCP tools for AI agents
59
+ webcli mcp generate kayak.com
60
+ webcli mcp serve kayak.com
61
+ ```
62
+
63
+ ### As a Python Library
64
+
65
+ ```python
66
+ from webcli.discovery.analyzer import TrafficAnalyzer
67
+ from webcli.discovery.spec_generator import generate_openapi_spec
68
+ from webcli.generators.mcp_gen import generate_mcp_server_code
69
+
70
+ # Analyze captured traffic
71
+ analyzer = TrafficAnalyzer(exchanges)
72
+ endpoints = analyzer.extract_endpoints()
73
+
74
+ # Generate OpenAPI spec
75
+ spec = generate_openapi_spec(api)
76
+
77
+ # Generate MCP server
78
+ mcp_code = generate_mcp_server_code(site, spec)
79
+ ```
80
+
81
+ ## What Gets Generated
82
+
83
+ From a single discovery session, WebCLI produces:
84
+
85
+ | Output | Description |
86
+ |--------|-------------|
87
+ | **OpenAPI 3.1 Spec** | Full API specification with schemas, parameters, auth |
88
+ | **Python Client** | Typed httpx client with methods for each endpoint |
89
+ | **CLI Commands** | Typer commands you can run from terminal |
90
+ | **MCP Server** | Tools that AI agents (Claude, etc.) can call directly |
91
+
92
+ ## Architecture
93
+
94
+ ```
95
+ WebCLI Core
96
+ +----------+--------------+--------------+------------+
97
+ | CLI | MCP Server | Python SDK | REST API |
98
+ +----------+--------------+--------------+------------+
99
+ | Router / Resolver |
100
+ | (Picks best available tier for a given site+action) |
101
+ +------------------------------------------------------+
102
+ | Tier 1: Browser | Tier 2: Cached | Tier 3: API |
103
+ | Explorer | Workflows | Clients |
104
+ +------------------------------------------------------+
105
+ | API Discovery Engine |
106
+ | Traffic Capture -> Pattern Analysis -> Spec Gen |
107
+ +------------------------------------------------------+
108
+ | Auth Manager | Site Registry | Health Monitor |
109
+ +------------------------------------------------------+
110
+ ```
111
+
112
+ ## Key Features
113
+
114
+ - **Auto-discovery**: Captures browser traffic via CDP and infers API patterns
115
+ - **Smart analysis**: LLM-assisted endpoint description and parameter inference
116
+ - **Progressive promotion**: Actions auto-upgrade from browser -> workflow -> API as patterns stabilize
117
+ - **MCP native**: Generated tools work directly with Claude and other MCP-compatible agents
118
+ - **Self-healing**: Detects when APIs break and attempts automatic repair
119
+ - **Community sharing**: Export/import site specs like yt-dlp extractors
120
+ - **Lightweight core**: Heavy deps (Playwright, Anthropic, MCP) are optional
121
+
122
+ ## Development
123
+
124
+ ```bash
125
+ # Clone and install with dev dependencies
126
+ git clone https://github.com/lonexreb/webcli.git
127
+ cd webcli
128
+ pip install -e ".[dev]"
129
+
130
+ # Run tests
131
+ pytest # Unit + integration tests (no network)
132
+ pytest -m live # Live tests (hits real APIs)
133
+ pytest -v # Verbose output
134
+
135
+ # Lint
136
+ ruff check src/ tests/
137
+ ```
138
+
139
+ ### Test Coverage
140
+
141
+ - **65 unit/integration tests** covering models, registry, analyzer, spec generation, client generation, CLI, MCP generation, tier promotion, and full pipeline
142
+ - **6 live tests** against JSONPlaceholder and httpbin.org
143
+ - All tests pass on Python 3.10+
144
+
145
+ ## API Keys
146
+
147
+ For full functionality:
148
+ - **Anthropic API key** (`ANTHROPIC_API_KEY`): Used for LLM-assisted endpoint analysis. Optional — discovery works without it, just without enhanced descriptions.
149
+ - **No other keys required** for core functionality.
150
+
151
+ ## Roadmap
152
+
153
+ - [ ] Community spec registry (share discovered APIs)
154
+ - [ ] Browser cookie extraction for authenticated sites
155
+ - [ ] OAuth device flow support
156
+ - [ ] PyPI package publication
157
+ - [ ] Workflow recording and replay (Tier 2)
158
+ - [ ] Health monitoring dashboard
159
+
160
+ ## License
161
+
162
+ MIT
@@ -0,0 +1,89 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "site2cli"
7
+ version = "0.1.0"
8
+ description = "Turn any website into a CLI/API for AI agents"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Environment :: Console",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Internet :: WWW/HTTP",
24
+ "Topic :: Software Development :: Code Generators",
25
+ "Topic :: Software Development :: Libraries :: Python Modules",
26
+ ]
27
+
28
+ dependencies = [
29
+ "typer>=0.12.0",
30
+ "rich>=13.0.0",
31
+ "pydantic>=2.0.0",
32
+ "httpx>=0.27.0",
33
+ "pyyaml>=6.0",
34
+ "keyring>=25.0.0",
35
+ "openapi-spec-validator>=0.7.0",
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ browser = [
40
+ "playwright>=1.40.0",
41
+ "browser-cookie3>=0.19.0",
42
+ ]
43
+ llm = [
44
+ "anthropic>=0.40.0",
45
+ ]
46
+ mcp = [
47
+ "mcp>=1.0.0",
48
+ ]
49
+ cookies = [
50
+ "browser-cookie3>=0.19.0",
51
+ ]
52
+ all = [
53
+ "webcli[browser,llm,mcp]",
54
+ ]
55
+ dev = [
56
+ "webcli[all]",
57
+ "pytest>=8.0.0",
58
+ "pytest-asyncio>=0.24.0",
59
+ "ruff>=0.5.0",
60
+ "mypy>=1.10.0",
61
+ ]
62
+
63
+ [project.urls]
64
+ Homepage = "https://github.com/lonexreb/webcli"
65
+ Repository = "https://github.com/lonexreb/webcli"
66
+ Issues = "https://github.com/lonexreb/webcli/issues"
67
+
68
+ [project.scripts]
69
+ webcli = "webcli.cli:app"
70
+
71
+ [tool.ruff]
72
+ target-version = "py310"
73
+ line-length = 100
74
+
75
+ [tool.ruff.lint]
76
+ select = ["E", "F", "I", "N", "W"]
77
+
78
+ [tool.pytest.ini_options]
79
+ asyncio_mode = "auto"
80
+ testpaths = ["tests"]
81
+ markers = [
82
+ "live: tests that make real network requests (deselect with '-m not live')",
83
+ ]
84
+
85
+ [tool.hatch.build.targets.wheel]
86
+ packages = ["src/webcli"]
87
+
88
+ [tool.hatch.build.targets.sdist]
89
+ include = ["src/webcli"]
@@ -0,0 +1,3 @@
1
"""WebCLI: Turn any website into a CLI/API for AI agents."""

# Package version; keep in sync with the `version` field in pyproject.toml.
__version__ = "0.1.0"
File without changes
@@ -0,0 +1,101 @@
1
+ """Authentication flow management."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+
8
+ import keyring
9
+
10
+ from webcli.config import get_config
11
+ from webcli.models import AuthType
12
+
13
+
14
+ KEYRING_SERVICE = "webcli"
15
+
16
+
17
+ class AuthManager:
18
+ """Manages authentication credentials for discovered sites."""
19
+
20
+ def __init__(self) -> None:
21
+ self._config = get_config()
22
+ self._credentials_dir = self._config.data_dir / "auth"
23
+ self._credentials_dir.mkdir(parents=True, exist_ok=True)
24
+
25
+ def store_api_key(self, domain: str, api_key: str) -> None:
26
+ """Store an API key securely using system keyring."""
27
+ keyring.set_password(KEYRING_SERVICE, f"{domain}:api_key", api_key)
28
+
29
+ def get_api_key(self, domain: str) -> str | None:
30
+ """Retrieve a stored API key."""
31
+ return keyring.get_password(KEYRING_SERVICE, f"{domain}:api_key")
32
+
33
+ def store_cookies(self, domain: str, cookies: dict[str, str]) -> None:
34
+ """Store cookies for a domain."""
35
+ cookie_file = self._credentials_dir / f"{domain}.cookies.json"
36
+ with open(cookie_file, "w") as f:
37
+ json.dump(cookies, f)
38
+
39
+ def get_cookies(self, domain: str) -> dict[str, str] | None:
40
+ """Retrieve stored cookies for a domain."""
41
+ cookie_file = self._credentials_dir / f"{domain}.cookies.json"
42
+ if cookie_file.exists():
43
+ with open(cookie_file) as f:
44
+ return json.load(f)
45
+ return None
46
+
47
+ def store_token(self, domain: str, token: str, token_type: str = "bearer") -> None:
48
+ """Store an OAuth/bearer token."""
49
+ keyring.set_password(KEYRING_SERVICE, f"{domain}:token:{token_type}", token)
50
+
51
+ def get_token(self, domain: str, token_type: str = "bearer") -> str | None:
52
+ """Retrieve a stored token."""
53
+ return keyring.get_password(KEYRING_SERVICE, f"{domain}:token:{token_type}")
54
+
55
+ def get_auth_headers(self, domain: str, auth_type: AuthType) -> dict[str, str]:
56
+ """Get authentication headers for a domain based on auth type."""
57
+ if auth_type == AuthType.API_KEY:
58
+ key = self.get_api_key(domain)
59
+ if key:
60
+ return {"X-API-Key": key}
61
+ elif auth_type == AuthType.OAUTH:
62
+ token = self.get_token(domain)
63
+ if token:
64
+ return {"Authorization": f"Bearer {token}"}
65
+ return {}
66
+
67
+ def get_auth_cookies(self, domain: str) -> dict[str, str]:
68
+ """Get authentication cookies for a domain."""
69
+ return self.get_cookies(domain) or {}
70
+
71
+ def extract_browser_cookies(self, domain: str) -> dict[str, str] | None:
72
+ """Extract cookies from the user's real browser for a domain."""
73
+ try:
74
+ import browser_cookie3
75
+
76
+ cookies = {}
77
+ # Try Chrome first, then Firefox
78
+ for loader in [browser_cookie3.chrome, browser_cookie3.firefox]:
79
+ try:
80
+ jar = loader(domain_name=f".{domain}")
81
+ for cookie in jar:
82
+ cookies[cookie.name] = cookie.value
83
+ if cookies:
84
+ self.store_cookies(domain, cookies)
85
+ return cookies
86
+ except Exception:
87
+ continue
88
+ except ImportError:
89
+ pass
90
+ return None
91
+
92
+ def clear_auth(self, domain: str) -> None:
93
+ """Remove all stored credentials for a domain."""
94
+ for suffix in ["api_key", "token:bearer", "token:refresh"]:
95
+ try:
96
+ keyring.delete_password(KEYRING_SERVICE, f"{domain}:{suffix}")
97
+ except keyring.errors.PasswordDeleteError:
98
+ pass
99
+ cookie_file = self._credentials_dir / f"{domain}.cookies.json"
100
+ if cookie_file.exists():
101
+ cookie_file.unlink()