PyPI - cli-web-amazon - Versions diffs - 0.1.1__py3-none-any.whl - Mend

cli-web-amazon 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

cli_web/amazon/README.md +83 -0
cli_web/amazon/__init__.py +3 -0
cli_web/amazon/__main__.py +6 -0
cli_web/amazon/amazon_cli.py +155 -0
cli_web/amazon/commands/__init__.py +1 -0
cli_web/amazon/commands/bestsellers.py +61 -0
cli_web/amazon/commands/product.py +36 -0
cli_web/amazon/commands/search.py +45 -0
cli_web/amazon/commands/suggest.py +36 -0
cli_web/amazon/core/__init__.py +1 -0
cli_web/amazon/core/client.py +416 -0
cli_web/amazon/core/exceptions.py +76 -0
cli_web/amazon/core/models.py +63 -0
cli_web/amazon/skills/SKILL.md +105 -0
cli_web/amazon/tests/TEST.md +173 -0
cli_web/amazon/tests/__init__.py +1 -0
cli_web/amazon/tests/test_core.py +369 -0
cli_web/amazon/tests/test_e2e.py +355 -0
cli_web/amazon/utils/__init__.py +1 -0
cli_web/amazon/utils/config.py +5 -0
cli_web/amazon/utils/doctor.py +188 -0
cli_web/amazon/utils/helpers.py +127 -0
cli_web/amazon/utils/mcp_server.py +290 -0
cli_web/amazon/utils/output.py +130 -0
cli_web/amazon/utils/repl_skin.py +486 -0
cli_web_amazon-0.1.1.dist-info/METADATA +14 -0
cli_web_amazon-0.1.1.dist-info/RECORD +30 -0
cli_web_amazon-0.1.1.dist-info/WHEEL +5 -0
cli_web_amazon-0.1.1.dist-info/entry_points.txt +2 -0
cli_web_amazon-0.1.1.dist-info/top_level.txt +1 -0

cli_web/amazon/core/client.py ADDED Viewed

@@ -0,0 +1,416 @@
+"""HTTP client for cli-web-amazon.
+Protocol: SSR HTML + REST JSON hybrid.
+Library: curl_cffi — Amazon returns 503 to plain httpx; browser TLS
+impersonation (curl_cffi) is required to reach the public endpoints.
+"""
+import re
+from typing import Any
+from bs4 import BeautifulSoup
+from curl_cffi import requests as curl_requests
+from .exceptions import (
+    NetworkError,
+    NotFoundError,
+    ParsingError,
+    RateLimitError,
+    ServerError,
+)
+from .models import BestSeller, Product, SearchResult, Suggestion
+# Root of the storefront; the request URLs in this module are built by prefixing paths with it.
+BASE_URL = "https://www.amazon.com"
+# NOTE(review): not referenced in the visible part of this module — confirm it is still needed.
+COMPLETION_URL = "https://completion.amazon.com"
+# Sent as the "mid" query parameter by get_suggestions().
+MERCHANT_ID = "ATVPDKIKX0DER"
+# Browser-style headers installed on the session in AmazonClient.__enter__.
+_DEFAULT_HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/124.0.0.0 Safari/537.36"
+    ),
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.9",
+    "Accept-Encoding": "gzip, deflate, br",
+    "Sec-Fetch-Dest": "document",
+    "Sec-Fetch-Mode": "navigate",
+    "Sec-Fetch-Site": "none",
+    "Sec-Fetch-User": "?1",
+    "Upgrade-Insecure-Requests": "1",
+    "Cache-Control": "max-age=0",
+}
+# The default headers with "Accept" overridden and "X-Requested-With" added;
+# passed per request by get_suggestions(), which parses the body as JSON.
+_JSON_HEADERS = {
+    **_DEFAULT_HEADERS,
+    "Accept": "application/json, text/plain, */*",
+    "X-Requested-With": "XMLHttpRequest",
+}
+class AmazonClient:
+    """Amazon web client with HTML scraping and JSON API support."""
+    def __init__(self):
+        """Create the client without opening a session.
+        ``_client`` stays ``None`` until ``__enter__`` creates the HTTP
+        session, so the instance is meant to be used as a context manager.
+        """
+        self._client: Any = None
+    def __enter__(self):
+        """Open the curl_cffi session and return this client.
+        The session is created with ``impersonate="chrome124"``, the
+        module's ``_DEFAULT_HEADERS`` and ``timeout=30``.
+        """
+        self._client = curl_requests.Session(
+            impersonate="chrome124",
+            headers=_DEFAULT_HEADERS,
+            timeout=30,
+        )
+        return self
+    def __exit__(self, *args):
+        if self._client:
+            self._client.close()
+            self._client = None
+    # ── Internal helpers ────────────────────────────────────────────────
+    def _get(
+        self, url: str, params: dict | None = None, headers: dict | None = None
+    ) -> Any:
+        """Make a GET request with error mapping."""
+        try:
+            resp = self._client.get(url, params=params, headers=headers)
+        except Exception as exc:
+            raise NetworkError(f"Request failed: {url}: {exc}") from exc
+        return self._check_status(resp, url)
+    def _post(self, url: str, data: dict | None = None, json: dict | None = None) -> Any:
+        """Make a POST request with error mapping."""
+        try:
+            resp = self._client.post(url, data=data, json=json)
+        except Exception as exc:
+            raise NetworkError(f"Request failed: {url}: {exc}") from exc
+        return self._check_status(resp, url)
+    def _check_status(self, resp: Any, url: str) -> Any:
+        """Map HTTP status codes to typed exceptions."""
+        if resp.status_code == 200:
+            return resp
+        if resp.status_code in (401, 403):
+            raise ServerError(
+                f"Access denied (HTTP {resp.status_code}) — unexpected on a public endpoint.",
+                status_code=resp.status_code,
+            )
+        if resp.status_code == 404:
+            raise NotFoundError(f"Resource not found: {url}")
+        if resp.status_code == 429:
+            retry_after = None
+            if "retry-after" in resp.headers:
+                try:
+                    retry_after = float(resp.headers["retry-after"])
+                except ValueError:
+                    pass
+            raise RateLimitError("Rate limited by Amazon", retry_after=retry_after)
+        if resp.status_code >= 500:
+            raise ServerError(
+                f"Amazon server error: {resp.status_code}", status_code=resp.status_code
+            )
+        return resp
+    def _soup(self, resp: Any) -> BeautifulSoup:
+        """Parse ``resp.text`` with BeautifulSoup's built-in ``html.parser``.
+        Args:
+            resp: Response object exposing the body as ``text``.
+        Returns:
+            The parsed document.
+        """
+        return BeautifulSoup(resp.text, "html.parser")
+    # ── Autocomplete Suggestions ────────────────────────────────────────
+    def get_suggestions(self, query: str, limit: int = 11) -> list[Suggestion]:
+        """Get autocomplete suggestions for a query.
+        Uses the /suggestions JSON endpoint.
+        """
+        params = {
+            "limit": str(limit),
+            "prefix": query,
+            "suggestion-type": ["WIDGET", "KEYWORD"],
+            "mid": MERCHANT_ID,
+            "alias": "aps",
+        }
+        resp = self._get(
+            f"{BASE_URL}/suggestions",
+            params=params,
+            headers=_JSON_HEADERS,
+        )
+        try:
+            data = resp.json()
+        except Exception as exc:
+            raise ParsingError(f"Could not parse suggestions response: {exc}") from exc
+        results = []
+        for item in data.get("suggestions", []):
+            value = item.get("value", "")
+            stype = item.get("type", "KEYWORD")
+            if value:
+                results.append(Suggestion(value=value, type=stype))
+        return results
+    # ── Search ──────────────────────────────────────────────────────────
+    def search(
+        self, query: str, page: int = 1, department: str | None = None
+    ) -> list[SearchResult]:
+        """Search Amazon products.
+        Args:
+            query: Search keywords.
+            page: Page number (default: 1).
+            department: Optional department/node filter.
+        Returns:
+            List of SearchResult objects.
+        """
+        params: dict[str, Any] = {"k": query}
+        if page > 1:
+            params["page"] = str(page)
+        if department:
+            params["i"] = department
+        resp = self._get(f"{BASE_URL}/s", params=params)
+        soup = self._soup(resp)
+        cards = soup.find_all("div", attrs={"data-component-type": "s-search-result"})
+        if not cards:
+            return []
+        results = []
+        for card in cards:
+            asin = card.get("data-asin", "")
+            if not asin:
+                continue
+            # Title from h2
+            title_elem = card.find("h2")
+            title = title_elem.get_text(strip=True) if title_elem else ""
+            # Price — try a-offscreen first (most reliable), then structured price
+            price = ""
+            offscreen = card.find("span", class_="a-offscreen")
+            if offscreen:
+                price = offscreen.get_text(strip=True)
+            else:
+                whole = card.find("span", class_="a-price-whole")
+                frac = card.find("span", class_="a-price-fraction")
+                if whole:
+                    price = whole.get_text(strip=True)
+                    if frac:
+                        price += frac.get_text(strip=True)
+            # Rating from a-icon-alt
+            rating = ""
+            rating_elem = card.find("span", class_="a-icon-alt")
+            if rating_elem:
+                rating = rating_elem.get_text(strip=True)
+            # Review count — aria-label near rating
+            review_count = ""
+            review_elem = card.find("span", attrs={"aria-label": re.compile(r"\d")})
+            if review_elem:
+                review_count = review_elem.get("aria-label", "")
+            # URL — first product link
+            link_elem = card.find("a", class_="a-link-normal", href=True)
+            url = ""
+            if link_elem:
+                href = link_elem.get("href", "")
+                if href.startswith("http"):
+                    url = href
+                elif href:
+                    url = f"{BASE_URL}{href}"
+            results.append(
+                SearchResult(
+                    asin=asin,
+                    title=title,
+                    price=price,
+                    rating=rating,
+                    review_count=review_count,
+                    url=url,
+                )
+            )
+        return results
+    # ── Product Detail ──────────────────────────────────────────────────
+    def get_product(self, asin: str) -> Product:
+        """Get product details by ASIN.
+        Args:
+            asin: Amazon Standard Identification Number.
+        Returns:
+            Product object with full details.
+        Raises:
+            NotFoundError: If ASIN does not exist.
+        """
+        resp = self._get(f"{BASE_URL}/dp/{asin}")
+        soup = self._soup(resp)
+        html_text = resp.text
+        # Title
+        title_elem = soup.find("span", attrs={"id": "productTitle"})
+        title = title_elem.get_text(strip=True) if title_elem else ""
+        if not title:
+            # Fallback: check if page actually has a product
+            if "dp/" not in str(resp.url):
+                raise NotFoundError(f"Product not found: {asin}")
+            raise ParsingError(f"Could not parse product title for ASIN: {asin}")
+        # Detect geo-restriction — Amazon replaces buybox with a "cannot ship" message
+        geo_restricted = (
+            "cannot be shipped to your selected delivery location" in html_text
+            or "item can't be shipped to your selected location" in html_text.lower()
+        )
+        # Price — try a-offscreen (available in SSR when product ships to this region),
+        # then a-price-whole, then embedded JSON blobs in script tags.
+        # Note: price is empty when the product is geo-restricted or JS-rendered.
+        price = ""
+        price_elem = soup.find("span", class_="a-offscreen")
+        if price_elem:
+            price = price_elem.get_text(strip=True)
+        if not price:
+            whole = soup.find("span", class_="a-price-whole")
+            frac = soup.find("span", class_="a-price-fraction")
+            if whole:
+                price = whole.get_text(strip=True)
+                if frac:
+                    price += frac.get_text(strip=True)
+        if not price:
+            # Fallback: scan embedded script tags for priceAmount / displayPrice JSON fields
+            for m in re.finditer(r'"(?:priceAmount|displayPrice)"\s*:\s*"?([^",}]+)"?', html_text):
+                candidate = m.group(1).strip()
+                if candidate and candidate not in ("", "0"):
+                    price = candidate
+                    break
+        # Build price_note when price is unavailable
+        price_note = ""
+        if not price:
+            if geo_restricted:
+                price_note = "Product not available in your region — price not shown"
+            else:
+                price_note = "Price JS-rendered, not available in SSR HTML"
+        # Rating
+        rating = ""
+        rating_elem = soup.find("span", attrs={"id": "acrPopover"})
+        if rating_elem:
+            rating = rating_elem.get("title", "") or rating_elem.get_text(strip=True)
+        # Review count
+        review_count = ""
+        review_elem = soup.find("span", attrs={"id": "acrCustomerReviewText"})
+        if review_elem:
+            review_count = review_elem.get_text(strip=True)
+        # Brand
+        brand = ""
+        brand_elem = soup.find(attrs={"id": "bylineInfo"})
+        if brand_elem:
+            brand = brand_elem.get_text(strip=True)
+        # Image
+        image_url = ""
+        img_elem = soup.find("img", attrs={"id": "landingImage"})
+        if img_elem:
+            image_url = img_elem.get("src", "") or img_elem.get("data-old-hires", "")
+        return Product(
+            asin=asin,
+            title=title,
+            price=price,
+            price_note=price_note,
+            geo_restricted=geo_restricted,
+            rating=rating,
+            review_count=review_count,
+            brand=brand,
+            image_url=image_url,
+            url=f"{BASE_URL}/dp/{asin}",
+        )
+    # ── Product Variants ────────────────────────────────────────────────
+    # ── Best Sellers ────────────────────────────────────────────────────
+    def get_bestsellers(self, category: str = "electronics", page: int = 1) -> list[BestSeller]:
+        """Get Amazon Best Sellers for a category.
+        Args:
+            category: Category slug (e.g., "electronics", "books", "toys").
+            page: Page number.
+        Returns:
+            List of BestSeller objects.
+        """
+        url = f"{BASE_URL}/Best-Sellers/zgbs/{category}"
+        params = {}
+        if page > 1:
+            params["pg"] = str(page)
+        resp = self._get(url, params=params if params else None)
+        soup = self._soup(resp)
+        results = []
+        # Best seller grid items — each has id="gridItemRoot"
+        containers = soup.find_all("div", attrs={"id": "gridItemRoot"})
+        for container in containers:
+            # ASIN from inner div
+            asin_div = container.find("div", attrs={"data-asin": True})
+            asin = asin_div.get("data-asin", "") if asin_div else ""
+            if not asin:
+                continue
+            # Rank
+            rank = 0
+            rank_elem = container.find("span", class_="zg-bdg-text")
+            if rank_elem:
+                rank_text = rank_elem.get_text(strip=True).lstrip("#")
+                try:
+                    rank = int(rank_text)
+                except ValueError:
+                    pass
+            # Title — from image alt or link text
+            title = ""
+            img = container.find("img")
+            if img:
+                title = img.get("alt", "")
+            if not title:
+                link = container.find("a", class_="a-link-normal")
+                if link:
+                    title = link.get_text(strip=True)
+            # Price
+            price = ""
+            price_elem = container.find("span", class_="p13n-sc-price")
+            if price_elem:
+                price = price_elem.get_text(strip=True)
+            # URL
+            url_path = ""
+            link_elem = container.find("a", class_="a-link-normal", href=True)
+            if link_elem:
+                href = link_elem.get("href", "")
+                if href.startswith("http"):
+                    url_path = href
+                elif href:
+                    url_path = f"{BASE_URL}{href}"
+            results.append(
+                BestSeller(
+                    rank=rank,
+                    asin=asin,
+                    title=title,
+                    price=price,
+                    url=url_path,
+                )
+            )
+        return results

cli_web/amazon/core/exceptions.py ADDED Viewed

@@ -0,0 +1,76 @@
+"""Domain-specific exception hierarchy for cli-web-amazon."""
+class AmazonError(Exception):
+    """Base exception for all amazon CLI errors."""
+    def to_dict(self) -> dict:
+        return {"error": True, "code": "ERROR", "message": str(self)}
+class NetworkError(AmazonError):
+    """Connection failed, DNS error, timeout."""
+    def to_dict(self) -> dict:
+        return {"error": True, "code": "NETWORK_ERROR", "message": str(self)}
+class RateLimitError(AmazonError):
+    """HTTP 429 — too many requests."""
+    def __init__(self, message: str, retry_after: float | None = None):
+        self.retry_after = retry_after
+        super().__init__(message)
+    def to_dict(self) -> dict:
+        d = {"error": True, "code": "RATE_LIMITED", "message": str(self)}
+        if self.retry_after is not None:
+            d["retry_after"] = self.retry_after
+        return d
+class ParsingError(AmazonError):
+    """HTML/JSON response could not be parsed — site structure may have changed."""
+    def to_dict(self) -> dict:
+        return {"error": True, "code": "PARSING_ERROR", "message": str(self)}
+class NotFoundError(AmazonError):
+    """Resource not found (product ASIN, category, etc.)."""
+    def to_dict(self) -> dict:
+        return {"error": True, "code": "NOT_FOUND", "message": str(self)}
+class ServerError(AmazonError):
+    """Amazon returned 5xx."""
+    def __init__(self, message: str, status_code: int = 500):
+        self.status_code = status_code
+        super().__init__(message)
+    def to_dict(self) -> dict:
+        return {
+            "error": True,
+            "code": "SERVER_ERROR",
+            "message": str(self),
+            "status_code": self.status_code,
+        }
+# Exception type -> JSON error code, consulted by error_code_for().
+# The codes repeat the ones hard-coded in each class's to_dict().
+# NOTE(review): AmazonError itself is not listed, so a bare AmazonError maps
+# to "UNKNOWN_ERROR" here while its to_dict() reports "ERROR" — confirm intended.
+EXCEPTION_CODE_MAP = {
+    RateLimitError: "RATE_LIMITED",
+    NotFoundError: "NOT_FOUND",
+    ServerError: "SERVER_ERROR",
+    NetworkError: "NETWORK_ERROR",
+    ParsingError: "PARSING_ERROR",
+}
+def error_code_for(exc: Exception) -> str:
+    """Get the JSON error code string for an exception."""
+    for exc_type, code in EXCEPTION_CODE_MAP.items():
+        if isinstance(exc, exc_type):
+            return code
+    return "UNKNOWN_ERROR"

cli_web/amazon/core/models.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""Data models for cli-web-amazon."""
+from dataclasses import asdict, dataclass
+from typing import Any
+@dataclass
+class SearchResult:
+    """A single product in Amazon search results.
+    Every field is a plain string; all but ``asin`` and ``title`` default to "".
+    """
+    asin: str
+    title: str
+    price: str = ""
+    rating: str = ""
+    review_count: str = ""
+    url: str = ""
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a plain dict via ``dataclasses.asdict``."""
+        return asdict(self)
+@dataclass
+class Product:
+    """Amazon product detail.
+    Only ``asin`` and ``title`` are required; the remaining string fields
+    default to "" and ``geo_restricted`` defaults to False.
+    """
+    asin: str
+    title: str
+    price: str = ""
+    price_note: str = ""  # explanation carried alongside an empty price
+    geo_restricted: bool = False
+    rating: str = ""
+    review_count: str = ""
+    brand: str = ""
+    image_url: str = ""
+    url: str = ""
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a plain dict via ``dataclasses.asdict``."""
+        return asdict(self)
+@dataclass
+class BestSeller:
+    """A product in Amazon Best Sellers list.
+    ``rank``, ``asin`` and ``title`` are required; ``price`` and ``url``
+    default to "".
+    """
+    rank: int
+    asin: str
+    title: str
+    price: str = ""
+    url: str = ""
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a plain dict via ``dataclasses.asdict``."""
+        return asdict(self)
+@dataclass
+class Suggestion:
+    """An autocomplete suggestion from Amazon.
+    ``value`` is the suggested text; ``type`` defaults to "KEYWORD".
+    """
+    value: str
+    type: str = "KEYWORD"
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a plain dict via ``dataclasses.asdict``."""
+        return asdict(self)

cli_web/amazon/skills/SKILL.md ADDED Viewed

@@ -0,0 +1,105 @@
+---
+name: amazon-cli
+description: Use cli-web-amazon to search Amazon products, get product details, check
+  prices, browse best sellers, and get autocomplete suggestions. Invoke this skill
+  whenever the user asks about Amazon products, prices, best sellers, or wants to
+  search Amazon. Always prefer cli-web-amazon over manually fetching the website.
+  No authentication required — fully public site.
+---
+# cli-web-amazon
+Search Amazon products, view details, browse Best Sellers, and get autocomplete suggestions. No authentication required.
+## Quick Start
+```bash
+cli-web-amazon search "laptop" --json
+cli-web-amazon product get B0GRZ78683 --json
+cli-web-amazon bestsellers electronics --json
+```
+Always use `--json` when parsing output programmatically.
+---
+## Commands
+### `search QUERY`
+Search Amazon products by keyword.
+```bash
+cli-web-amazon search "wireless headphones" --json
+cli-web-amazon search "laptop" --page 2 --dept electronics --json
+```
+**Key options:** `--page N` (default 1), `--dept <department>`
+**Output fields:** `asin`, `title`, `price`, `rating`, `review_count`, `url`
+---
+### `suggest QUERY`
+Autocomplete suggestions.
+```bash
+cli-web-amazon suggest "iphone case" --json
+```
+**Output fields:** `value`, `type`
+---
+### `product get ASIN`
+Full product detail by ASIN.
+```bash
+cli-web-amazon product get B0GRZ78683 --json
+```
+**Output fields:** `asin`, `title`, `price`, `price_note`, `geo_restricted`, `rating`, `review_count`, `brand`, `image_url`, `url`
+---
+### `bestsellers [CATEGORY]`
+Browse Amazon Best Sellers by category.
+```bash
+cli-web-amazon bestsellers electronics --json
+cli-web-amazon bestsellers books --page 2 --json
+```
+**Categories:** `electronics`, `books`, `toys-and-games`, `music`, `kitchen`, `clothing`
+**Key options:** `--page N`
+**Output fields:** `rank`, `asin`, `title`, `price`, `url`
+---
+## Agent Patterns
+```bash
+# Search then get full detail on top result
+ASIN=$(cli-web-amazon search "headphones" --json | python -c "import json,sys; print(json.load(sys.stdin)[0]['asin'])")
+cli-web-amazon product get "$ASIN" --json
+# Top-5 bestsellers
+cli-web-amazon bestsellers electronics --json | \
+  python -c "import json,sys; [print(p['rank'], p['title'], p['price']) for p in json.load(sys.stdin)[:5]]"
+# Autocomplete then search
+cli-web-amazon suggest "wireles" --json | \
+  python -c "import json,sys; print(json.load(sys.stdin)[0]['value'])"
+```
+---
+## Notes
+- **Auth:** No authentication required — all commands work on public Amazon endpoints.
+- **Price:** May be empty for some products, because Amazon renders some prices client-side and hides them for items that cannot ship to your region. `product get` tries more fallbacks than `search`; when it still finds no price, `price_note` explains why.
+- **ASIN:** 10-character alphanumeric identifier (e.g. `B0GRZ78683`).
+- **Pagination:** Search supports `--page N` (typically 1–7 pages). Best sellers supports `--page N`.
+- **Errors in --json mode:** `{"error": true, "code": "NOT_FOUND|RATE_LIMITED|NETWORK_ERROR|SERVER_ERROR|PARSING_ERROR", "message": "..."}`
+- **Installation:** `pip install cli-web-amazon`