PyPI - cli-web-pexels - Versions diffs - 0.1.0__py3-none-any.whl - Mend

cli-web-pexels 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

cli_web/pexels/README.md +83 -0
cli_web/pexels/__init__.py +3 -0
cli_web/pexels/__main__.py +6 -0
cli_web/pexels/commands/__init__.py +0 -0
cli_web/pexels/commands/collections.py +63 -0
cli_web/pexels/commands/photos.py +116 -0
cli_web/pexels/commands/users.py +62 -0
cli_web/pexels/commands/videos.py +162 -0
cli_web/pexels/core/__init__.py +0 -0
cli_web/pexels/core/client.py +299 -0
cli_web/pexels/core/exceptions.py +54 -0
cli_web/pexels/core/models.py +213 -0
cli_web/pexels/pexels_cli.py +139 -0
cli_web/pexels/skills/SKILL.md +105 -0
cli_web/pexels/tests/TEST.md +130 -0
cli_web/pexels/tests/__init__.py +0 -0
cli_web/pexels/tests/test_core.py +326 -0
cli_web/pexels/tests/test_e2e.py +168 -0
cli_web/pexels/utils/__init__.py +0 -0
cli_web/pexels/utils/doctor.py +188 -0
cli_web/pexels/utils/helpers.py +42 -0
cli_web/pexels/utils/mcp_server.py +290 -0
cli_web/pexels/utils/output.py +139 -0
cli_web/pexels/utils/repl_skin.py +486 -0
cli_web_pexels-0.1.0.dist-info/METADATA +11 -0
cli_web_pexels-0.1.0.dist-info/RECORD +29 -0
cli_web_pexels-0.1.0.dist-info/WHEEL +5 -0
cli_web_pexels-0.1.0.dist-info/entry_points.txt +2 -0
cli_web_pexels-0.1.0.dist-info/top_level.txt +1 -0

cli_web/pexels/core/client.py ADDED Viewed

@@ -0,0 +1,299 @@
+"""HTTP client for Pexels — fetches SSR pages and parses __NEXT_DATA__.
+Uses curl_cffi to bypass Cloudflare protection on pexels.com.
+"""
+import json
+import re
+from typing import Any
+from curl_cffi import requests as curl_requests
+from curl_cffi.requests.exceptions import RequestException
+from .exceptions import (
+    NetworkError,
+    NotFoundError,
+    ParseError,
+    PexelsError,
+    RateLimitError,
+    ServerError,
+)
+from .models import (
+    normalize_collection,
+    normalize_collection_summary,
+    normalize_media_item,
+    normalize_photo,
+    normalize_photo_detail,
+    normalize_user,
+    normalize_video,
+    normalize_video_detail,
+)
+BASE_URL = "https://www.pexels.com"
+SUGGESTIONS_URL = f"{BASE_URL}/en-us/api/v3/search/suggestions"
+_NEXT_DATA_RE = re.compile(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', re.DOTALL)
+_HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/120.0.0.0 Safari/537.36"
+    ),
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.9",
+}
+class PexelsClient:
+    """Client for fetching data from Pexels via SSR page parsing.
+    Pages are requested through curl_cffi with Chrome impersonation, the embedded
+    ``__NEXT_DATA__`` JSON is extracted, and the raw structures are passed through
+    the normalizers imported from ``.models``. Failures are raised as
+    ``PexelsError`` subclasses.
+    """
+    def __init__(self, timeout: float = 30.0):
+        """Remember the timeout handed to curl_cffi for page and JSON requests."""
+        self.timeout = timeout
+    def __enter__(self) -> "PexelsClient":
+        """Allow ``with PexelsClient() as client:`` usage."""
+        return self
+    def __exit__(self, *exc) -> None:
+        """Nothing to release; returns None so exceptions propagate."""
+        # No persistent connection — curl_cffi calls are scoped per request.
+    def _request(self, url: str, params: dict | None = None) -> curl_requests.Response:
+        """Make an HTTP GET request with Cloudflare bypass.
+        Raises:
+            NetworkError: If the request could not be completed.
+            PexelsError: (or a subclass) for any HTTP status >= 400.
+        """
+        try:
+            resp = curl_requests.get(
+                url,
+                params=params,
+                headers=_HEADERS,
+                impersonate="chrome",
+                timeout=self.timeout,
+                allow_redirects=True,
+            )
+        except RequestException as e:
+            raise NetworkError(f"Request failed: {e}") from e
+        except Exception as e:
+            # Anything else raised by the transport is still reported as a
+            # NetworkError; timeouts get a dedicated message.
+            if "timeout" in str(e).lower():
+                raise NetworkError(f"Request timed out: {url}") from e
+            raise NetworkError(f"Connection failed: {e}") from e
+        # Status handling stays outside the try so typed errors are not re-wrapped.
+        self._check_status(resp, url)
+        return resp
+    @staticmethod
+    def _check_status(resp, url: str) -> None:
+        """Check HTTP status and raise typed exceptions.
+        Raises:
+            NotFoundError: On 404.
+            RateLimitError: On 429; ``retry_after`` is set when the header is numeric.
+            ServerError: On 5xx.
+            PexelsError: On any other status >= 400.
+        """
+        code = resp.status_code
+        if code < 400:
+            return
+        # Only the first 200 characters of the body go into the message.
+        msg = f"HTTP {code}: {resp.text[:200]}"
+        if code == 404:
+            raise NotFoundError(msg)
+        if code == 429:
+            raw_retry = resp.headers.get("Retry-After")
+            try:
+                retry_after = float(raw_retry) if raw_retry else None
+            except (TypeError, ValueError):
+                # Retry-After may also be an HTTP-date; report "unknown" rather
+                # than letting a ValueError mask the rate-limit error.
+                retry_after = None
+            raise RateLimitError(msg, retry_after=retry_after)
+        if 500 <= code < 600:
+            raise ServerError(msg, status_code=code)
+        raise PexelsError(msg)
+    @staticmethod
+    def _quote_segment(value: str) -> str:
+        """Percent-encode free text so it is safe as a single URL path segment."""
+        from urllib.parse import quote  # local import keeps the module import block untouched
+        return quote(value, safe="")
+    @staticmethod
+    def _paginated(block: dict | None, normalize) -> dict:
+        """Build the shared ``{data, pagination}`` payload from a raw listing block.
+        Args:
+            block: Raw listing dict (may be ``None``) with ``data`` and ``pagination``.
+            normalize: Callable applied to every entry of ``data``.
+        """
+        block = block or {}
+        return {
+            "data": [normalize(entry) for entry in (block.get("data") or [])],
+            "pagination": block.get("pagination") or {},
+        }
+    def _get_page(self, path: str, params: dict | None = None) -> dict:
+        """Fetch an SSR page and extract __NEXT_DATA__ JSON.
+        Args:
+            path: Path appended to ``BASE_URL``.
+            params: Query parameters; entries whose value is ``None`` are dropped.
+        Returns:
+            The ``props.pageProps`` dict, or ``{}`` when it is absent or null.
+        Raises:
+            ParseError: If the script tag is missing or its JSON is invalid.
+        """
+        url = f"{BASE_URL}{path}"
+        filtered = {k: v for k, v in (params or {}).items() if v is not None}
+        resp = self._request(url, params=filtered if filtered else None)
+        match = _NEXT_DATA_RE.search(resp.text)
+        if not match:
+            raise ParseError(f"No __NEXT_DATA__ found at {url}")
+        try:
+            data = json.loads(match.group(1))
+        except json.JSONDecodeError as e:
+            raise ParseError(f"Invalid __NEXT_DATA__ JSON: {e}") from e
+        if not isinstance(data, dict):
+            raise ParseError(f"Unexpected __NEXT_DATA__ payload at {url}")
+        # ``or {}`` also covers keys that are present but null.
+        return (data.get("props") or {}).get("pageProps") or {}
+    def _get_json(self, url: str, params: dict | None = None) -> Any:
+        """Fetch a JSON API endpoint.
+        Raises:
+            ParseError: If the response body is not valid JSON.
+        """
+        resp = self._request(url, params=params)
+        try:
+            return resp.json()
+        except ValueError as e:  # json.JSONDecodeError is a ValueError
+            raise ParseError(f"Invalid JSON from {url}: {e}") from e
+    # ── Photos ─────────────────────────────────────────────────────────
+    def search_photos(
+        self,
+        query: str,
+        page: int = 1,
+        orientation: str | None = None,
+        size: str | None = None,
+        color: str | None = None,
+    ) -> dict:
+        """Search photos. Returns {data, pagination}."""
+        params = {
+            "page": page if page > 1 else None,  # page 1 is the bare URL
+            "orientation": orientation,
+            "size": size,
+            "color": color,
+        }
+        # The query is free text, so it must be encoded before entering the path.
+        props = self._get_page(f"/search/{self._quote_segment(query)}/", params)
+        return self._paginated(props.get("initialData"), normalize_photo)
+    def get_photo(self, slug: str) -> dict:
+        """Get photo detail by slug (e.g., 'green-leaves-1072179').
+        A purely numeric slug is treated as an id and rewritten to ``photo-<id>``.
+        Raises:
+            NotFoundError: If the page carries no ``medium`` entry.
+        """
+        if slug.isdigit():
+            slug = f"photo-{slug}"
+        props = self._get_page(f"/photo/{slug}/")
+        medium = props.get("medium") or {}
+        if not medium:
+            raise NotFoundError(f"Photo not found: {slug}")
+        details = props.get("mediumDetails") or {}
+        return normalize_photo_detail(medium, details)
+    # ── Videos ─────────────────────────────────────────────────────────
+    def search_videos(
+        self,
+        query: str,
+        page: int = 1,
+        orientation: str | None = None,
+    ) -> dict:
+        """Search videos. Returns {data, pagination}."""
+        params = {
+            "page": page if page > 1 else None,
+            "orientation": orientation,
+        }
+        props = self._get_page(f"/search/videos/{self._quote_segment(query)}/", params)
+        return self._paginated(props.get("initialData"), normalize_video)
+    def get_video(self, slug: str) -> dict:
+        """Get video detail by slug.
+        A purely numeric slug is treated as an id and rewritten to ``video-<id>``.
+        Raises:
+            NotFoundError: If the page carries no ``medium`` entry.
+        """
+        if slug.isdigit():
+            slug = f"video-{slug}"
+        props = self._get_page(f"/video/{slug}/")
+        medium = props.get("medium") or {}
+        if not medium:
+            raise NotFoundError(f"Video not found: {slug}")
+        return normalize_video_detail(medium)
+    # ── Users ──────────────────────────────────────────────────────────
+    def get_user(self, username: str) -> dict:
+        """Get user profile by username. Returns {user, media: {data, pagination}}.
+        Raises:
+            NotFoundError: If the page carries no ``user`` entry.
+        """
+        props = self._get_page(f"/@{username}/")
+        user = props.get("user") or {}
+        if not user:
+            raise NotFoundError(f"User not found: {username}")
+        return {
+            "user": normalize_user(user),
+            "media": self._paginated(props.get("firstPageOfMedia"), normalize_media_item),
+        }
+    def get_user_media(self, username: str, page: int = 1) -> dict:
+        """Get paginated user media. Returns {data, pagination}."""
+        params = {"page": page if page > 1 else None}
+        props = self._get_page(f"/@{username}/", params)
+        # Falls back to ``initialData`` when ``firstPageOfMedia`` is empty or absent.
+        media_page = props.get("firstPageOfMedia") or props.get("initialData")
+        return self._paginated(media_page, normalize_media_item)
+    # ── Collections ────────────────────────────────────────────────────
+    def get_collection(self, slug: str, page: int = 1) -> dict:
+        """Get collection detail + media. Returns {collection, media: {data, pagination}}.
+        Raises:
+            NotFoundError: If the page carries no ``collection`` entry.
+        """
+        params = {"page": page if page > 1 else None}
+        props = self._get_page(f"/collections/{slug}/", params)
+        collection = props.get("collection") or {}
+        if not collection:
+            raise NotFoundError(f"Collection not found: {slug}")
+        return {
+            "collection": normalize_collection(collection),
+            "media": self._paginated(props.get("initialData"), normalize_media_item),
+        }
+    def discover(self) -> dict:
+        """Get discover page data (popular collections, challenges)."""
+        props = self._get_page("/discover/")
+        initial = props.get("initialData") or {}
+        return {
+            "popular": [normalize_collection_summary(c) for c in (initial.get("popular") or [])],
+            "collections": self._flatten_collection_groups(initial.get("collections") or []),
+        }
+    # ── Suggestions ────────────────────────────────────────────────────
+    def search_suggestions(self, query: str) -> list[str]:
+        """Get search autocomplete suggestions.
+        Returns:
+            The ``data.attributes.suggestions`` list, or ``[]`` when the response
+            does not have that shape.
+        """
+        data = self._get_json(f"{SUGGESTIONS_URL}/{self._quote_segment(query)}?")
+        if not isinstance(data, dict):
+            return []
+        attrs = (data.get("data") or {}).get("attributes") or {}
+        return attrs.get("suggestions") or []
+    # ── Download helpers ───────────────────────────────────────────────
+    def download_file(self, url: str, output_path: str) -> str:
+        """Download a file (photo or video) to disk.
+        Uses a fixed 120 s timeout instead of ``self.timeout`` and holds the whole
+        response body in memory before writing it.
+        Returns:
+            ``output_path``, unchanged.
+        Raises:
+            NetworkError: If the request could not be completed.
+            PexelsError: (or a subclass) for any HTTP status >= 400.
+        """
+        try:
+            resp = curl_requests.get(
+                url,
+                headers=_HEADERS,
+                impersonate="chrome",
+                timeout=120.0,
+                allow_redirects=True,
+            )
+        except Exception as e:
+            raise NetworkError(f"Download failed: {e}") from e
+        # Validate before opening the file so a failed download leaves nothing behind.
+        self._check_status(resp, url)
+        with open(output_path, "wb") as f:
+            f.write(resp.content)
+        return output_path
+    @staticmethod
+    def _flatten_collection_groups(groups: list) -> list[dict]:
+        """Flatten nested collection groups from discover page.
+        Each group is either a list of collection items or a single item dict;
+        groups of any other type are skipped. Items are summarized with
+        ``normalize_collection_summary`` so both shapes yield the same fields.
+        """
+        flattened: list[dict] = []
+        for group in groups:
+            if isinstance(group, list):
+                flattened.extend(normalize_collection_summary(item) for item in group)
+            elif isinstance(group, dict):
+                flattened.append(normalize_collection_summary(group))
+        return flattened

cli_web/pexels/core/exceptions.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""Domain-specific exception hierarchy for cli-web-pexels."""
+class PexelsError(Exception):
+    """Root of the cli-web-pexels exception hierarchy."""
+    def to_dict(self) -> dict:
+        """Describe this error as a JSON-serializable mapping.
+        Returns:
+            A dict with ``error`` (always True), ``code`` (looked up through
+            ``error_code_for``) and ``message`` (``str(self)``).
+        """
+        payload: dict = {"error": True}
+        payload["code"] = error_code_for(self)
+        payload["message"] = str(self)
+        return payload
+class RateLimitError(PexelsError):
+    """Raised for HTTP 429 responses (too many requests).
+    ``retry_after`` holds the value supplied by the caller, or ``None``.
+    """
+    def __init__(self, message: str, retry_after: float | None = None):
+        """Store the message on the base exception and keep ``retry_after``."""
+        super().__init__(message)
+        self.retry_after = retry_after
+class NetworkError(PexelsError):
+    """Raised when a request cannot be completed (DNS, TCP, TLS, or timeout)."""
+class ServerError(PexelsError):
+    """Raised for HTTP 5xx responses.
+    ``status_code`` holds the code supplied by the caller (500 by default).
+    """
+    def __init__(self, message: str, status_code: int = 500):
+        """Store the message on the base exception and keep ``status_code``."""
+        super().__init__(message)
+        self.status_code = status_code
+class NotFoundError(PexelsError):
+    """Raised when the requested resource does not exist (HTTP 404)."""
+class ParseError(PexelsError):
+    """Raised when __NEXT_DATA__ or the response HTML cannot be parsed."""
+# Maps exception classes to the string codes used in JSON error output.
+# ``error_code_for`` scans this in insertion order and returns the first
+# ``isinstance`` match; anything unmatched becomes "UNKNOWN_ERROR".
+EXCEPTION_CODE_MAP = {
+    RateLimitError: "RATE_LIMITED",
+    NotFoundError: "NOT_FOUND",
+    ServerError: "SERVER_ERROR",
+    NetworkError: "NETWORK_ERROR",
+    ParseError: "PARSE_ERROR",
+}
+def error_code_for(exc: Exception) -> str:
+    """Return the JSON error code string for ``exc``.
+    The first entry of ``EXCEPTION_CODE_MAP`` whose class ``exc`` is an instance
+    of wins; ``"UNKNOWN_ERROR"`` is returned when none matches.
+    """
+    matching_codes = (
+        code for exc_type, code in EXCEPTION_CODE_MAP.items() if isinstance(exc, exc_type)
+    )
+    return next(matching_codes, "UNKNOWN_ERROR")

cli_web/pexels/core/models.py ADDED Viewed

@@ -0,0 +1,213 @@
+"""Data models for cli-web-pexels.
+Normalizer functions that transform raw Pexels __NEXT_DATA__ structures
+into clean, flat dictionaries for CLI output and --json serialization.
+"""
+from __future__ import annotations
+def normalize_photo(item: dict) -> dict:
+    """Normalize a photo item from search results into a flat dict.
+    Nested ``attributes``/``user``/``image`` objects that are missing or null are
+    treated as empty, so absent fields come back as ``None`` instead of raising.
+    Args:
+        item: Raw entry carrying its fields under ``"attributes"``.
+    Returns:
+        Flat photo dict; ``tags`` holds at most the first five tag names and
+        ``image_url`` prefers the ``large`` rendition over ``medium``.
+    """
+    # ``dict.get(key, {})`` only falls back when the key is absent; ``or {}``
+    # also covers an explicit JSON null.
+    attrs = item.get("attributes") or {}
+    user = attrs.get("user") or {}
+    image = attrs.get("image") or {}
+    return {
+        "id": attrs.get("id"),
+        "type": "photo",
+        "slug": attrs.get("slug"),
+        "title": attrs.get("title"),
+        "description": attrs.get("description"),
+        "width": attrs.get("width"),
+        "height": attrs.get("height"),
+        "license": attrs.get("license"),
+        "photographer": _format_name(user),
+        "photographer_username": user.get("username"),
+        "image_url": image.get("large") or image.get("medium"),
+        "download_url": image.get("download_link"),
+        "tags": [t.get("name") for t in (attrs.get("tags") or [])[:5]],
+        "colors": attrs.get("colors", []),
+    }
+def normalize_photo_detail(medium: dict, details: dict) -> dict:
+    """Normalize a photo detail page.
+    Args:
+        medium: Raw photo entry carrying its fields under ``"attributes"``.
+        details: Raw detail entry whose ``"attributes"`` supply EXIF, location
+            and file size; may be empty or ``None``.
+    Returns:
+        Photo dict with nested ``image`` renditions and ``exif`` values. Missing
+        or null nested objects yield ``None`` fields rather than raising.
+    """
+    attrs = medium.get("attributes") or {}
+    user = attrs.get("user") or {}
+    image = attrs.get("image") or {}
+    det_attrs = (details or {}).get("attributes") or {}
+    return {
+        "id": attrs.get("id"),
+        "type": "photo",
+        "slug": attrs.get("slug"),
+        "title": attrs.get("title"),
+        "description": attrs.get("description"),
+        "alt": attrs.get("alt"),
+        "width": attrs.get("width"),
+        "height": attrs.get("height"),
+        "license": attrs.get("license"),
+        "created_at": attrs.get("created_at"),
+        "photographer": _format_name(user),
+        "photographer_username": user.get("username"),
+        # ``or ''`` keeps a null slug from rendering as "@None".
+        "photographer_url": f"https://www.pexels.com/@{user.get('slug') or ''}",
+        "image": {
+            "small": image.get("small"),
+            "medium": image.get("medium"),
+            "large": image.get("large"),
+            "download": image.get("download_link"),
+        },
+        "tags": [t.get("name") for t in (attrs.get("tags") or [])],
+        "colors": attrs.get("colors", []),
+        "main_color": attrs.get("main_color"),
+        "exif": {
+            "camera": det_attrs.get("camera"),
+            "aperture": det_attrs.get("aperture"),
+            "focal_length": det_attrs.get("focal_length"),
+            "iso": det_attrs.get("iso"),
+            "shutter_speed": det_attrs.get("shutter_speed"),
+        },
+        "location": det_attrs.get("location"),
+        "file_size": det_attrs.get("size"),
+    }
+def normalize_video(item: dict) -> dict:
+    """Normalize a video item from search results into a flat dict.
+    Missing or null ``attributes``/``user``/``video``/``thumbnail`` objects are
+    treated as empty, so the dependent fields come back as ``None``.
+    Args:
+        item: Raw entry carrying its fields under ``"attributes"``.
+    Returns:
+        Flat video dict; ``thumbnail_url`` prefers ``medium`` over ``small``.
+    """
+    attrs = item.get("attributes") or {}
+    user = attrs.get("user") or {}
+    # Collapsing null to {} lets every lookup below use a plain ``.get``.
+    video = attrs.get("video") or {}
+    thumb = video.get("thumbnail") or {}
+    return {
+        "id": attrs.get("id"),
+        "type": "video",
+        "slug": attrs.get("slug"),
+        "title": attrs.get("title"),
+        "description": attrs.get("description"),
+        "width": attrs.get("width"),
+        "height": attrs.get("height"),
+        "license": attrs.get("license"),
+        "photographer": _format_name(user),
+        "photographer_username": user.get("username"),
+        "thumbnail_url": thumb.get("medium") or thumb.get("small"),
+        "preview_url": video.get("preview_src"),
+        "download_url": video.get("download_link"),
+    }
+def normalize_video_detail(medium: dict) -> dict:
+    """Normalize a video detail page.
+    Args:
+        medium: Raw video entry carrying its fields under ``"attributes"``.
+    Returns:
+        Video dict with nested ``thumbnail`` renditions and one entry per item of
+        ``video.video_files``. Missing or null nested objects yield ``None``
+        fields (or an empty ``video_files`` list) rather than raising.
+    """
+    attrs = medium.get("attributes") or {}
+    user = attrs.get("user") or {}
+    video = attrs.get("video") or {}
+    thumb = video.get("thumbnail") or {}
+    files = video.get("video_files") or []
+    return {
+        "id": attrs.get("id"),
+        "type": "video",
+        "slug": attrs.get("slug"),
+        "title": attrs.get("title"),
+        "description": attrs.get("description"),
+        "width": attrs.get("width"),
+        "height": attrs.get("height"),
+        "license": attrs.get("license"),
+        "created_at": attrs.get("created_at"),
+        "photographer": _format_name(user),
+        "photographer_username": user.get("username"),
+        # ``or ''`` keeps a null slug from rendering as "@None".
+        "photographer_url": f"https://www.pexels.com/@{user.get('slug') or ''}",
+        "thumbnail": {
+            "small": thumb.get("small"),
+            "medium": thumb.get("medium"),
+            "large": thumb.get("large"),
+        },
+        "video_src": video.get("src"),
+        "preview_src": video.get("preview_src"),
+        "video_files": [
+            {
+                "quality": f.get("quality"),
+                "width": f.get("width"),
+                "height": f.get("height"),
+                "fps": f.get("fps"),
+                "file_type": f.get("file_type"),
+                "link": f.get("link"),
+            }
+            for f in files
+        ],
+        "tags": [t.get("name") for t in (attrs.get("tags") or [])],
+    }
+def normalize_user(user: dict) -> dict:
+    """Normalize a user profile into a flat dict.
+    Args:
+        user: Raw user entry carrying its fields under ``"attributes"``.
+    Returns:
+        Flat profile dict. ``avatar`` prefers the ``medium`` rendition over
+        ``small``; ``hero`` defaults to ``False``; ``url`` is built from ``slug``.
+        A missing or null ``avatar``/``slug`` yields ``None``/an empty slug
+        instead of raising or rendering "@None".
+    """
+    attrs = user.get("attributes") or {}
+    avatar = attrs.get("avatar") or {}
+    return {
+        "id": attrs.get("id"),
+        "username": attrs.get("username"),
+        "first_name": attrs.get("first_name"),
+        "last_name": attrs.get("last_name"),
+        "location": attrs.get("location"),
+        "bio": attrs.get("bio"),
+        "avatar": avatar.get("medium") or avatar.get("small"),
+        "photos_count": attrs.get("photos_count"),
+        "media_count": attrs.get("media_count"),
+        "followers_count": attrs.get("followers_count"),
+        "hero": attrs.get("hero", False),
+        "url": f"https://www.pexels.com/@{attrs.get('slug') or ''}",
+    }
+def normalize_media_item(item: dict) -> dict:
+    """Normalize a media item (photo or video) from user/collection pages.
+    Args:
+        item: Raw entry with a ``"type"`` (defaults to ``"photo"``) and its
+            fields under ``"attributes"``.
+    Returns:
+        Dict with the common fields plus ``thumbnail_url`` for videos that carry
+        a ``video`` object, or ``image_url`` for entries that carry an ``image``
+        object. Neither key is added when the corresponding object is empty.
+    """
+    item_type = item.get("type", "photo")
+    attrs = item.get("attributes") or {}
+    image = attrs.get("image") or {}
+    video = attrs.get("video") or {}
+    result = {
+        "id": attrs.get("id"),
+        "type": item_type,
+        "slug": attrs.get("slug"),
+        "title": attrs.get("title"),
+        "width": attrs.get("width"),
+        "height": attrs.get("height"),
+    }
+    if item_type == "video" and video:
+        # A null thumbnail is treated as empty rather than crashing on ``.get``.
+        thumb = video.get("thumbnail") or {}
+        result["thumbnail_url"] = thumb.get("medium") or thumb.get("small")
+    elif image:
+        result["image_url"] = image.get("medium") or image.get("small")
+    return result
+def normalize_collection(collection: dict) -> dict:
+    """Flatten a raw collection entry into the fields the CLI reports.
+    Values are read from ``collection["attributes"]``; fields that are absent
+    come back as ``None``. ``media_count`` is taken from
+    ``collection_media_count``.
+    """
+    source = collection.get("attributes", {})
+    flat = {name: source.get(name) for name in ("id", "title", "description", "slug")}
+    flat["media_count"] = source.get("collection_media_count")
+    for counter in ("photos_count", "videos_count"):
+        flat[counter] = source.get(counter)
+    return flat
+def normalize_collection_summary(item: dict) -> dict:
+    """Flatten a collection entry from the discover/popular listings.
+    Values are read from ``item["attributes"]``; fields that are absent come
+    back as ``None``. ``media_count`` is taken from ``collection_media_count``.
+    """
+    source = item.get("attributes", {})
+    summary = {name: source.get(name) for name in ("id", "title", "slug")}
+    summary["media_count"] = source.get("collection_media_count")
+    for counter in ("photos_count", "videos_count"):
+        summary[counter] = source.get(counter)
+    return summary
+# ── Helpers ──────────────────────────────────────────────────────────
+def _format_name(user: dict) -> str:
+    """Format user first/last name into a display string.
+    Missing, null, or empty name parts are skipped, so a user with only one
+    name yields just that name and a user with neither yields ``""``.
+    """
+    # Both parts get the same ``or ''`` guard; previously a null first_name
+    # was rendered as the literal text "None".
+    first = user.get("first_name") or ""
+    last = user.get("last_name") or ""
+    return f"{first} {last}".strip()