prismadata-0.1.0-py3-none-any.whl
This diff shows the contents of publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- prismadata/__init__.py +69 -0
- prismadata/_async_auth.py +112 -0
- prismadata/_async_http.py +143 -0
- prismadata/_auth.py +126 -0
- prismadata/_batch.py +196 -0
- prismadata/_cache.py +52 -0
- prismadata/_columns.py +32 -0
- prismadata/_constants.py +27 -0
- prismadata/_enrich.py +96 -0
- prismadata/_http.py +248 -0
- prismadata/_progress.py +34 -0
- prismadata/_types.py +117 -0
- prismadata/_validation.py +29 -0
- prismadata/async_client.py +606 -0
- prismadata/client.py +795 -0
- prismadata/exceptions.py +61 -0
- prismadata/py.typed +0 -0
- prismadata/sklearn.py +146 -0
- prismadata-0.1.0.dist-info/METADATA +247 -0
- prismadata-0.1.0.dist-info/RECORD +22 -0
- prismadata-0.1.0.dist-info/WHEEL +4 -0
- prismadata-0.1.0.dist-info/licenses/LICENSE +21 -0
prismadata/__init__.py
ADDED
@@ -0,0 +1,69 @@
"""PrismaData - Python client for location intelligence API.

Usage::

    from prismadata import Client

    client = Client(api_key="your-key")
    result = client.geocode(full_address="Av Paulista 1000, Sao Paulo")

Async usage::

    from prismadata import AsyncClient

    async with await AsyncClient.create(api_key="your-key") as client:
        result = await client.geocode(full_address="Av Paulista 1000, Sao Paulo")
"""

from ._types import (
    BorderResult,
    GeocodeResult,
    IncomePdfResult,
    IncomeStaticResult,
    InfoscResult,
    IsochroneResult,
    PrecatoryResult,
    PrisonResult,
    ReverseGeocodeResult,
    RouteResult,
    RoutingBatchResult,
    SlumResult,
    UserInfo,
)
from .async_client import AsyncClient
from .client import Client
from .exceptions import (
    AuthenticationError,
    BatchError,
    PrismaDataError,
    QuotaExhaustedError,
    RateLimitError,
    ValidationError,
)

__version__ = "0.1.0"

__all__ = [
    "AsyncClient",
    "Client",
    "__version__",
    "AuthenticationError",
    "BatchError",
    "PrismaDataError",
    "QuotaExhaustedError",
    "RateLimitError",
    "ValidationError",
    "BorderResult",
    "GeocodeResult",
    "IncomePdfResult",
    "IncomeStaticResult",
    "InfoscResult",
    "IsochroneResult",
    "PrecatoryResult",
    "PrisonResult",
    "ReverseGeocodeResult",
    "RouteResult",
    "RoutingBatchResult",
    "SlumResult",
    "UserInfo",
]
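Reviewer note: the docstring above shows only the happy path. As a hedged sketch of error handling with the exported exceptions (whether ValidationError and RateLimitError subclass PrismaDataError is an assumption inferred from the names, not confirmed by this diff):

from prismadata import Client, PrismaDataError, RateLimitError, ValidationError

client = Client(api_key="your-key")
try:
    result = client.geocode(full_address="Av Paulista 1000, Sao Paulo")
except ValidationError as exc:
    print(f"bad input: {exc}")
except RateLimitError as exc:
    print(f"throttled: {exc}")
except PrismaDataError as exc:
    print(f"other API error: {exc}")  # assumed common base class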
prismadata/_async_auth.py
ADDED
@@ -0,0 +1,112 @@
"""Async JWT authentication manager for the PrismaData API."""

from __future__ import annotations

import asyncio
import logging
import time
from typing import Any

import httpx

from ._auth import _decode_jwt_claims
from ._constants import TOKEN_RENEW_MARGIN
from .exceptions import AuthenticationError

logger = logging.getLogger("prismadata.async_auth")


class AsyncAuthManager:
    """Manages JWT token lifecycle asynchronously."""

    def __init__(
        self,
        base_url: str,
        timeout: int,
        *,
        api_key: str | None = None,
        username: str | None = None,
        password: str | None = None,
    ) -> None:
        self._base_url = base_url
        self._timeout = timeout
        self._api_key = api_key
        self._username = username
        self._password = password
        self._token: str | None = None
        self._token_exp: float = 0.0
        self._rate_limit: float | None = None
        self._sandbox: bool = False
        self._lock = asyncio.Lock()
        self._http_client: httpx.AsyncClient | None = None

    def set_http_client(self, client: httpx.AsyncClient) -> None:
        """Attach a shared httpx.AsyncClient for token refresh requests."""
        self._http_client = client

    @property
    def rate_limit(self) -> float | None:
        return self._rate_limit

    @property
    def is_sandbox(self) -> bool:
        return self._sandbox

    async def authenticate(self) -> None:
        """Force token fetch immediately."""
        async with self._lock:
            await self._fetch_token()

    async def ensure_valid_token(self) -> None:
        """Ensure a valid token is available, refreshing if needed."""
        if self._token and time.time() < (self._token_exp - TOKEN_RENEW_MARGIN):
            return
        async with self._lock:
            if self._token and time.time() < (self._token_exp - TOKEN_RENEW_MARGIN):
                return
            await self._fetch_token()

    def get_headers(self) -> dict[str, str]:
        """Return auth headers (sync — call ensure_valid_token first)."""
        if self._api_key:
            return {"X-Apikey": self._api_key}
        return {"Authorization": f"Bearer {self._token}"}

    async def _try_refresh_claims(self) -> None:
        """Best-effort claim refresh in API key mode."""
        try:
            await self.ensure_valid_token()
        except Exception as exc:
            logger.warning("Claim refresh failed (API key mode): %s", exc)

    async def _fetch_token(self) -> None:
        mode = "api_key" if self._api_key else "credentials"
        logger.debug("Fetching token (mode=%s)", mode)

        if self._api_key:
            data = {"api_key": self._api_key}
        else:
            data = {"username": self._username, "password": self._password}

        url = f"{self._base_url}/auth/token"
        try:
            if self._http_client is not None:
                resp = await self._http_client.post(url, data=data)
            else:
                async with httpx.AsyncClient(timeout=self._timeout) as http:
                    resp = await http.post(url, data=data)
        except httpx.HTTPError as exc:
            raise AuthenticationError(f"Failed to connect to auth endpoint: {exc}") from exc

        if resp.status_code in (401, 403):
            raise AuthenticationError(f"Authentication failed: {resp.text}")
        resp.raise_for_status()

        body = resp.json()
        self._token = body["access_token"]
        claims = _decode_jwt_claims(self._token)
        self._token_exp = claims.get("exp", time.time() + 3600)
        self._rate_limit = claims.get("rate_limit")
        self._sandbox = claims.get("sandbox", False)
        expires_in = self._token_exp - time.time()
        logger.debug("Token obtained, expires in %.0fs, rate_limit=%s", expires_in, self._rate_limit)
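Reviewer note: a minimal standalone usage sketch of AsyncAuthManager, assuming the package is installed; the base URL is hypothetical, and ensure_valid_token() really does POST to {base_url}/auth/token:

import asyncio

from prismadata._async_auth import AsyncAuthManager

async def main() -> None:
    auth = AsyncAuthManager(
        "https://api.example.com",  # hypothetical base URL
        timeout=30,
        api_key="your-key",
    )
    # Even in API key mode, fetching a token populates the JWT-derived
    # claims (rate_limit, sandbox) that the HTTP layer uses for throttling.
    await auth.ensure_valid_token()
    print(auth.get_headers(), auth.rate_limit, auth.is_sandbox)

asyncio.run(main())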
prismadata/_async_http.py
ADDED
@@ -0,0 +1,143 @@
"""Async HTTP transport layer with retry, throttle, and error handling."""

from __future__ import annotations

import asyncio
import logging
import time
from typing import Any

import httpx
from tenacity import (
    RetryCallState,
    retry,
    retry_if_result,
    stop_after_attempt,
)

from ._async_auth import AsyncAuthManager
from ._constants import (
    DEFAULT_TIMEOUT,
    RETRYABLE_STATUS_CODES,
    RETRY_MAX_ATTEMPTS,
    USER_AGENT_PREFIX,
)
from ._http import _handle_response, _is_retryable, _wait_for_retry

logger = logging.getLogger("prismadata.async_http")


class AsyncHttpClient:
    """Async HTTP client with automatic auth, retry, and throttle."""

    def __init__(
        self,
        auth: AsyncAuthManager,
        timeout: int = DEFAULT_TIMEOUT,
        version: str = "0.1.0",
        app_name: str | None = None,
    ) -> None:
        self._auth = auth
        self._timeout = timeout
        self._last_request_time: float = 0.0
        self._rl_remaining: int | None = None
        self._rl_reset: float | None = None
        headers: dict[str, str] = {
            "User-Agent": f"{USER_AGENT_PREFIX}/{version}",
            "X-Client": f"{USER_AGENT_PREFIX}/{version}",
        }
        if app_name:
            headers["X-App"] = app_name
        self._client = httpx.AsyncClient(timeout=timeout, headers=headers)

    async def close(self) -> None:
        await self._client.aclose()

    async def get(self, path: str, params: dict[str, Any] | None = None) -> Any:
        return await self._request("GET", path, params=params)

    async def post(self, path: str, json_body: Any = None, params: dict[str, Any] | None = None) -> Any:
        return await self._request("POST", path, json_body=json_body, params=params)

    async def _request(
        self,
        method: str,
        path: str,
        params: dict[str, Any] | None = None,
        json_body: Any = None,
    ) -> Any:
        response = await self._do_request(method, path, params, json_body)
        return _handle_response(response)

    @retry(
        retry=retry_if_result(_is_retryable),
        stop=stop_after_attempt(RETRY_MAX_ATTEMPTS),
        wait=_wait_for_retry,
        retry_error_callback=lambda state: state.outcome.result(),
    )
    async def _do_request(
        self,
        method: str,
        path: str,
        params: dict[str, Any] | None = None,
        json_body: Any = None,
    ) -> httpx.Response:
        await self._throttle()

        if self._auth._api_key:
            await self._auth._try_refresh_claims()
        else:
            await self._auth.ensure_valid_token()
        headers = self._auth.get_headers()

        url = f"{self._auth._base_url}{path}"

        if params:
            params = {k: v for k, v in params.items() if v is not None}

        logger.debug("%s %s", method, path)
        t0 = time.monotonic()
        response = await self._client.request(
            method, url, params=params, json=json_body, headers=headers
        )
        elapsed = time.monotonic() - t0
        self._update_rate_limit(response)
        status = response.status_code
        if status in RETRYABLE_STATUS_CODES:
            logger.warning("Retryable %d on %s %s, will retry", status, method, path)
        else:
            logger.debug("%s %s -> %d (%.1fs)", method, path, status, elapsed)
        return response

    def _update_rate_limit(self, response: httpx.Response) -> None:
        remaining = response.headers.get("x-ratelimit-remaining")
        reset = response.headers.get("x-ratelimit-reset")
        if remaining is not None:
            self._rl_remaining = int(remaining)
        if reset is not None:
            self._rl_reset = float(reset)

    async def _throttle(self) -> None:
        if self._rl_remaining is not None and self._rl_remaining <= 0:
            if self._rl_reset:
                wait = self._rl_reset - time.time()
                if wait > 0:
                    logger.debug("Rate limit: sleeping %.2fs (headers)", wait)
                    await asyncio.sleep(wait)
            self._rl_remaining = None
            self._rl_reset = None
            return

        if self._rl_remaining is not None:
            return

        rate_limit = self._auth.rate_limit
        if not rate_limit or rate_limit <= 0:
            return
        min_interval = 1.0 / rate_limit
        elapsed = time.time() - self._last_request_time
        if elapsed < min_interval:
            wait = min_interval - elapsed
            logger.debug("Rate limit: sleeping %.2fs (jwt claim)", wait)
            await asyncio.sleep(wait)
        self._last_request_time = time.time()
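Reviewer note: the transport and auth manager are designed to share one httpx.AsyncClient via set_http_client(). A hedged wiring sketch; the /user/info path and the direct _client handoff are illustrative assumptions (the package's AsyncClient presumably does this wiring internally):

import asyncio

from prismadata._async_auth import AsyncAuthManager
from prismadata._async_http import AsyncHttpClient

async def main() -> None:
    auth = AsyncAuthManager("https://api.example.com", timeout=30, api_key="your-key")
    http = AsyncHttpClient(auth, app_name="my-app")
    auth.set_http_client(http._client)  # token refreshes reuse the same connection pool
    try:
        # None-valued params are stripped by _do_request before sending
        data = await http.get("/user/info", params={"verbose": True, "lang": None})
        print(data)
    finally:
        await http.close()

asyncio.run(main())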
prismadata/_auth.py
ADDED
@@ -0,0 +1,126 @@
"""JWT authentication manager for the PrismaData API."""

from __future__ import annotations

import base64
import json
import logging
import threading
import time
from typing import Any

import httpx

from ._constants import TOKEN_RENEW_MARGIN
from .exceptions import AuthenticationError

logger = logging.getLogger("prismadata.auth")


class AuthManager:
    """Manages JWT token lifecycle: obtain, cache, and auto-renew."""

    def __init__(
        self,
        base_url: str,
        timeout: int,
        *,
        api_key: str | None = None,
        username: str | None = None,
        password: str | None = None,
    ) -> None:
        self._base_url = base_url
        self._timeout = timeout
        self._api_key = api_key
        self._username = username
        self._password = password
        self._token: str | None = None
        self._token_exp: float = 0.0
        self._rate_limit: float | None = None
        self._sandbox: bool = False
        self._lock = threading.Lock()

    @property
    def rate_limit(self) -> float | None:
        return self._rate_limit

    @property
    def is_sandbox(self) -> bool:
        return self._sandbox

    def authenticate(self) -> None:
        """Force token fetch immediately. Raises AuthenticationError on failure."""
        with self._lock:
            self._fetch_token()

    def get_headers(self) -> dict[str, str]:
        if self._api_key:
            self._try_refresh_claims()
            return {"X-Apikey": self._api_key}
        self._ensure_token()
        return {"Authorization": f"Bearer {self._token}"}

    def _try_refresh_claims(self) -> None:
        """Best-effort claim refresh in API key mode.

        Uses token exp as cadence — adapts to whatever validity
        the server sets. Failures are logged as warnings.
        """
        try:
            self._ensure_token()
        except Exception as exc:
            logger.warning("Claim refresh failed (API key mode): %s", exc)

    def _ensure_token(self) -> None:
        if self._token and time.time() < (self._token_exp - TOKEN_RENEW_MARGIN):
            return
        with self._lock:
            if self._token and time.time() < (self._token_exp - TOKEN_RENEW_MARGIN):
                return
            self._fetch_token()

    def _fetch_token(self) -> None:
        mode = "api_key" if self._api_key else "credentials"
        logger.debug("Fetching token (mode=%s)", mode)

        if self._api_key:
            data = {"api_key": self._api_key}
        else:
            data = {"username": self._username, "password": self._password}

        try:
            resp = httpx.post(
                f"{self._base_url}/auth/token",
                data=data,
                timeout=self._timeout,
            )
        except httpx.HTTPError as exc:
            raise AuthenticationError(f"Failed to connect to auth endpoint: {exc}") from exc

        if resp.status_code in (401, 403):
            raise AuthenticationError(f"Authentication failed: {resp.text}")
        resp.raise_for_status()

        body = resp.json()
        self._token = body["access_token"]
        claims = _decode_jwt_claims(self._token)
        self._token_exp = claims.get("exp", time.time() + 3600)
        self._rate_limit = claims.get("rate_limit")
        self._sandbox = claims.get("sandbox", False)
        expires_in = self._token_exp - time.time()
        logger.debug("Token obtained, expires in %.0fs, rate_limit=%s", expires_in, self._rate_limit)


def _decode_jwt_claims(token: str) -> dict[str, Any]:
    """Decode JWT payload without signature verification."""
    parts = token.split(".")
    if len(parts) != 3:
        return {}
    payload = parts[1]
    padding = 4 - len(payload) % 4
    if padding != 4:
        payload += "=" * padding
    try:
        return json.loads(base64.urlsafe_b64decode(payload))
    except Exception:
        return {}
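Reviewer note: _decode_jwt_claims re-pads the base64url payload segment before decoding, since real JWTs strip "=" padding. A self-contained round-trip sketch (claim values are made up; only the decoder above is real):

import base64
import json

from prismadata._auth import _decode_jwt_claims

def _b64url(obj: dict) -> str:
    # base64url-encode a JSON object and strip "=" padding, as JWTs do
    return base64.urlsafe_b64encode(json.dumps(obj).encode()).decode().rstrip("=")

token = ".".join([
    _b64url({"alg": "none"}),
    _b64url({"exp": 1900000000, "rate_limit": 10, "sandbox": True}),
    "sig",  # the signature is never verified by _decode_jwt_claims
])
claims = _decode_jwt_claims(token)
assert claims["rate_limit"] == 10 and claims["sandbox"] is True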
prismadata/_batch.py
ADDED
@@ -0,0 +1,196 @@
"""Batch processing with automatic chunking."""

from __future__ import annotations

import logging
import math
from typing import Any, Awaitable, Callable

from .exceptions import BatchError

logger = logging.getLogger("prismadata.batch")


def _raise_if_partial(
    results: dict[str, Any],
    failed_keys: list[str],
    errors: list[Exception],
) -> None:
    """Raise BatchError if any chunks failed."""
    if not errors:
        return
    msg = f"Batch completed with {len(errors)} chunk failure(s), {len(failed_keys)} keys failed"
    raise BatchError(
        msg,
        partial_results=results,
        failed_keys=failed_keys,
    )


def process_batch(
    items: dict[str, list[float]],
    request_fn: Callable[[dict[str, list[float]]], dict[str, Any]],
    max_size: int,
    on_progress: Callable[[int], None] | None = None,
) -> dict[str, Any]:
    """Split a dict of {id: [lat, lng]} into chunks and merge results.

    Args:
        items: Mapping of point_id to [lat, lng].
        request_fn: Function that posts a batch and returns results dict.
        max_size: Maximum items per request.
        on_progress: Callback invoked with the number of items completed per chunk.

    Returns:
        Merged results dict.

    Raises:
        BatchError: If one or more chunks fail. Contains partial_results
            from successful chunks and failed_keys from failed ones.
    """
    keys = list(items.keys())
    total = len(keys)
    num_chunks = math.ceil(total / max_size) if total else 0
    logger.debug("Processing %d items in %d chunks (max_size=%d)", total, num_chunks, max_size)
    results: dict[str, Any] = {}
    failed_keys: list[str] = []
    errors: list[Exception] = []

    chunk_idx = 0
    for start in range(0, total, max_size):
        chunk_idx += 1
        chunk_keys = keys[start : start + max_size]
        chunk = {k: items[k] for k in chunk_keys}
        try:
            chunk_result = request_fn(chunk)
            results.update(chunk_result)
            logger.debug("Chunk %d/%d completed (%d items)", chunk_idx, num_chunks, len(chunk_keys))
        except Exception as exc:
            failed_keys.extend(chunk_keys)
            errors.append(exc)
            logger.warning("Chunk %d/%d failed (%d items): %s", chunk_idx, num_chunks, len(chunk_keys), exc)
        if on_progress:
            on_progress(len(chunk_keys))

    _raise_if_partial(results, failed_keys, errors)
    return results


def process_routing_batch(
    items: list[dict[str, Any]],
    request_fn: Callable[[list[dict[str, Any]]], dict[str, Any]],
    max_size: int,
    on_progress: Callable[[int], None] | None = None,
) -> dict[str, Any]:
    """Split a list of routing items into chunks and merge results.

    Args:
        items: List of routing request items.
        request_fn: Function that posts a batch and returns results dict.
        max_size: Maximum items per request.
        on_progress: Callback invoked with the number of items completed per chunk.

    Returns:
        Merged results dict with TOTAL, SUCESSOS, FALHAS, RESULTADOS.

    Raises:
        BatchError: If one or more chunks fail.
    """
    total = len(items)
    num_chunks = math.ceil(total / max_size) if total else 0
    logger.debug("Processing %d items in %d chunks (max_size=%d)", total, num_chunks, max_size)
    merged: dict[str, Any] = {"TOTAL": 0, "SUCESSOS": 0, "FALHAS": 0, "RESULTADOS": []}
    failed_keys: list[str] = []
    errors: list[Exception] = []

    chunk_idx = 0
    for start in range(0, total, max_size):
        chunk_idx += 1
        chunk = items[start : start + max_size]
        try:
            chunk_result = request_fn(chunk)
            merged["TOTAL"] += chunk_result.get("TOTAL", 0)
            merged["SUCESSOS"] += chunk_result.get("SUCESSOS", 0)
            merged["FALHAS"] += chunk_result.get("FALHAS", 0)
            merged["RESULTADOS"].extend(chunk_result.get("RESULTADOS", []))
            logger.debug("Chunk %d/%d completed (%d items)", chunk_idx, num_chunks, len(chunk))
        except Exception as exc:
            failed_keys.extend(str(start + i) for i in range(len(chunk)))
            errors.append(exc)
            logger.warning("Chunk %d/%d failed (%d items): %s", chunk_idx, num_chunks, len(chunk), exc)
        if on_progress:
            on_progress(len(chunk))

    _raise_if_partial(merged, failed_keys, errors)
    return merged


async def async_process_batch(
    items: dict[str, list[float]],
    request_fn: Callable[[dict[str, list[float]]], Awaitable[dict[str, Any]]],
    max_size: int,
    on_progress: Callable[[int], None] | None = None,
) -> dict[str, Any]:
    """Async version of process_batch — sequential chunks to respect rate limits."""
    keys = list(items.keys())
    total = len(keys)
    num_chunks = math.ceil(total / max_size) if total else 0
    logger.debug("Async processing %d items in %d chunks (max_size=%d)", total, num_chunks, max_size)
    results: dict[str, Any] = {}
    failed_keys: list[str] = []
    errors: list[Exception] = []

    chunk_idx = 0
    for start in range(0, total, max_size):
        chunk_idx += 1
        chunk_keys = keys[start : start + max_size]
        chunk = {k: items[k] for k in chunk_keys}
        try:
            chunk_result = await request_fn(chunk)
            results.update(chunk_result)
            logger.debug("Chunk %d/%d completed (%d items)", chunk_idx, num_chunks, len(chunk_keys))
        except Exception as exc:
            failed_keys.extend(chunk_keys)
            errors.append(exc)
            logger.warning("Chunk %d/%d failed (%d items): %s", chunk_idx, num_chunks, len(chunk_keys), exc)
        if on_progress:
            on_progress(len(chunk_keys))

    _raise_if_partial(results, failed_keys, errors)
    return results


async def async_process_routing_batch(
    items: list[dict[str, Any]],
    request_fn: Callable[[list[dict[str, Any]]], Awaitable[dict[str, Any]]],
    max_size: int,
    on_progress: Callable[[int], None] | None = None,
) -> dict[str, Any]:
    """Async version of process_routing_batch — sequential chunks to respect rate limits."""
    total = len(items)
    num_chunks = math.ceil(total / max_size) if total else 0
    logger.debug("Async processing %d items in %d chunks (max_size=%d)", total, num_chunks, max_size)
    merged: dict[str, Any] = {"TOTAL": 0, "SUCESSOS": 0, "FALHAS": 0, "RESULTADOS": []}
    failed_keys: list[str] = []
    errors: list[Exception] = []

    chunk_idx = 0
    for start in range(0, total, max_size):
        chunk_idx += 1
        chunk = items[start : start + max_size]
        try:
            chunk_result = await request_fn(chunk)
            merged["TOTAL"] += chunk_result.get("TOTAL", 0)
            merged["SUCESSOS"] += chunk_result.get("SUCESSOS", 0)
            merged["FALHAS"] += chunk_result.get("FALHAS", 0)
            merged["RESULTADOS"].extend(chunk_result.get("RESULTADOS", []))
            logger.debug("Chunk %d/%d completed (%d items)", chunk_idx, num_chunks, len(chunk))
        except Exception as exc:
            failed_keys.extend(str(start + i) for i in range(len(chunk)))
            errors.append(exc)
            logger.warning("Chunk %d/%d failed (%d items): %s", chunk_idx, num_chunks, len(chunk), exc)
        if on_progress:
            on_progress(len(chunk))

    _raise_if_partial(merged, failed_keys, errors)
    return merged
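Reviewer note: process_batch keeps results from successful chunks even when another chunk raises, surfacing both through BatchError. A sketch assuming BatchError exposes the partial_results and failed_keys it is constructed with (the flaky request function is a stand-in for a real API call):

from prismadata._batch import process_batch
from prismadata.exceptions import BatchError

calls = {"n": 0}

def flaky_request(chunk: dict[str, list[float]]) -> dict[str, dict]:
    calls["n"] += 1
    if calls["n"] == 2:  # simulate one chunk failing server-side
        raise RuntimeError("server error")
    return {k: {"ok": True} for k in chunk}

items = {f"p{i}": [0.0, 0.0] for i in range(5)}
try:
    process_batch(items, flaky_request, max_size=2)
except BatchError as exc:
    print(sorted(exc.partial_results))  # ['p0', 'p1', 'p4']
    print(exc.failed_keys)              # ['p2', 'p3']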
prismadata/_cache.py
ADDED
@@ -0,0 +1,52 @@
"""Optional disk cache wrapper using diskcache."""

from __future__ import annotations

import hashlib
import json
import os
from typing import Any


class CacheManager:
    """Transparent disk cache. Degrades gracefully if diskcache is not installed."""

    def __init__(self, enabled: bool = False, ttl: int = 86400, directory: str | None = None) -> None:
        self._enabled = enabled
        self._ttl = ttl
        self._cache: Any = None

        if not enabled:
            return

        try:
            import diskcache
            cache_dir = directory or os.path.expanduser("~/.prismadata/cache")
            self._cache = diskcache.Cache(cache_dir)
        except ImportError:
            self._enabled = False

    @property
    def enabled(self) -> bool:
        return self._enabled and self._cache is not None

    def get(self, endpoint: str, params: dict[str, Any]) -> Any | None:
        if not self.enabled:
            return None
        key = _make_key(endpoint, params)
        return self._cache.get(key)

    def set(self, endpoint: str, params: dict[str, Any], value: Any) -> None:
        if not self.enabled:
            return
        key = _make_key(endpoint, params)
        self._cache.set(key, value, expire=self._ttl)

    def close(self) -> None:
        if self._cache is not None:
            self._cache.close()


def _make_key(endpoint: str, params: dict[str, Any]) -> str:
    raw = json.dumps({"e": endpoint, "p": params}, sort_keys=True)
    return hashlib.sha256(raw.encode()).hexdigest()
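Reviewer note: because _make_key serializes with sort_keys=True, parameter order cannot fragment the cache. A quick sketch (the /geocode endpoint string is illustrative):

from prismadata._cache import _make_key

a = _make_key("/geocode", {"address": "Av Paulista 1000", "limit": 1})
b = _make_key("/geocode", {"limit": 1, "address": "Av Paulista 1000"})
assert a == b          # key is order-independent
assert len(a) == 64    # sha256 hex digest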