PyPI - cloudos-cb-py - Versions diffs - 1.2.0__py3-none-any.whl - Mend

cloudos-cb-py 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

cloudos_cb/__init__.py +49 -0
cloudos_cb/config.py +201 -0
cloudos_cb/exceptions.py +44 -0
cloudos_cb/http.py +157 -0
cloudos_cb/queries.py +455 -0
cloudos_cb/utils.py +44 -0
cloudos_cb_py-1.2.0.dist-info/METADATA +400 -0
cloudos_cb_py-1.2.0.dist-info/RECORD +11 -0
cloudos_cb_py-1.2.0.dist-info/WHEEL +5 -0
cloudos_cb_py-1.2.0.dist-info/licenses/LICENSE +21 -0
cloudos_cb_py-1.2.0.dist-info/top_level.txt +1 -0

cloudos_cb/__init__.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""cloudos_cb - Python client for the CloudOS Cohort Browser API."""
+from .config import configure, profile_list
+from .exceptions import (
+    CloudOSAPIError,
+    CloudOSAccessError,
+    CloudOSAuthError,
+    CloudOSConfigError,
+    CloudOSError,
+    CloudOSQueryError,
+    CloudOSServerError,
+    CloudOSTimeoutError,
+    CloudOSValidationError,
+)
+from .queries import (
+    CohortTables,
+    cohort_tables,
+    query,
+    query_results,
+    query_status,
+    query_submit_async,
+    sql_validate,
+)
+# Package version string.
+__version__ = "1.2.0"
+# Names re-exported as the public API of the package; this is what
+# ``from cloudos_cb import *`` binds.
+__all__ = [
+    # Config
+    "configure",
+    "profile_list",
+    # Query
+    "sql_validate",
+    "cohort_tables",
+    "CohortTables",
+    "query_submit_async",
+    "query_status",
+    "query_results",
+    "query",
+    # Exceptions
+    "CloudOSError",
+    "CloudOSConfigError",
+    "CloudOSValidationError",
+    "CloudOSAuthError",
+    "CloudOSAccessError",
+    "CloudOSServerError",
+    "CloudOSAPIError",
+    "CloudOSTimeoutError",
+    "CloudOSQueryError",
+]

cloudos_cb/config.py ADDED Viewed

@@ -0,0 +1,201 @@
+"""Profile configuration management for cloudos_cb."""
+from __future__ import annotations
+import json
+import logging
+import os
+import stat
+from datetime import datetime
+import pandas as pd
+from .exceptions import CloudOSConfigError, CloudOSValidationError
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Base URL used when a profile does not specify one.
+_DEFAULT_BASE_URL = "https://cloudos.lifebit.ai"
+# File name of the JSON profile store inside the config directory.
+_CONFIG_FILENAME = "config.json"
+# Directory name, created under the user's home, that holds the config file
+# unless CLOUDOS_CONFIG_DIR overrides the location.
+_CONFIG_DIR_NAME = ".cloudos-cb"
+def _get_config_dir() -> str:
+    """Return the directory that holds the profile config file.
+    A non-empty ``CLOUDOS_CONFIG_DIR`` environment variable takes precedence;
+    otherwise the ``.cloudos-cb`` directory under the user's home is used.
+    """
+    override = os.environ.get("CLOUDOS_CONFIG_DIR")
+    if override:
+        return override
+    home = os.path.expanduser("~")
+    return os.path.join(home, _CONFIG_DIR_NAME)
+def _get_config_file() -> str:
+    """Return the full path of the JSON config file inside the config directory."""
+    config_dir = _get_config_dir()
+    return os.path.join(config_dir, _CONFIG_FILENAME)
+def _read_config() -> dict:
+    """Load every stored profile from the config file.
+    Returns:
+        dict: Mapping of profile name to profile settings. Empty when the
+            config file does not exist yet.
+    Raises:
+        CloudOSConfigError: If the file cannot be read, is not valid JSON, or
+            its top-level value is not a JSON object.
+    """
+    config_file = _get_config_file()
+    try:
+        # Open directly rather than testing os.path.exists() first, so a file
+        # removed between the check and the open cannot surface as a raw error.
+        with open(config_file, "r", encoding="utf-8") as f:
+            config = json.load(f)
+    except (FileNotFoundError, NotADirectoryError):
+        # No config yet: behave as "no profiles configured".
+        return {}
+    except (OSError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError and undecodable bytes; OSError
+        # covers permission problems and the path being a directory.
+        raise CloudOSConfigError(f"Error reading config file: {e}") from e
+    if not isinstance(config, dict):
+        # Callers index the result by profile name, so anything but an object
+        # would fail later with an unrelated AttributeError/TypeError.
+        raise CloudOSConfigError(
+            "Error reading config file: expected a JSON object at the top level "
+            f"of {config_file}"
+        )
+    return config
+def _write_config(config: dict) -> None:
+    """Persist all profiles to the config file with owner-only permissions.
+    The config directory is created if needed (and set to 0700 only when this
+    call created it). The file is created with mode 0600 from the start so the
+    stored API keys are never readable by other users, not even briefly.
+    Args:
+        config (dict): Mapping of profile name to profile settings.
+    Raises:
+        CloudOSConfigError: If the directory or file cannot be created,
+            written, or have its permissions set.
+    """
+    config_file = _get_config_file()
+    config_dir = os.path.dirname(config_file)
+    owner_rw = stat.S_IRUSR | stat.S_IWUSR
+    try:
+        dir_existed = os.path.isdir(config_dir)
+        os.makedirs(config_dir, exist_ok=True)
+        if not dir_existed:
+            os.chmod(config_dir, 0o700)
+        # O_BINARY exists only on Windows; without it the descriptor would
+        # translate newlines a second time underneath the text wrapper.
+        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
+        fd = os.open(config_file, flags, owner_rw)
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            json.dump(config, f, indent=2)
+        # The mode given to os.open() applies only on creation; a pre-existing
+        # file keeps its old mode, so tighten it explicitly.
+        os.chmod(config_file, owner_rw)
+    except OSError as e:
+        raise CloudOSConfigError(f"Error writing config file: {e}") from e
+def configure(
+    profilename: str,
+    apikey: str,
+    workspace_id: str,
+    base_url: str = _DEFAULT_BASE_URL,
+    set_default: bool = False,
+) -> None:
+    """Create or update a named CloudOS profile.
+    The profile is saved to ``~/.cloudos-cb/config.json``, or under the
+    directory named by the ``CLOUDOS_CONFIG_DIR`` environment variable. The
+    file is restricted to 0600 and a newly created directory to 0700.
+    Args:
+        profilename (str): Name of the profile to create or update.
+        apikey (str): API key used for authentication.
+        workspace_id (str): Workspace/team ID sent with API requests.
+        base_url (str): Base URL of the CloudOS API.
+        set_default (bool): When True, this profile becomes the only default.
+            When False, an existing profile keeps its current default flag.
+    Raises:
+        CloudOSValidationError: If profilename, apikey or workspace_id is empty.
+        CloudOSConfigError: If the config file cannot be written.
+    """
+    required = (
+        ("profilename", profilename),
+        ("apikey", apikey),
+        ("workspace_id", workspace_id),
+    )
+    for label, value in required:
+        if not value:
+            raise CloudOSValidationError(f"{label} is required and cannot be empty.")
+    profiles = _read_config()
+    if set_default:
+        # Only one profile may be the default, so clear the flag everywhere first.
+        for entry in profiles.values():
+            entry["default"] = False
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    previous = profiles.get(profilename, {})
+    profiles[profilename] = {
+        "apikey": apikey,
+        "workspace_id": workspace_id,
+        "base_url": base_url,
+        "default": set_default or previous.get("default", False),
+        # Keep the original creation time when updating an existing profile.
+        "created_at": previous.get("created_at", timestamp),
+        "updated_at": timestamp,
+    }
+    _write_config(profiles)
+    message = (
+        "Profile '%s' configured successfully and set as default."
+        if set_default
+        else "Profile '%s' configured successfully."
+    )
+    logger.info(message, profilename)
+    logger.info("Config stored at: %s", _get_config_file())
+def profile_list() -> pd.DataFrame:
+    """Return a table of all configured CloudOS profiles.
+    API keys are not included in the result.
+    Returns:
+        pandas.DataFrame: One row per profile with columns profile_name,
+            workspace_id, base_url, default, created_at, updated_at. The frame
+            is empty (columns only) when no profiles are configured.
+    """
+    columns = ["profile_name", "workspace_id", "base_url", "default", "created_at", "updated_at"]
+    profiles = _read_config()
+    if not profiles:
+        logger.info("No profiles configured. Use configure() to create a profile.")
+        return pd.DataFrame(columns=columns)
+    records = []
+    for name, settings in profiles.items():
+        records.append(
+            {
+                "profile_name": name,
+                "workspace_id": settings.get("workspace_id", ""),
+                "base_url": settings.get("base_url", _DEFAULT_BASE_URL),
+                "default": settings.get("default", False),
+                "created_at": settings.get("created_at", ""),
+                "updated_at": settings.get("updated_at", ""),
+            }
+        )
+    return pd.DataFrame(records, columns=columns)
+def load_profile(profilename: str = "") -> dict[str, str]:
+    """Resolve a profile to the credentials needed for API calls (internal).
+    Args:
+        profilename (str): Name of the profile to load. When empty, the first
+            profile flagged as default is used.
+    Returns:
+        dict: Keys apikey, workspace_id and base_url. base_url falls back to
+            the package default when the profile has none.
+    Raises:
+        CloudOSConfigError: If the config file does not exist, the requested
+            profile (or any default profile) cannot be found, or the profile
+            lacks apikey or workspace_id.
+    """
+    config_file = _get_config_file()
+    if not os.path.exists(config_file):
+        raise CloudOSConfigError(
+            "No configuration file found. Use configure() to create a profile first.\n"
+            f"Expected location: {config_file}"
+        )
+    profiles = _read_config()
+    if profilename:
+        if profilename not in profiles:
+            available = ", ".join(profiles.keys())
+            raise CloudOSConfigError(
+                f"Profile '{profilename}' not found.\n"
+                f"Available profiles: {available}\n"
+                "Use configure() to create this profile."
+            )
+    else:
+        # No name given: pick the first profile whose default flag is truthy.
+        default_name = None
+        for name, settings in profiles.items():
+            if settings.get("default"):
+                default_name = name
+                break
+        if not default_name:
+            available = ", ".join(profiles.keys())
+            raise CloudOSConfigError(
+                "No default profile configured.\n"
+                f"Available profiles: {available}\n"
+                "Specify profilename or set a default with configure(..., set_default=True)"
+            )
+        profilename = default_name
+    selected = profiles[profilename]
+    if not selected.get("apikey"):
+        raise CloudOSConfigError(f"Profile '{profilename}' is missing apikey.")
+    if not selected.get("workspace_id"):
+        raise CloudOSConfigError(f"Profile '{profilename}' is missing workspace_id.")
+    base_url = selected.get("base_url") or _DEFAULT_BASE_URL
+    return {
+        "apikey": selected["apikey"],
+        "workspace_id": selected["workspace_id"],
+        "base_url": base_url,
+    }

cloudos_cb/exceptions.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""Custom exceptions for the cloudos_cb package."""
+from __future__ import annotations
+class CloudOSError(Exception):
+    """Root of the cloudos_cb exception hierarchy; catch it to handle any package error."""
+class CloudOSConfigError(CloudOSError):
+    """Profile configuration problem: missing or unreadable config file, unknown or incomplete profile."""
+class CloudOSValidationError(CloudOSError):
+    """An input parameter was rejected because it is missing, empty, or invalid."""
+class CloudOSAuthError(CloudOSError):
+    """The API answered HTTP 401: authentication failed."""
+class CloudOSAccessError(CloudOSError):
+    """The API answered HTTP 403 (access denied) or 404 (resource not found)."""
+class CloudOSServerError(CloudOSError):
+    """The API answered with an HTTP 5xx server-side error."""
+class CloudOSAPIError(CloudOSError):
+    """General API failure that none of the more specific exception classes covers.
+    Attributes:
+        status_code (int | None): HTTP status of the failed response, if any.
+        endpoint (str | None): Endpoint path that was requested, if known.
+    """
+    def __init__(self, message: str, status_code: int | None = None, endpoint: str | None = None):
+        # Record the request context first, then let Exception store the message.
+        self.endpoint = endpoint
+        self.status_code = status_code
+        super().__init__(message)
+class CloudOSTimeoutError(CloudOSError):
+    """A query task was still unfinished after max_wait seconds of polling."""
+class CloudOSQueryError(CloudOSError):
+    """A query task ended with a failed status."""

cloudos_cb/http.py ADDED Viewed

@@ -0,0 +1,157 @@
+"""HTTP utilities for authenticated CloudOS API requests."""
+from __future__ import annotations
+import logging
+import requests
+from .exceptions import (
+    CloudOSAPIError,
+    CloudOSAccessError,
+    CloudOSAuthError,
+    CloudOSServerError,
+)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Connect and read timeouts, passed together to requests as the
+# ``timeout=(connect, read)`` tuple (requests interprets these as seconds).
+_CONNECT_TIMEOUT = 10
+_READ_TIMEOUT = 60
+def _build_headers(profile: dict, method: str = "GET") -> dict[str, str]:
+    """Build the request headers for an authenticated API call.
+    Args:
+        profile (dict): Profile configuration; its ``apikey`` is sent as the
+            ``apikey`` header.
+        method (str): HTTP method. Only "POST" adds a JSON Content-Type.
+    Returns:
+        dict: Header name to value mapping.
+    """
+    content_type = {"Content-Type": "application/json"} if method == "POST" else {}
+    return {
+        "apikey": profile["apikey"],
+        "Accept": "application/json",
+        **content_type,
+    }
+def _handle_error(response: requests.Response, endpoint: str) -> None:
+    """Translate an unsuccessful HTTP response into a package exception.
+    This function always raises. The error detail is taken from the JSON
+    body's ``message`` or ``error`` field when the body can be parsed.
+    Args:
+        response (requests.Response): The failed response.
+        endpoint (str): Endpoint path, included in the error message.
+    Raises:
+        CloudOSAuthError: On HTTP 401.
+        CloudOSAccessError: On HTTP 403 or 404.
+        CloudOSServerError: On HTTP 5xx.
+        CloudOSAPIError: On any other status; carries status_code and endpoint.
+    """
+    status = response.status_code
+    try:
+        payload = response.json()
+        detail = payload.get("message") or payload.get("error") or "Unknown error"
+    except (ValueError, AttributeError):
+        # Body is not JSON, or is JSON but not an object with .get().
+        detail = "Unable to parse error response"
+    endpoint_line = f"Endpoint: {endpoint}"
+    detail_line = f"Details: {detail}"
+    if status == 401:
+        raise CloudOSAuthError(
+            "\n".join(
+                [
+                    "Authentication failed (401).",
+                    endpoint_line,
+                    "Please check your API key and workspace ID.",
+                ]
+            )
+        )
+    elif status == 403:
+        raise CloudOSAccessError(
+            "\n".join(
+                [
+                    "Access denied (403).",
+                    endpoint_line,
+                    "You do not have permission to access this resource.",
+                    detail_line,
+                ]
+            )
+        )
+    elif status == 404:
+        raise CloudOSAccessError(
+            "\n".join(
+                [
+                    "Resource not found (404).",
+                    endpoint_line,
+                    "This resource does not exist or you do not have access.",
+                    detail_line,
+                ]
+            )
+        )
+    elif status >= 500:
+        raise CloudOSServerError(
+            "\n".join(
+                [
+                    f"Server error ({status}).",
+                    endpoint_line,
+                    "The server encountered an error. Please try again later.",
+                    detail_line,
+                ]
+            )
+        )
+    raise CloudOSAPIError(
+        "\n".join([f"API request failed ({status}).", endpoint_line, detail_line]),
+        status_code=status,
+        endpoint=endpoint,
+    )
+def _make_request(
+    method: str,
+    profile: dict,
+    endpoint: str,
+    body: dict | None = None,
+    query_params: dict | None = None,
+) -> dict:
+    """Send an authenticated request to the CloudOS API and return the JSON body.
+    The workspace ID is always sent as the ``teamId`` query parameter; entries
+    in ``query_params`` are merged on top of it.
+    Args:
+        method (str): "GET" or "POST".
+        profile (dict): Profile with apikey, workspace_id and base_url.
+        endpoint (str): API path beginning with "/", appended to base_url.
+        body (dict, optional): JSON body for POST; an empty object when omitted.
+        query_params (dict, optional): Additional query parameters.
+    Returns:
+        dict: Parsed JSON response.
+    Raises:
+        ValueError: If method is neither "GET" nor "POST".
+        CloudOSAuthError: On HTTP 401.
+        CloudOSAccessError: On HTTP 403 or 404.
+        CloudOSServerError: On HTTP 5xx.
+        CloudOSAPIError: On transport failures, other non-200/202 statuses,
+            or a response body that is not valid JSON.
+    """
+    if method not in ("GET", "POST"):
+        raise ValueError(f"Unsupported HTTP method: {method}")
+    # A base_url saved with a trailing slash would otherwise produce
+    # "https://host//api/..." once the endpoint is appended.
+    url = profile["base_url"].rstrip("/") + endpoint
+    params: dict = {"teamId": profile["workspace_id"]}
+    if query_params:
+        params.update(query_params)
+    headers = _build_headers(profile, method)
+    timeout = (_CONNECT_TIMEOUT, _READ_TIMEOUT)
+    try:
+        if method == "GET":
+            response = requests.get(url, headers=headers, params=params, timeout=timeout)
+        else:
+            response = requests.post(
+                url,
+                headers=headers,
+                params=params,
+                json=body or {},
+                timeout=timeout,
+            )
+    except requests.RequestException as e:
+        raise CloudOSAPIError(f"HTTP request failed: {e}") from e
+    # Only 200 and 202 count as success; everything else becomes an exception.
+    if response.status_code not in (200, 202):
+        _handle_error(response, endpoint)
+    try:
+        return response.json()
+    except ValueError as e:
+        raise CloudOSAPIError(f"Error parsing JSON response: {e}") from e
+def http_get(profile: dict, endpoint: str, query_params: dict | None = None) -> dict:
+    """Issue an authenticated GET against the CloudOS API.
+    Args:
+        profile (dict): Profile configuration as returned by load_profile().
+        endpoint (str): API path, without the base URL.
+        query_params (dict, optional): Extra query parameters.
+    Returns:
+        dict: Parsed JSON response.
+    Raises:
+        CloudOSAuthError: On HTTP 401.
+        CloudOSAccessError: On HTTP 403 or 404.
+        CloudOSServerError: On HTTP 5xx.
+        CloudOSAPIError: On other request or parsing errors.
+    """
+    # GET carries no body, hence the explicit None in the body position.
+    return _make_request("GET", profile, endpoint, None, query_params)
+def http_post(
+    profile: dict,
+    endpoint: str,
+    body: dict | None = None,
+    query_params: dict | None = None,
+) -> dict:
+    """Issue an authenticated POST against the CloudOS API.
+    Args:
+        profile (dict): Profile configuration as returned by load_profile().
+        endpoint (str): API path, without the base URL.
+        body (dict, optional): Payload sent as the JSON request body.
+        query_params (dict, optional): Extra query parameters.
+    Returns:
+        dict: Parsed JSON response.
+    Raises:
+        CloudOSAuthError: On HTTP 401.
+        CloudOSAccessError: On HTTP 403 or 404.
+        CloudOSServerError: On HTTP 5xx.
+        CloudOSAPIError: On other request or parsing errors.
+    """
+    return _make_request("POST", profile, endpoint, body, query_params)

cloudos_cb/queries.py ADDED Viewed

@@ -0,0 +1,455 @@
+"""Cohort Browser query functions."""
+from __future__ import annotations
+import logging
+import time
+from concurrent.futures import ThreadPoolExecutor
+import pandas as pd
+from .config import load_profile
+from .exceptions import (
+    CloudOSAPIError,
+    CloudOSQueryError,
+    CloudOSTimeoutError,
+    CloudOSValidationError,
+)
+from .http import http_get, http_post
+from .utils import convert_results_to_dataframe, validate_required_string
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+_PAGE_SUBMIT_DELAY = 0.2  # seconds between page submissions to avoid API rate limits
+# Upper bound on the thread-pool size used to poll and fetch pages concurrently.
+_MAX_PARALLEL_PAGE_WORKERS = 10
+# ---------------------------------------------------------------------------
+# Private helpers (defined before the public functions that call them)
+# ---------------------------------------------------------------------------
+def _validate_pagination(pagination: dict | None) -> None:
+    """Check that a pagination argument is well-formed.
+    Args:
+        pagination (dict | None): None (no pagination), or a dict holding
+            ``pageNumber`` (int >= 0) and ``pageSize`` (int >= 1).
+    Raises:
+        CloudOSValidationError: If pagination is not a dict, lacks either key,
+            or holds a value that is not an integer in the allowed range.
+    """
+    if pagination is None:
+        return
+    if not isinstance(pagination, dict):
+        raise CloudOSValidationError(
+            "pagination must be a dict with pageNumber and pageSize."
+        )
+    if "pageNumber" not in pagination or "pageSize" not in pagination:
+        raise CloudOSValidationError(
+            "pagination must contain both pageNumber and pageSize."
+        )
+    page_number = pagination["pageNumber"]
+    page_size = pagination["pageSize"]
+    # bool is a subclass of int, so True/False would pass a plain isinstance
+    # check and be serialised as JSON true/false; reject them explicitly.
+    if isinstance(page_number, bool) or not isinstance(page_number, int) or page_number < 0:
+        raise CloudOSValidationError(
+            "pagination['pageNumber'] must be a non-negative integer."
+        )
+    if isinstance(page_size, bool) or not isinstance(page_size, int) or page_size < 1:
+        raise CloudOSValidationError(
+            "pagination['pageSize'] must be a positive integer."
+        )
+def _poll_until_complete(
+    task_id: str,
+    profilename: str,
+    poll_interval: float,
+    max_wait: float,
+    label: str,
+) -> dict:
+    """Wait for a task to finish by polling query_status().
+    The status is compared case-insensitively with surrounding whitespace
+    removed. "completed" ends the wait successfully, "failed" raises, and any
+    other status is logged before sleeping and polling again.
+    Args:
+        task_id (str): Task ID to poll.
+        profilename (str): Profile passed to query_status().
+        poll_interval (float): Seconds to sleep between status checks.
+        max_wait (float): Seconds after which polling gives up.
+        label (str): Human-readable name used in log and error messages.
+    Returns:
+        dict: The status dict from the query_status() call that reported
+            completion.
+    Raises:
+        CloudOSTimeoutError: If max_wait seconds pass without completion.
+        CloudOSQueryError: If the task reports a failed status.
+    """
+    started = time.monotonic()
+    elapsed = time.monotonic() - started
+    while elapsed < max_wait:
+        info = query_status(task_id, profilename)
+        state = info["status"].lower().strip()
+        if state == "completed":
+            return info
+        if state == "failed":
+            raise CloudOSQueryError(
+                f"Query execution failed for {label}.\n"
+                f"Task ID: {task_id}\nCheck task status for details."
+            )
+        logger.info("  %s: %s (%.1fs elapsed)...", label, info["status"], elapsed)
+        time.sleep(poll_interval)
+        # Re-measure after sleeping; the loop condition enforces the deadline.
+        elapsed = time.monotonic() - started
+    raise CloudOSTimeoutError(
+        f"Task did not complete within {max_wait} seconds.\n"
+        f"Task ID: {task_id}\n"
+        "Use query_status() to check progress."
+    )
+def _poll_and_fetch_page(
+    page_num: int,
+    task_id: str,
+    profilename: str,
+    poll_interval: float,
+    max_wait: float,
+) -> pd.DataFrame:
+    """Wait for one page's task to complete, then return that page's results."""
+    label = f"Page {page_num}"
+    _poll_until_complete(task_id, profilename, poll_interval, max_wait, label)
+    return query_results(task_id, profilename)
+def _fetch_remaining_pages(
+    cohort_id: str,
+    sql: str,
+    page_size: int,
+    poll_interval: float,
+    max_wait: float,
+    profilename: str,
+    total_pages: int,
+    first_page: pd.DataFrame,
+) -> pd.DataFrame:
+    """Fetch pages 1..total_pages-1 and append them to the already-fetched page 0.
+    One async task is submitted per page, one after another with a short pause
+    after each submission. The tasks are then polled and their results fetched
+    on a thread pool, and the frames are concatenated in page order.
+    Args:
+        cohort_id (str): ID of the cohort being queried.
+        sql (str): SQL query to execute for every page.
+        page_size (int): Rows per page requested from the API.
+        poll_interval (float): Seconds between status checks per task.
+        max_wait (float): Maximum seconds to wait for each task.
+        profilename (str): Profile to use for all API calls.
+        total_pages (int): Total page count, including page 0.
+        first_page (pandas.DataFrame): Results of page 0.
+    Returns:
+        pandas.DataFrame: All rows, with ``.attrs`` rewritten to describe one
+            combined page and ``all_pages_fetched`` set to True.
+    """
+    total_rows = first_page.attrs.get("total_rows", 0)
+    last_page = total_pages - 1
+    logger.info(
+        "Fetching remaining pages (1 to %d) — %d total rows across %d pages...",
+        last_page,
+        total_rows,
+        total_pages,
+    )
+    submitted: list[tuple[int, str]] = []
+    for page_num in range(1, total_pages):
+        submission = query_submit_async(
+            cohort_id,
+            sql,
+            pagination={"pageNumber": page_num, "pageSize": page_size},
+            profilename=profilename,
+        )
+        page_task_id = submission["task_id"]
+        submitted.append((page_num, page_task_id))
+        logger.info("  Submitted page %d (task ID: %s)", page_num, page_task_id)
+        # Space out submissions to stay under the API rate limits.
+        time.sleep(_PAGE_SUBMIT_DELAY)
+    logger.info("Polling and fetching all pages concurrently...")
+    pool_size = min(len(submitted), _MAX_PARALLEL_PAGE_WORKERS)
+    with ThreadPoolExecutor(max_workers=pool_size) as pool:
+        pending = []
+        for page_num, page_task_id in submitted:
+            pending.append(
+                pool.submit(
+                    _poll_and_fetch_page,
+                    page_num,
+                    page_task_id,
+                    profilename,
+                    poll_interval,
+                    max_wait,
+                )
+            )
+        # Collect in submission order so rows stay in page order.
+        page_frames = [future.result() for future in pending]
+    combined = pd.concat([first_page, *page_frames], ignore_index=True)
+    combined.attrs.update(
+        {
+            "total_rows": total_rows,
+            "page": 0,
+            "page_size": total_rows,
+            "total_pages": 1,
+            "all_pages_fetched": True,
+        }
+    )
+    logger.info("Query complete: %d rows across %d pages", len(combined), total_pages)
+    return combined
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+class CohortTables:
+    """Schema listing for a cohort, as returned by cohort_tables().
+    ``str()`` renders a tree of ``schema.table`` entries with their columns,
+    while ``repr()`` shows only the cohort ID and the number of schemas.
+    Attributes:
+        cohort_id (str): The cohort ID this schema belongs to.
+        schemas (list): Raw ``schemas`` list from the API response; empty when
+            the response has no such key.
+    """
+    def __init__(self, response: dict, cohort_id: str) -> None:
+        self.schemas = response.get("schemas", [])
+        self.cohort_id = cohort_id
+    def __repr__(self) -> str:
+        schema_count = len(self.schemas)
+        return f"CohortTables(cohort_id={self.cohort_id!r}, schemas={schema_count})"
+    def __str__(self) -> str:
+        if not self.schemas:
+            return "No schemas found"
+        out = [f"Cohort {self.cohort_id}:"]
+        table_count = 0
+        for schema in self.schemas:
+            for table in schema.get("tables", []):
+                table_count += 1
+                out.append(f"  - {schema['name']}.{table['name']}")
+                out.extend(
+                    f"      - {column['name']} ({column['dataType']})"
+                    for column in table.get("columns", [])
+                )
+        out.append(f"\nTotal: {len(self.schemas)} database(s), {table_count} table(s)")
+        return "\n".join(out)
+def sql_validate(sql: str, profilename: str = "") -> dict:
+    """Ask the API to validate an SQL statement without running it.
+    Args:
+        sql (str): SQL query to validate.
+        profilename (str): Profile to use. Uses the default profile when empty.
+    Returns:
+        dict: Validation result with keys isValid, tableReferences,
+            columnReferences, and (on failure) error.
+    Raises:
+        CloudOSValidationError: If sql is empty.
+        CloudOSAuthError: On authentication failure.
+        CloudOSAPIError: On request or server errors.
+    """
+    validate_required_string(sql, "sql")
+    payload = {"sql": sql}
+    return http_post(
+        load_profile(profilename),
+        "/api/v2-cli/cohort-browser/sql-query/validate",
+        body=payload,
+    )
+def cohort_tables(cohort_id: str, profilename: str = "") -> CohortTables:
+    """Fetch the schemas, tables, and columns available for a cohort.
+    Args:
+        cohort_id (str): ID of the cohort to inspect.
+        profilename (str): Profile to use. Uses the default profile when empty.
+    Returns:
+        CohortTables: Schema information. Print it for a human-readable view,
+            or read `.schemas` for the raw list.
+    Raises:
+        CloudOSValidationError: If cohort_id is empty.
+        CloudOSAuthError: On authentication failure.
+        CloudOSAPIError: On request or server errors.
+    """
+    validate_required_string(cohort_id, "cohort_id")
+    schema_response = http_get(
+        load_profile(profilename),
+        "/api/v2-cli/cohort-browser/schemas",
+        query_params={"cohortId": cohort_id},
+    )
+    return CohortTables(schema_response, cohort_id)
+def query_submit_async(
+    cohort_id: str,
+    sql: str,
+    pagination: dict | None = None,
+    profilename: str = "",
+) -> dict:
+    """Submit an SQL query for a cohort as an asynchronous task.
+    Args:
+        cohort_id (str): ID of the cohort to query.
+        sql (str): SQL query to execute.
+        pagination (dict, optional): Dict with keys pageNumber (int >= 0) and
+            pageSize (int >= 1). Left out of the request body when None.
+        profilename (str): Profile to use. Uses the default profile when empty.
+    Returns:
+        dict: Keys: task_id, status, query, type, sync_execution_timeout,
+            full_response.
+    Raises:
+        CloudOSValidationError: If required parameters are missing or invalid.
+        CloudOSAPIError: On request or server errors, or when the response
+            carries no task ID.
+    """
+    validate_required_string(cohort_id, "cohort_id")
+    validate_required_string(sql, "sql")
+    _validate_pagination(pagination)
+    profile = load_profile(profilename)
+    payload: dict = {"query": sql}
+    if pagination is not None:
+        payload["pagination"] = pagination
+    response = http_post(
+        profile,
+        f"/api/v2-cli/cohort-browser/cohort/{cohort_id}/query-results/async",
+        body=payload,
+        query_params={"cohortId": cohort_id},
+    )
+    task = response.get("task", {})
+    if not task.get("_id"):
+        raise CloudOSAPIError("Invalid response from server: missing task ID")
+    submitted_id = task["_id"]
+    logger.info("Query submitted successfully. Task ID: %s", submitted_id)
+    return {
+        "task_id": submitted_id,
+        "status": task.get("status", "unknown"),
+        "query": task.get("query", sql),
+        "type": task.get("type", "unknown"),
+        "sync_execution_timeout": response.get("syncExecutionTimeout", 5000),
+        "full_response": response,
+    }
+def query_status(task_id: str, profilename: str = "") -> dict:
+    """Look up the current state of an async query task.
+    Args:
+        task_id (str): Task ID returned by query_submit_async().
+        profilename (str): Profile to use. Uses the default profile when empty.
+    Returns:
+        dict: Keys: task_id, status, type, count_of_results, query,
+            created_at, started_at, ended_at, user, full_response. Fields
+            absent from the API response get a placeholder default.
+    Raises:
+        CloudOSValidationError: If task_id is empty.
+        CloudOSAPIError: On request or server errors.
+    """
+    validate_required_string(task_id, "task_id")
+    response = http_get(
+        load_profile(profilename),
+        f"/api/v2-cli/cohort-browser/async-tasks/{task_id}",
+    )
+    summary = {
+        "task_id": response.get("_id", task_id),
+        "status": response.get("status", "unknown"),
+        "type": response.get("type", "unknown"),
+        "count_of_results": response.get("countOfResults", 0),
+        "query": response.get("query", ""),
+        "created_at": response.get("createdAt", ""),
+        "started_at": response.get("startedAt", ""),
+        "ended_at": response.get("endedAt", ""),
+        "user": response.get("user", ""),
+        "full_response": response,
+    }
+    return summary
+def query_results(task_id: str, profilename: str = "") -> pd.DataFrame:
+    """Download the result page of a completed async query task.
+    Pagination is fixed when the task is submitted (query_submit_async); this
+    function returns whichever page that task was created for.
+    Args:
+        task_id (str): Task ID returned by query_submit_async().
+        profilename (str): Profile to use. Uses the default profile when empty.
+    Returns:
+        pandas.DataFrame: Query results. Metadata is stored in ``.attrs``:
+            total_rows, page, page_size (rows in this frame), total_pages.
+    Raises:
+        CloudOSValidationError: If task_id is empty.
+        CloudOSAPIError: On request or server errors.
+    """
+    validate_required_string(task_id, "task_id")
+    response = http_get(
+        load_profile(profilename),
+        f"/api/v2-cli/cohort-browser/async-tasks/{task_id}/results",
+    )
+    total_rows = int(response.get("total", 0))
+    # The API response misnames this field: "pageSize" here means total page count
+    total_pages = int(response.get("pageSize", 1))
+    current_page = int(response.get("pageNumber", 0))
+    column_names = []
+    for column in response.get("columns", []):
+        column_names.append(column["name"])
+    frame = convert_results_to_dataframe(response.get("data", []), column_names)
+    frame.attrs.update(
+        {
+            "total_rows": total_rows,
+            "page": current_page,
+            "page_size": len(frame),
+            "total_pages": total_pages,
+        }
+    )
+    return frame
+def query(
+    cohort_id: str,
+    sql: str,
+    poll_interval: float = 2,
+    max_wait: float = 600,
+    page_size: int = 1000,
+    all_pages: bool = True,
+    profilename: str = "",
+) -> pd.DataFrame:
+    """Run an SQL query end to end: submit, poll until done, fetch results.
+    Page 0 is always submitted and fetched first. When all_pages is True and
+    the result spans more than one page, one further async task per remaining
+    page is submitted; those pages are polled and fetched concurrently and
+    combined with page 0.
+    Args:
+        cohort_id (str): ID of the cohort to query.
+        sql (str): SQL query to execute.
+        poll_interval (int|float): Seconds between status checks (minimum 1).
+        max_wait (int|float): Maximum seconds to wait for a task to complete.
+        page_size (int): Number of rows per page (minimum 1).
+        all_pages (bool): When True, fetches all pages and combines them.
+            When False, returns only the first page.
+        profilename (str): Profile to use. Uses the default profile when empty.
+    Returns:
+        pandas.DataFrame: Query results with metadata in ``.attrs``.
+    Raises:
+        CloudOSValidationError: If required parameters are missing or invalid.
+        CloudOSAPIError: On request or server errors.
+        CloudOSTimeoutError: If a task does not finish within max_wait seconds.
+        CloudOSQueryError: If a task reports a failed status.
+    """
+    validate_required_string(cohort_id, "cohort_id")
+    validate_required_string(sql, "sql")
+    if not isinstance(poll_interval, (int, float)) or poll_interval < 1:
+        raise CloudOSValidationError("poll_interval must be at least 1 second.")
+    if not isinstance(max_wait, (int, float)) or max_wait < 1:
+        raise CloudOSValidationError("max_wait must be at least 1 second.")
+    if not isinstance(page_size, int) or page_size < 1:
+        raise CloudOSValidationError("page_size must be a positive integer.")
+    logger.info("Submitting initial query...")
+    first_task = query_submit_async(
+        cohort_id,
+        sql,
+        pagination={"pageNumber": 0, "pageSize": page_size},
+        profilename=profilename,
+    )
+    task_id = first_task["task_id"]
+    logger.info("Polling for completion (max wait: %d seconds)...", max_wait)
+    _poll_until_complete(task_id, profilename, poll_interval, max_wait, "Page 0")
+    logger.info("Page 0 completed, fetching results...")
+    first_page = query_results(task_id, profilename)
+    total_pages = first_page.attrs.get("total_pages", 1)
+    has_more_pages = total_pages > 1
+    if all_pages and has_more_pages:
+        return _fetch_remaining_pages(
+            cohort_id, sql, page_size, poll_interval, max_wait,
+            profilename, total_pages, first_page,
+        )
+    if has_more_pages:
+        # Only reachable with all_pages=False: tell the caller what was left out.
+        logger.info(
+            "Note: Query has %d total rows across %d pages. "
+            "Only page 0 (%d rows) returned. Use all_pages=True to fetch all.",
+            first_page.attrs.get("total_rows", 0),
+            total_pages,
+            len(first_page),
+        )
+    logger.info("Query complete: %d rows", len(first_page))
+    return first_page

cloudos_cb/utils.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""Shared utility helpers for cloudos_cb."""
+from __future__ import annotations
+import logging
+import pandas as pd
+from .exceptions import CloudOSValidationError
+logger = logging.getLogger(__name__)
def validate_required_string(value: str, param_name: str) -> None:
    """Raise CloudOSValidationError when value is missing or blank.

    A value is rejected when it is falsy (``None``, ``""``, ...) or when it
    is a string consisting only of whitespace: a blank identifier or SQL
    statement is no more usable than an empty one.

    Args:
        value: Value to validate.
        param_name (str): Parameter name used in the error message.

    Raises:
        CloudOSValidationError: If value is falsy or a whitespace-only string.
    """
    # Only strings get the extra whitespace check; any other truthy value
    # is still accepted, exactly as before.
    is_blank = isinstance(value, str) and not value.strip()
    if not value or is_blank:
        raise CloudOSValidationError(
            f"{param_name} is required and cannot be empty."
        )
def convert_results_to_dataframe(data: list[dict], column_names: list[str]) -> pd.DataFrame:
    """Build a pandas DataFrame from the row objects of an API response.

    Args:
        data (list[dict]): Row objects from the API response.
        column_names (list[str]): Column names, in the order they should
            appear in the frame.

    Returns:
        pandas.DataFrame: One row per entry in ``data``, restricted to
        ``column_names``. Keys absent from a row are looked up with
        ``.get()`` and therefore contribute None. When ``data`` is empty,
        an empty frame carrying the requested columns is returned.
    """
    if data:
        records = []
        for entry in data:
            # Project each row onto the requested columns only; keys the
            # row does not carry come back as None.
            records.append({name: entry.get(name) for name in column_names})
        return pd.DataFrame(records, columns=column_names)
    return pd.DataFrame(columns=column_names)

cloudos_cb_py-1.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,400 @@
+Metadata-Version: 2.4
+Name: cloudos-cb-py
+Version: 1.2.0
+Summary: Python client for CloudOS Cohort Browser API
+Author-email: David Pineyro <david.pineyro@lifebit.ai>
+License: MIT License
+        Copyright (c) 2026 Lifebit
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+Project-URL: Homepage, https://github.com/lifebit-ai/cloudos-cb-py
+Project-URL: Repository, https://github.com/lifebit-ai/cloudos-cb-py
+Project-URL: Issues, https://github.com/lifebit-ai/cloudos-cb-py/issues
+Project-URL: Changelog, https://github.com/lifebit-ai/cloudos-cb-py/blob/main/CHANGELOG.md
+Keywords: cloudos,cohort-browser,api-client,bioinformatics
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests>=2.28.0
+Requires-Dist: pandas>=1.5.0
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
+Requires-Dist: flake8>=6.0.0; extra == "dev"
+Requires-Dist: responses>=0.23.0; extra == "dev"
+Dynamic: license-file
+# cloudos-cb-py
+Python client for the CloudOS Cohort Browser API. Provides functions for schema discovery,
+table exploration, and SQL query execution with team-based access control.
+## Requirements
+- Python >= 3.9
+- requests >= 2.28.0
+- pandas >= 1.5.0
+## Prerequisites
+**IMPORTANT:** Before using this package, ensure the following requirements are met:
+- **Bastion must be enabled** for your workspace
+- **The package must be run from within an interactive session**
+- **The interactive session and the cohort queried must be in the same workspace**
+Without these prerequisites, API calls will fail even with valid credentials.
+## Installation
+### From PyPI (recommended)
+```bash
+pip install cloudos-cb-py
+```
+### From source
+```bash
+git clone https://github.com/lifebit-ai/cloudos-cb-py
+cd cloudos-cb-py
+pip install .
+```
+### Development install (includes test dependencies)
+```bash
+pip install -e ".[dev]"
+```
+## Quick Start
+### 1. Configure a profile
+```python
+import cloudos_cb
+cloudos_cb.configure(
+    profilename="production",
+    apikey="your-api-key-here",
+    workspace_id="953h453uhr73894hhr9348h9",
+    set_default=True,
+)
+```
+Credentials are stored in `~/.cloudos-cb/config.json` with 0600 permissions.
+Set `CLOUDOS_CONFIG_DIR` to store the file elsewhere.
+### 2. List configured profiles
+```python
+profiles = cloudos_cb.profile_list()
+print(profiles)
+# Returns a pandas DataFrame with columns:
+# profile_name, workspace_id, base_url, default, created_at, updated_at
+```
+### 3. Discover cohort tables
+```python
+tables = cloudos_cb.cohort_tables(cohort_id="1a2b3c4d5e6f7g8h9i10j11k")
+print(tables)
+# Cohort 1a2b3c4d5e6f7g8h9i10j11k:
+#   - omop_data.person
+#       - person_id (integer)
+#       - year_of_birth (integer)
+#       - gender_concept_id (integer)
+#       ...
+#   - omop_data.observation
+#       ...
+#
+# Total: 1 database(s), 5 table(s)
+# Access raw data
+schema_list = tables.schemas
+```
+### 4. Validate SQL (optional but recommended)
+```python
+result = cloudos_cb.sql_validate(
+    sql="SELECT person_id FROM omop_data.person WHERE year_of_birth >= 1960"
+)
+if result["isValid"]:
+    print("SQL is valid")
+else:
+    print("SQL invalid:", result["error"]["message"])
+```
+### 5. Execute a query (high-level)
+```python
+df = cloudos_cb.query(
+    cohort_id="1a2b3c4d5e6f7g8h9i10j11k",
+    sql="SELECT person_id, gender_concept_id FROM omop_data.person LIMIT 100",
+)
+print(df.head())
+print(f"Total rows: {df.attrs['total_rows']}")
+```
+By default `query()` fetches all pages automatically. To return only the first page:
+```python
+df = cloudos_cb.query(
+    cohort_id="1a2b3c4d5e6f7g8h9i10j11k",
+    sql="SELECT person_id FROM omop_data.person",
+    all_pages=False,
+    page_size=500,
+)
+```
+### 6. Manual workflow
+For fine-grained control over the submit / poll / fetch cycle:
+```python
+# Step 1: Submit
+task = cloudos_cb.query_submit_async(
+    cohort_id="1a2b3c4d5e6f7g8h9i10j11k",
+    sql="SELECT person_id FROM omop_data.person",
+    pagination={"pageNumber": 0, "pageSize": 100},
+)
+print("Task ID:", task["task_id"])
+# Step 2: Poll
+status = cloudos_cb.query_status(task_id=task["task_id"])
+print("Status:", status["status"])
+# status["status"] is one of: "pending", "running", "completed", "failed"
+# Step 3: Fetch results when completed
+df = cloudos_cb.query_results(task_id=task["task_id"])
+print(df)
+```
+## API Reference
+### `configure(profilename, apikey, workspace_id, base_url=..., set_default=False)`
+Create or update a named credential profile.
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `profilename` | str | Profile name (required) |
+| `apikey` | str | API key (required) |
+| `workspace_id` | str | Workspace/team ID (required) |
+| `base_url` | str | CloudOS base URL (default: `https://cloudos.lifebit.ai`) |
+| `set_default` | bool | Mark this profile as the default |
+---
+### `profile_list()`
+Return a `pandas.DataFrame` of all configured profiles.
+---
+### `cohort_tables(cohort_id, profilename="")`
+Retrieve schemas, tables, and columns for a cohort.
+Returns a `CohortTables` object. Print it for a human-readable tree, or
+access `.schemas` for the raw list.
+---
+### `sql_validate(sql, profilename="")`
+Validate SQL syntax and references before execution.
+Returns a `dict` with `isValid` (bool), `tableReferences`, `columnReferences`,
+and on failure an `error` dict with a `message` key.
+---
+### `query_submit_async(cohort_id, sql, pagination=None, profilename="")`
+Submit an async SQL task. Returns a `dict` with:
+| Key | Description |
+|-----|-------------|
+| `task_id` | Use this to poll status and fetch results |
+| `status` | Initial status (typically `"pending"`) |
+| `query` | Echo of the submitted SQL |
+| `type` | Task type string |
+| `sync_execution_timeout` | Server-side timeout hint in ms |
+| `full_response` | Raw API response |
+`pagination` is an optional `dict` with `pageNumber` (int >= 0) and
+`pageSize` (int >= 1).
+---
+### `query_status(task_id, profilename="")`
+Check task status. Returns a `dict` with `task_id`, `status`, `type`,
+`count_of_results`, `query`, `created_at`, `started_at`, `ended_at`,
+`user`, `full_response`.
+---
+### `query_results(task_id, profilename="")`
+Fetch results for a completed task. Returns a `pandas.DataFrame` with
+metadata in `.attrs`:
+| Attribute | Description |
+|-----------|-------------|
+| `total_rows` | Total rows across all pages |
+| `page` | Page index returned |
+| `page_size` | Rows in this page |
+| `total_pages` | Total number of pages available |
+---
+### `query(cohort_id, sql, poll_interval=2, max_wait=600, page_size=1000, all_pages=True, profilename="")`
+High-level orchestrator. Submits, polls, and fetches results automatically.
+When `all_pages=True`, submits one async task per page and concatenates them.
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `poll_interval` | 2 | Seconds between status checks (minimum 1) |
+| `max_wait` | 600 | Maximum seconds to wait per task |
+| `page_size` | 1000 | Rows per page |
+| `all_pages` | True | Fetch all pages and combine them |
+---
+## Using multiple profiles
+```python
+# Configure multiple profiles
+cloudos_cb.configure(
+    profilename="production",
+    apikey="prod-key",
+    workspace_id="prod-workspace",
+    set_default=True,
+)
+cloudos_cb.configure(
+    profilename="staging",
+    apikey="stage-key",
+    workspace_id="stage-workspace",
+)
+# Use default profile (production)
+df = cloudos_cb.query(cohort_id="cohort-prod", sql="SELECT 1")
+# Explicitly use staging profile
+df = cloudos_cb.query(
+    cohort_id="cohort-stage",
+    sql="SELECT 1",
+    profilename="staging",
+)
+```
+## Configuration storage
+The config file is located at:
+- `$CLOUDOS_CONFIG_DIR/config.json` when the env var is set
+- `~/.cloudos/config.json` otherwise (home directory)
+File permissions are set to 0600 (user read/write only). The default location
+(`~/.cloudos/`) is outside any repository. If you override `CLOUDOS_CONFIG_DIR`
+to a path inside a project, add that directory to your `.gitignore`.
+## Error handling
+```python
+import cloudos_cb
+from cloudos_cb import (
+    CloudOSAuthError,
+    CloudOSAccessError,
+    CloudOSServerError,
+    CloudOSConfigError,
+    CloudOSValidationError,
+)
+try:
+    df = cloudos_cb.query(cohort_id="...", sql="SELECT 1")
+except CloudOSAuthError:
+    print("Authentication failed - check your API key.")
+except CloudOSAccessError:
+    print("Access denied or resource not found.")
+except CloudOSServerError:
+    print("Server error - try again later.")
+except CloudOSConfigError:
+    print("Profile not configured - run configure() first.")
+except CloudOSValidationError as e:
+    print(f"Invalid input: {e}")
+```
+## Logging
+The package uses Python's standard `logging` module under the `cloudos_cb`
+namespace. To see informational messages:
+```python
+import logging
+logging.basicConfig(level=logging.INFO)
+```
+## Running tests
+```bash
+pip install -e ".[dev]"
+pytest
+```
+To check code style:
+```bash
+flake8 cloudos_cb tests
+```
+## Package structure
+```
+cloudos-cb-py/
+├── pyproject.toml        # Package metadata and build config
+├── CHANGELOG.md
+├── README.md
+├── LICENSE
+├── cloudos_cb/           # Package source
+│   ├── __init__.py       # Public API
+│   ├── exceptions.py     # Custom exception classes
+│   ├── config.py         # Profile management
+│   ├── http.py           # Authenticated HTTP helpers
+│   ├── utils.py          # Shared utilities
+│   └── queries.py        # Cohort Browser query functions
+└── tests/
+    ├── test_config.py
+    ├── test_http.py
+    ├── test_utils.py
+    └── test_query.py
+```

cloudos_cb_py-1.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+cloudos_cb/__init__.py,sha256=OCEFYxynZfj52wzsKrgo76sVQZ9SlZxBycrhJ5-KMYU,988
+cloudos_cb/config.py,sha256=Z-KDHoNXehFqzY3eGZ2KgAMmxb_VC4Nj68znxd4IN5g,6606
+cloudos_cb/exceptions.py,sha256=HVACrXQha6IuIY9xQB3UpSXHanaJgPoGpGb4exfEA-c,1260
+cloudos_cb/http.py,sha256=Ty3miOLRGqWe9eyD987tJ5Tz6jQXMhJ_c1mv4Au8dvk,4779
+cloudos_cb/queries.py,sha256=5WbAA4CX_RRob1p3xpnAeS7uudwG4_QZHSj1sDUI4pg,16302
+cloudos_cb/utils.py,sha256=-31zGKyYiOrLF2M9WLNmsmcYOMao3kNoMj7IhnVAqIY,1222
+cloudos_cb_py-1.2.0.dist-info/licenses/LICENSE,sha256=ACBpTnDEVaAfEQSdrypv9uiDoKSiovBenzkMYixrj_E,1064
+cloudos_cb_py-1.2.0.dist-info/METADATA,sha256=C3sx1CE9TapMh7bIOtzgcOswjE8e81j0Xddp-B4e-Go,11276
+cloudos_cb_py-1.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+cloudos_cb_py-1.2.0.dist-info/top_level.txt,sha256=pI-uF3e1PafDTdQXy3JbLahKa4Z_mNSmYlF503-OyTk,11
+cloudos_cb_py-1.2.0.dist-info/RECORD,,

cloudos_cb_py-1.2.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

cloudos_cb_py-1.2.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Lifebit
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

cloudos_cb_py-1.2.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+cloudos_cb