cloudos-cb-py 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cloudos_cb/__init__.py ADDED
@@ -0,0 +1,49 @@
1
+ """cloudos_cb - Python client for the CloudOS Cohort Browser API."""
2
+
3
+ from .config import configure, profile_list
4
+ from .exceptions import (
5
+ CloudOSAPIError,
6
+ CloudOSAccessError,
7
+ CloudOSAuthError,
8
+ CloudOSConfigError,
9
+ CloudOSError,
10
+ CloudOSQueryError,
11
+ CloudOSServerError,
12
+ CloudOSTimeoutError,
13
+ CloudOSValidationError,
14
+ )
15
+ from .queries import (
16
+ CohortTables,
17
+ cohort_tables,
18
+ query,
19
+ query_results,
20
+ query_status,
21
+ query_submit_async,
22
+ sql_validate,
23
+ )
24
+
25
+ __version__ = "1.2.0"
26
+
27
+ __all__ = [
28
+ # Config
29
+ "configure",
30
+ "profile_list",
31
+ # Query
32
+ "sql_validate",
33
+ "cohort_tables",
34
+ "CohortTables",
35
+ "query_submit_async",
36
+ "query_status",
37
+ "query_results",
38
+ "query",
39
+ # Exceptions
40
+ "CloudOSError",
41
+ "CloudOSConfigError",
42
+ "CloudOSValidationError",
43
+ "CloudOSAuthError",
44
+ "CloudOSAccessError",
45
+ "CloudOSServerError",
46
+ "CloudOSAPIError",
47
+ "CloudOSTimeoutError",
48
+ "CloudOSQueryError",
49
+ ]
cloudos_cb/config.py ADDED
@@ -0,0 +1,201 @@
1
+ """Profile configuration management for cloudos_cb."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ import stat
9
+ from datetime import datetime
10
+
11
+ import pandas as pd
12
+
13
+ from .exceptions import CloudOSConfigError, CloudOSValidationError
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ _DEFAULT_BASE_URL = "https://cloudos.lifebit.ai"
18
+ _CONFIG_FILENAME = "config.json"
19
+ _CONFIG_DIR_NAME = ".cloudos-cb"
20
+
21
+
22
def _get_config_dir() -> str:
    """Return the directory that holds the cloudos_cb configuration file.

    A non-empty ``CLOUDOS_CONFIG_DIR`` environment variable takes precedence;
    otherwise the directory is located under the current user's home.
    """
    override = os.environ.get("CLOUDOS_CONFIG_DIR")
    if override:
        return override
    home = os.path.expanduser("~")
    return os.path.join(home, _CONFIG_DIR_NAME)
26
+
27
+
28
def _get_config_file() -> str:
    """Return the full path of the JSON config file inside the config directory."""
    directory = _get_config_dir()
    return os.path.join(directory, _CONFIG_FILENAME)
30
+
31
+
32
def _read_config() -> dict:
    """Load the stored profiles from the config file.

    Returns:
        dict: Mapping of profile name to profile settings. Empty when the
            config file does not exist yet.

    Raises:
        CloudOSConfigError: If the file cannot be read, is not valid JSON, or
            its top-level value is not a JSON object.
    """
    config_file = _get_config_file()
    # EAFP: open directly instead of exists()-then-open, which can race with
    # another process creating or removing the file in between.
    try:
        with open(config_file, "r", encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError:
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        raise CloudOSConfigError(f"Error reading config file: {e}") from e
    except OSError as e:
        # Permission problems, the path being a directory, etc.
        raise CloudOSConfigError(f"Error reading config file: {e}") from e

    # Callers treat the result as a name -> profile mapping.
    if not isinstance(config, dict):
        raise CloudOSConfigError(
            "Error reading config file: expected a JSON object at the top level, "
            f"found {type(config).__name__}."
        )
    return config
41
+
42
+
43
def _write_config(config: dict) -> None:
    """Persist the profile mapping to the config file with owner-only access.

    The config directory is created when missing and restricted to 0700 only
    in that case (an existing directory keeps its permissions). The file is
    opened with mode 0600 from the start so the stored API key is never
    readable by other users, and the mode is re-applied afterwards so that a
    file that already existed with looser permissions is tightened as well.

    Args:
        config (dict): Mapping of profile name to profile settings.

    Raises:
        CloudOSConfigError: If the directory or file cannot be created or
            written.
    """
    config_file = _get_config_file()
    config_dir = os.path.dirname(config_file)
    owner_rw = stat.S_IRUSR | stat.S_IWUSR
    try:
        dir_existed = os.path.isdir(config_dir)
        os.makedirs(config_dir, exist_ok=True)
        if not dir_existed:
            os.chmod(config_dir, 0o700)

        # os.open applies the mode at creation time, closing the window in
        # which open(..., "w") would leave the file with umask-default bits.
        fd = os.open(config_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_rw)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)
        # The creation mode is ignored for pre-existing files; enforce it.
        os.chmod(config_file, owner_rw)
    except OSError as e:
        raise CloudOSConfigError(f"Error writing config file: {e}") from e
56
+
57
+
58
def configure(
    profilename: str,
    apikey: str,
    workspace_id: str,
    base_url: str = _DEFAULT_BASE_URL,
    set_default: bool = False,
) -> None:
    """Create or update a named CloudOS profile.

    The profile is written to the config file returned by
    ``_get_config_file()`` (``~/.cloudos-cb/config.json`` unless the
    ``CLOUDOS_CONFIG_DIR`` environment variable points elsewhere). An existing
    profile keeps its original ``created_at`` value and, unless
    ``set_default`` is given, its current default flag.

    Args:
        profilename (str): Name of the profile to create or update.
        apikey (str): API key for authentication.
        workspace_id (str): Workspace/team ID for API requests.
        base_url (str): Base URL for the CloudOS API.
        set_default (bool): If True, this profile becomes the only default.

    Raises:
        CloudOSValidationError: If profilename, apikey or workspace_id is empty.
        CloudOSConfigError: If the config file cannot be read or written.
    """
    required = (
        ("profilename", profilename),
        ("apikey", apikey),
        ("workspace_id", workspace_id),
    )
    for label, value in required:
        if not value:
            raise CloudOSValidationError(f"{label} is required and cannot be empty.")

    profiles = _read_config()

    if set_default:
        # Only one profile may be the default: clear the flag everywhere first.
        for entry in profiles.values():
            entry["default"] = False

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    previous = profiles.get(profilename, {})
    profiles[profilename] = {
        "apikey": apikey,
        "workspace_id": workspace_id,
        "base_url": base_url,
        "default": set_default or previous.get("default", False),
        "created_at": previous.get("created_at", timestamp),
        "updated_at": timestamp,
    }

    _write_config(profiles)

    message = (
        "Profile '%s' configured successfully and set as default."
        if set_default
        else "Profile '%s' configured successfully."
    )
    logger.info(message, profilename)
    logger.info("Config stored at: %s", _get_config_file())
115
+
116
+
117
def profile_list() -> pd.DataFrame:
    """Return every configured profile as one row of a DataFrame.

    The API key is deliberately not part of the output.

    Returns:
        pandas.DataFrame: Columns profile_name, workspace_id, base_url,
            default, created_at, updated_at. The frame has no rows when no
            profile has been configured.
    """
    columns = ["profile_name", "workspace_id", "base_url", "default", "created_at", "updated_at"]
    stored = _read_config()

    if not stored:
        logger.info("No profiles configured. Use configure() to create a profile.")
        return pd.DataFrame(columns=columns)

    records = []
    for name, settings in stored.items():
        records.append(
            {
                "profile_name": name,
                "workspace_id": settings.get("workspace_id", ""),
                "base_url": settings.get("base_url", _DEFAULT_BASE_URL),
                "default": settings.get("default", False),
                "created_at": settings.get("created_at", ""),
                "updated_at": settings.get("updated_at", ""),
            }
        )
    return pd.DataFrame(records, columns=columns)
144
+
145
+
146
def load_profile(profilename: str = "") -> dict[str, str]:
    """Resolve a profile to the credentials needed for API calls (internal).

    Args:
        profilename (str): Name of the profile to load. When empty, the
            profile flagged as default is used.

    Returns:
        dict: Keys apikey, workspace_id and base_url. A missing or empty
            stored base_url falls back to the package default URL.

    Raises:
        CloudOSConfigError: If the config file does not exist, the requested
            profile (or any default profile) cannot be found, or the profile
            lacks apikey or workspace_id.
    """
    config_file = _get_config_file()
    if not os.path.exists(config_file):
        raise CloudOSConfigError(
            "No configuration file found. Use configure() to create a profile first.\n"
            f"Expected location: {config_file}"
        )

    profiles = _read_config()

    if profilename:
        if profilename not in profiles:
            known = ", ".join(profiles.keys())
            raise CloudOSConfigError(
                f"Profile '{profilename}' not found.\n"
                f"Available profiles: {known}\n"
                "Use configure() to create this profile."
            )
    else:
        # No explicit name: take the first profile flagged as default.
        chosen = None
        for name, settings in profiles.items():
            if settings.get("default"):
                chosen = name
                break
        if not chosen:
            known = ", ".join(profiles.keys())
            raise CloudOSConfigError(
                "No default profile configured.\n"
                f"Available profiles: {known}\n"
                "Specify profilename or set a default with configure(..., set_default=True)"
            )
        profilename = chosen

    selected = profiles[profilename]

    if not selected.get("apikey"):
        raise CloudOSConfigError(f"Profile '{profilename}' is missing apikey.")
    if not selected.get("workspace_id"):
        raise CloudOSConfigError(f"Profile '{profilename}' is missing workspace_id.")

    resolved_url = selected.get("base_url") or _DEFAULT_BASE_URL
    return {
        "apikey": selected["apikey"],
        "workspace_id": selected["workspace_id"],
        "base_url": resolved_url,
    }
@@ -0,0 +1,44 @@
1
+ """Custom exceptions for the cloudos_cb package."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class CloudOSError(Exception):
    """Root of the cloudos_cb exception hierarchy; catch this to handle any package error."""


class CloudOSConfigError(CloudOSError):
    """Configuration problem, such as a missing config file or an unknown profile."""


class CloudOSValidationError(CloudOSError):
    """A required input parameter was missing or had an invalid value."""


class CloudOSAuthError(CloudOSError):
    """The API rejected the request with HTTP 401 (authentication failure)."""


class CloudOSAccessError(CloudOSError):
    """The API answered HTTP 403 or 404 (access denied or resource not found)."""


class CloudOSServerError(CloudOSError):
    """The API answered with an HTTP 5xx server-side error."""


class CloudOSAPIError(CloudOSError):
    """General API failure not covered by one of the more specific classes.

    Attributes:
        status_code: HTTP status code of the failed response, when known.
        endpoint: API endpoint that was being called, when known.
    """

    def __init__(self, message: str, status_code: int | None = None, endpoint: str | None = None):
        super().__init__(message)
        self.endpoint = endpoint
        self.status_code = status_code


class CloudOSTimeoutError(CloudOSError):
    """A query task did not complete within max_wait seconds."""


class CloudOSQueryError(CloudOSError):
    """A query task reported a failed status."""
cloudos_cb/http.py ADDED
@@ -0,0 +1,157 @@
1
+ """HTTP utilities for authenticated CloudOS API requests."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ import requests
8
+
9
+ from .exceptions import (
10
+ CloudOSAPIError,
11
+ CloudOSAccessError,
12
+ CloudOSAuthError,
13
+ CloudOSServerError,
14
+ )
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ _CONNECT_TIMEOUT = 10
19
+ _READ_TIMEOUT = 60
20
+
21
+
22
def _build_headers(profile: dict, method: str = "GET") -> dict[str, str]:
    """Build the request headers for an authenticated API call.

    The profile's API key is sent in the ``apikey`` header and JSON is
    requested via ``Accept``. A JSON ``Content-Type`` is added only when
    ``method`` is exactly ``"POST"``.
    """
    base = {"apikey": profile["apikey"], "Accept": "application/json"}
    extra = {"Content-Type": "application/json"} if method == "POST" else {}
    return {**base, **extra}
30
+
31
+
32
def _handle_error(response: requests.Response, endpoint: str) -> None:
    """Raise the package exception matching a failed HTTP response.

    The error detail is read from the ``message`` or ``error`` field of the
    JSON body when one can be parsed. This function always raises.

    Args:
        response (requests.Response): The failed response.
        endpoint (str): Endpoint path, included in the error message.

    Raises:
        CloudOSAuthError: On HTTP 401.
        CloudOSAccessError: On HTTP 403 or 404.
        CloudOSServerError: On HTTP 5xx.
        CloudOSAPIError: On any other status; carries status_code and endpoint.
    """
    code = response.status_code

    try:
        payload = response.json()
        detail = payload.get("message") or payload.get("error") or "Unknown error"
    except (ValueError, AttributeError):
        # Body is not JSON, or is JSON but not an object with .get().
        detail = "Unable to parse error response"

    where = f"Endpoint: {endpoint}"
    details_line = f"Details: {detail}"

    if code == 401:
        raise CloudOSAuthError(
            "\n".join(
                (
                    "Authentication failed (401).",
                    where,
                    "Please check your API key and workspace ID.",
                )
            )
        )
    if code == 403:
        raise CloudOSAccessError(
            "\n".join(
                (
                    "Access denied (403).",
                    where,
                    "You do not have permission to access this resource.",
                    details_line,
                )
            )
        )
    if code == 404:
        raise CloudOSAccessError(
            "\n".join(
                (
                    "Resource not found (404).",
                    where,
                    "This resource does not exist or you do not have access.",
                    details_line,
                )
            )
        )
    if code >= 500:
        raise CloudOSServerError(
            "\n".join(
                (
                    f"Server error ({code}).",
                    where,
                    "The server encountered an error. Please try again later.",
                    details_line,
                )
            )
        )
    raise CloudOSAPIError(
        "\n".join((f"API request failed ({code}).", where, details_line)),
        status_code=code,
        endpoint=endpoint,
    )
72
+
73
+
74
def _make_request(
    method: str,
    profile: dict,
    endpoint: str,
    body: dict | None = None,
    query_params: dict | None = None,
) -> dict:
    """Send an authenticated GET or POST request and return the parsed JSON.

    The profile's workspace ID is always sent as the ``teamId`` query
    parameter; ``query_params`` is merged on top of it.

    Args:
        method (str): ``"GET"`` or ``"POST"``.
        profile (dict): Profile with keys apikey, workspace_id and base_url.
        endpoint (str): API endpoint path (without base URL).
        body (dict, optional): JSON body for POST requests; an empty object is
            sent when omitted.
        query_params (dict, optional): Additional query parameters.

    Returns:
        dict: Parsed JSON response.

    Raises:
        ValueError: If ``method`` is neither GET nor POST.
        CloudOSAuthError: On HTTP 401.
        CloudOSAccessError: On HTTP 403 or 404.
        CloudOSServerError: On HTTP 5xx.
        CloudOSAPIError: On transport failures, other non-200/202 statuses, or
            an unparseable response body.
    """
    # Reject bad methods up front rather than from inside the network try-block.
    if method not in ("GET", "POST"):
        raise ValueError(f"Unsupported HTTP method: {method}")

    # Join with exactly one slash so a base_url configured with a trailing
    # slash does not produce "https://host//api/...".
    base = profile["base_url"].rstrip("/")
    path = endpoint if not endpoint or endpoint.startswith("/") else "/" + endpoint
    url = base + path

    params: dict = {"teamId": profile["workspace_id"]}
    if query_params:
        params.update(query_params)

    headers = _build_headers(profile, method)
    timeout = (_CONNECT_TIMEOUT, _READ_TIMEOUT)

    try:
        if method == "GET":
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
        else:
            response = requests.post(
                url,
                headers=headers,
                params=params,
                json=body or {},
                timeout=timeout,
            )
    except requests.RequestException as e:
        raise CloudOSAPIError(f"HTTP request failed: {e}", endpoint=endpoint) from e

    if response.status_code not in (200, 202):
        _handle_error(response, endpoint)

    try:
        return response.json()
    except ValueError as e:
        raise CloudOSAPIError(
            f"Error parsing JSON response: {e}",
            status_code=response.status_code,
            endpoint=endpoint,
        ) from e
112
+
113
+
114
def http_get(profile: dict, endpoint: str, query_params: dict | None = None) -> dict:
    """Issue an authenticated GET against the CloudOS API.

    Thin wrapper that delegates to ``_make_request`` with method ``"GET"``.

    Args:
        profile (dict): Profile configuration from load_profile().
        endpoint (str): API endpoint path (without base URL).
        query_params (dict, optional): Additional query parameters.

    Returns:
        dict: Parsed JSON response.

    Raises:
        CloudOSAuthError: On HTTP 401.
        CloudOSAccessError: On HTTP 403 or 404.
        CloudOSServerError: On HTTP 5xx.
        CloudOSAPIError: On other request or parsing errors.
    """
    return _make_request(
        "GET",
        profile,
        endpoint,
        query_params=query_params,
    )
132
+
133
+
134
def http_post(
    profile: dict,
    endpoint: str,
    body: dict | None = None,
    query_params: dict | None = None,
) -> dict:
    """Issue an authenticated POST against the CloudOS API.

    Thin wrapper that delegates to ``_make_request`` with method ``"POST"``.

    Args:
        profile (dict): Profile configuration from load_profile().
        endpoint (str): API endpoint path (without base URL).
        body (dict, optional): Request body serialised as JSON.
        query_params (dict, optional): Additional query parameters.

    Returns:
        dict: Parsed JSON response.

    Raises:
        CloudOSAuthError: On HTTP 401.
        CloudOSAccessError: On HTTP 403 or 404.
        CloudOSServerError: On HTTP 5xx.
        CloudOSAPIError: On other request or parsing errors.
    """
    return _make_request(
        "POST",
        profile,
        endpoint,
        body=body,
        query_params=query_params,
    )
cloudos_cb/queries.py ADDED
@@ -0,0 +1,455 @@
1
+ """Cohort Browser query functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+ from concurrent.futures import ThreadPoolExecutor
8
+
9
+ import pandas as pd
10
+
11
+ from .config import load_profile
12
+ from .exceptions import (
13
+ CloudOSAPIError,
14
+ CloudOSQueryError,
15
+ CloudOSTimeoutError,
16
+ CloudOSValidationError,
17
+ )
18
+ from .http import http_get, http_post
19
+ from .utils import convert_results_to_dataframe, validate_required_string
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ _PAGE_SUBMIT_DELAY = 0.2 # seconds between page submissions to avoid API rate limits
24
+ _MAX_PARALLEL_PAGE_WORKERS = 10
25
+
26
+
27
+ # ---------------------------------------------------------------------------
28
+ # Private helpers (defined before the public functions that call them)
29
+ # ---------------------------------------------------------------------------
30
+
31
def _validate_pagination(pagination: dict | None) -> None:
    """Validate an optional pagination dict before it is sent to the API.

    Args:
        pagination (dict | None): ``None`` (no pagination) or a dict with
            integer keys ``pageNumber`` (>= 0) and ``pageSize`` (>= 1).

    Raises:
        CloudOSValidationError: If pagination is not a dict, lacks a required
            key, or a value is not an integer in the allowed range.
    """
    if pagination is None:
        return
    if not isinstance(pagination, dict):
        raise CloudOSValidationError(
            "pagination must be a dict with pageNumber and pageSize."
        )
    if "pageNumber" not in pagination or "pageSize" not in pagination:
        raise CloudOSValidationError(
            "pagination must contain both pageNumber and pageSize."
        )

    page_number = pagination["pageNumber"]
    page_size = pagination["pageSize"]

    # bool is a subclass of int, so True/False must be rejected explicitly;
    # otherwise pageSize=True would be accepted and serialised as JSON `true`.
    if (
        isinstance(page_number, bool)
        or not isinstance(page_number, int)
        or page_number < 0
    ):
        raise CloudOSValidationError(
            "pagination['pageNumber'] must be a non-negative integer."
        )
    if (
        isinstance(page_size, bool)
        or not isinstance(page_size, int)
        or page_size < 1
    ):
        raise CloudOSValidationError(
            "pagination['pageSize'] must be a positive integer."
        )
50
+
51
+
52
def _poll_until_complete(
    task_id: str,
    profilename: str,
    poll_interval: float,
    max_wait: float,
    label: str,
) -> dict:
    """Poll a task until it reports 'completed' or 'failed', or time runs out.

    The status is always checked before the deadline is evaluated, so a task
    that finishes during the final sleep is still reported as completed
    rather than as a timeout. The last sleep is shortened so that the final
    check happens at (not after) ``max_wait``.

    Args:
        task_id (str): Task ID to poll.
        profilename (str): Profile to use for query_status calls.
        poll_interval (float): Seconds between status checks.
        max_wait (float): Maximum seconds to wait before raising.
        label (str): Human-readable label used in log messages and errors.

    Returns:
        dict: Final status info dict from query_status().

    Raises:
        CloudOSTimeoutError: If the task does not complete within max_wait seconds.
        CloudOSQueryError: If the task reports a failed status.
    """
    start = time.monotonic()
    while True:
        info = query_status(task_id, profilename)
        raw_status = info.get("status")
        # Tolerate a null/non-string status instead of crashing on .lower().
        status = str(raw_status or "").lower().strip()
        if status == "completed":
            return info
        if status == "failed":
            raise CloudOSQueryError(
                f"Query execution failed for {label}.\n"
                f"Task ID: {task_id}\nCheck task status for details."
            )

        elapsed = time.monotonic() - start
        remaining = max_wait - elapsed
        if remaining <= 0:
            raise CloudOSTimeoutError(
                f"Task did not complete within {max_wait} seconds.\n"
                f"Task ID: {task_id}\n"
                "Use query_status() to check progress."
            )

        logger.info(" %s: %s (%.1fs elapsed)...", label, raw_status, elapsed)
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
95
+
96
+
97
def _poll_and_fetch_page(
    page_num: int,
    task_id: str,
    profilename: str,
    poll_interval: float,
    max_wait: float,
) -> pd.DataFrame:
    """Wait for one page's task to complete, then return that page's results.

    ``page_num`` is only used to label log and error messages.
    """
    page_label = f"Page {page_num}"
    _poll_until_complete(task_id, profilename, poll_interval, max_wait, page_label)
    page = query_results(task_id, profilename)
    return page
107
+
108
+
109
def _fetch_remaining_pages(
    cohort_id: str,
    sql: str,
    page_size: int,
    poll_interval: float,
    max_wait: float,
    profilename: str,
    total_pages: int,
    first_page: pd.DataFrame,
) -> pd.DataFrame:
    """Fetch pages 1..total_pages-1 and append them to ``first_page``.

    One async task is submitted per page (sequentially, with a short delay
    between submissions); the tasks are then polled and fetched concurrently
    and concatenated in page order.

    Args:
        cohort_id (str): ID of the cohort to query.
        sql (str): SQL query to execute.
        page_size (int): Number of rows per page.
        poll_interval (float): Seconds between status checks per task.
        max_wait (float): Maximum seconds to wait for each task.
        profilename (str): Profile to use.
        total_pages (int): Total number of pages, including page 0.
        first_page (pandas.DataFrame): Already-fetched page 0.

    Returns:
        pandas.DataFrame: All pages combined, with ``.attrs`` describing the
            result as a single fully-fetched page. ``first_page`` itself is
            returned unchanged when there are no further pages.

    Raises:
        CloudOSTimeoutError: If a page task does not finish within max_wait.
        CloudOSQueryError: If a page task reports a failed status.
        CloudOSAPIError: On request or server errors.
    """
    total_rows = first_page.attrs.get("total_rows", 0)
    last_page = total_pages - 1

    # Nothing left to fetch. Without this guard ThreadPoolExecutor would be
    # created with max_workers=0, which raises ValueError.
    if last_page < 1:
        return first_page

    logger.info(
        "Fetching remaining pages (1 to %d) — %d total rows across %d pages...",
        last_page,
        total_rows,
        total_pages,
    )

    remaining_tasks: list[tuple[int, str]] = []
    for page_num in range(1, total_pages):
        task = query_submit_async(
            cohort_id, sql,
            pagination={"pageNumber": page_num, "pageSize": page_size},
            profilename=profilename,
        )
        remaining_tasks.append((page_num, task["task_id"]))
        logger.info(" Submitted page %d (task ID: %s)", page_num, task["task_id"])
        # The delay only spaces out submissions; none follows the last one.
        if page_num < last_page:
            time.sleep(_PAGE_SUBMIT_DELAY)

    logger.info("Polling and fetching all pages concurrently...")
    workers = min(len(remaining_tasks), _MAX_PARALLEL_PAGE_WORKERS)
    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [
            executor.submit(
                _poll_and_fetch_page, page_num, tid, profilename, poll_interval, max_wait,
            )
            for page_num, tid in remaining_tasks
        ]
        try:
            # Collect in submission order so pages stay in page order.
            remaining_results = [f.result() for f in futures]
        except BaseException:
            # One page failed: drop pages that have not started yet instead
            # of polling them to completion before the error surfaces.
            for pending in futures:
                pending.cancel()
            raise

    combined = pd.concat([first_page] + remaining_results, ignore_index=True)
    combined.attrs["total_rows"] = total_rows
    combined.attrs["page"] = 0
    combined.attrs["page_size"] = total_rows
    combined.attrs["total_pages"] = 1
    combined.attrs["all_pages_fetched"] = True

    logger.info("Query complete: %d rows across %d pages", len(combined), total_pages)
    return combined
160
+
161
+
162
+ # ---------------------------------------------------------------------------
163
+ # Public API
164
+ # ---------------------------------------------------------------------------
165
+
166
class CohortTables:
    """Cohort schema information returned by cohort_tables().

    ``str()`` renders a human-readable listing of schemas, tables and
    columns; ``repr()`` gives a compact summary.

    Attributes:
        cohort_id (str): The cohort ID this schema belongs to.
        schemas (list): Raw schema list from the API response (empty when the
            response has no schemas or reports them as null).
    """

    def __init__(self, response: dict, cohort_id: str) -> None:
        self.cohort_id = cohort_id
        # `or []` also covers an explicit null, which would break len() and
        # iteration in __repr__/__str__.
        self.schemas = response.get("schemas") or []

    def __repr__(self) -> str:
        return f"CohortTables(cohort_id={self.cohort_id!r}, schemas={len(self.schemas)})"

    def __str__(self) -> str:
        if not self.schemas:
            return "No schemas found"

        lines = [f"Cohort {self.cohort_id}:"]
        total_tables = 0

        # Printing must not fail on partially populated entries, so missing
        # names/types are shown as placeholders instead of raising KeyError.
        for schema in self.schemas:
            schema_name = schema.get("name", "<unnamed>")
            for table in schema.get("tables") or []:
                table_name = table.get("name", "<unnamed>")
                lines.append(f" - {schema_name}.{table_name}")
                for col in table.get("columns") or []:
                    col_name = col.get("name", "<unnamed>")
                    col_type = col.get("dataType", "unknown")
                    lines.append(f" - {col_name} ({col_type})")
                total_tables += 1

        n_dbs = len(self.schemas)
        lines.append(f"\nTotal: {n_dbs} database(s), {total_tables} table(s)")
        return "\n".join(lines)
198
+
199
+
200
def sql_validate(sql: str, profilename: str = "") -> dict:
    """Ask the API to validate an SQL statement without executing it.

    Args:
        sql (str): SQL query to validate.
        profilename (str): Profile to use. Uses the default profile when empty.

    Returns:
        dict: Validation result with keys isValid, tableReferences,
            columnReferences, and (on failure) error.

    Raises:
        CloudOSValidationError: If sql is empty.
        CloudOSAuthError: On authentication failure.
        CloudOSAPIError: On request or server errors.
    """
    validate_required_string(sql, "sql")
    payload = {"sql": sql}
    return http_post(
        load_profile(profilename),
        "/api/v2-cli/cohort-browser/sql-query/validate",
        body=payload,
    )
220
+
221
+
222
def cohort_tables(cohort_id: str, profilename: str = "") -> CohortTables:
    """Fetch the schemas, tables and columns available for a cohort.

    Args:
        cohort_id (str): ID of the cohort to query.
        profilename (str): Profile to use. Uses the default profile when empty.

    Returns:
        CohortTables: Schema information. Print it for a human-readable view,
            or access `.schemas` for the raw list.

    Raises:
        CloudOSValidationError: If cohort_id is empty.
        CloudOSAuthError: On authentication failure.
        CloudOSAPIError: On request or server errors.
    """
    validate_required_string(cohort_id, "cohort_id")
    raw = http_get(
        load_profile(profilename),
        "/api/v2-cli/cohort-browser/schemas",
        query_params={"cohortId": cohort_id},
    )
    return CohortTables(raw, cohort_id)
243
+
244
+
245
def query_submit_async(
    cohort_id: str,
    sql: str,
    pagination: dict | None = None,
    profilename: str = "",
) -> dict:
    """Submit an async SQL query task for a cohort.

    Args:
        cohort_id (str): ID of the cohort to query.
        sql (str): SQL query to execute.
        pagination (dict, optional): Dict with keys pageNumber (int >= 0) and
            pageSize (int >= 1). Omit to use the API default.
        profilename (str): Profile to use. Uses the default profile when empty.

    Returns:
        dict: Keys: task_id, status, query, type, sync_execution_timeout,
            full_response.

    Raises:
        CloudOSValidationError: If required parameters are missing or invalid.
        CloudOSAPIError: On request or server errors, or when the response
            carries no task ID.
    """
    from urllib.parse import quote

    validate_required_string(cohort_id, "cohort_id")
    validate_required_string(sql, "sql")
    _validate_pagination(pagination)

    profile = load_profile(profilename)
    # cohort_id is caller-supplied and becomes a URL path segment: escape it
    # (including "/") so it cannot alter the request path.
    safe_cohort_id = quote(str(cohort_id), safe="")
    endpoint = f"/api/v2-cli/cohort-browser/cohort/{safe_cohort_id}/query-results/async"

    body: dict = {"query": sql}
    if pagination is not None:
        body["pagination"] = pagination

    response = http_post(
        profile,
        endpoint,
        body=body,
        query_params={"cohortId": cohort_id},
    )

    # `or {}` also covers `"task": null`, which would otherwise raise
    # AttributeError instead of the intended CloudOSAPIError.
    task = response.get("task") or {}
    if not task.get("_id"):
        raise CloudOSAPIError("Invalid response from server: missing task ID")

    result = {
        "task_id": task["_id"],
        "status": task.get("status", "unknown"),
        "query": task.get("query", sql),
        "type": task.get("type", "unknown"),
        "sync_execution_timeout": response.get("syncExecutionTimeout", 5000),
        "full_response": response,
    }

    logger.info("Query submitted successfully. Task ID: %s", result["task_id"])
    return result
301
+
302
+
303
def query_status(task_id: str, profilename: str = "") -> dict:
    """Check the status of a submitted async query task.

    Args:
        task_id (str): Task ID returned by query_submit_async().
        profilename (str): Profile to use. Uses the default profile when empty.

    Returns:
        dict: Keys: task_id, status, type, count_of_results, query,
            created_at, started_at, ended_at, user, full_response. Fields
            absent from the API response fall back to placeholder values
            ("unknown", 0 or an empty string).

    Raises:
        CloudOSValidationError: If task_id is empty.
        CloudOSAPIError: On request or server errors.
    """
    from urllib.parse import quote

    validate_required_string(task_id, "task_id")
    profile = load_profile(profilename)
    # task_id is caller-supplied and becomes a URL path segment: escape it
    # (including "/") so it cannot alter the request path.
    safe_task_id = quote(str(task_id), safe="")
    endpoint = f"/api/v2-cli/cohort-browser/async-tasks/{safe_task_id}"
    response = http_get(profile, endpoint)

    return {
        "task_id": response.get("_id", task_id),
        "status": response.get("status", "unknown"),
        "type": response.get("type", "unknown"),
        "count_of_results": response.get("countOfResults", 0),
        "query": response.get("query", ""),
        "created_at": response.get("createdAt", ""),
        "started_at": response.get("startedAt", ""),
        "ended_at": response.get("endedAt", ""),
        "user": response.get("user", ""),
        "full_response": response,
    }
335
+
336
+
337
def query_results(task_id: str, profilename: str = "") -> pd.DataFrame:
    """Fetch results from a completed async query task.

    Pagination is configured at submission time (query_submit_async), not here.
    This function returns whichever page the task was configured for.

    Args:
        task_id (str): Task ID returned by query_submit_async().
        profilename (str): Profile to use. Uses the default profile when empty.

    Returns:
        pandas.DataFrame: Query results. The DataFrame carries metadata in
            ``.attrs``: total_rows, page, page_size (number of rows actually
            returned), total_pages.

    Raises:
        CloudOSValidationError: If task_id is empty.
        CloudOSAPIError: On request or server errors.
    """
    from urllib.parse import quote

    def _int_field(key: str, default: int) -> int:
        # Treat an explicit null like a missing field; int(None) would raise.
        value = response.get(key)
        return default if value is None else int(value)

    validate_required_string(task_id, "task_id")
    profile = load_profile(profilename)
    # task_id is caller-supplied and becomes a URL path segment: escape it
    # (including "/") so it cannot alter the request path.
    safe_task_id = quote(str(task_id), safe="")
    endpoint = f"/api/v2-cli/cohort-browser/async-tasks/{safe_task_id}/results"
    response = http_get(profile, endpoint)

    total_rows = _int_field("total", 0)
    # The API response misnames this field: "pageSize" here means total page count
    total_pages = _int_field("pageSize", 1)
    current_page = _int_field("pageNumber", 0)

    column_names = [col["name"] for col in response.get("columns") or []]
    df = convert_results_to_dataframe(response.get("data") or [], column_names)

    df.attrs["total_rows"] = total_rows
    df.attrs["page"] = current_page
    df.attrs["page_size"] = len(df)
    df.attrs["total_pages"] = total_pages

    return df
374
+
375
+
376
def query(
    cohort_id: str,
    sql: str,
    poll_interval: float = 2,
    max_wait: float = 600,
    page_size: int = 1000,
    all_pages: bool = True,
    profilename: str = "",
) -> pd.DataFrame:
    """Execute an SQL query with automatic polling and result fetching.

    Orchestrates the full lifecycle: submit -> poll -> fetch. When all_pages
    is True, submits one async task per page and combines the results.
    Remaining pages are polled and fetched concurrently.

    Args:
        cohort_id (str): ID of the cohort to query.
        sql (str): SQL query to execute.
        poll_interval (int|float): Seconds between status checks (minimum 1).
        max_wait (int|float): Maximum seconds to wait for a task to complete
            (minimum 1).
        page_size (int): Number of rows per page (minimum 1).
        all_pages (bool): When True, fetches all pages and combines them.
            When False, returns only the first page.
        profilename (str): Profile to use. Uses the default profile when empty.

    Returns:
        pandas.DataFrame: Query results with metadata in ``.attrs``.

    Raises:
        CloudOSValidationError: If required parameters are missing or invalid.
        CloudOSAPIError: On request or server errors.
        CloudOSTimeoutError: If a task does not finish within max_wait seconds.
        CloudOSQueryError: If a task reports a failed status.
    """

    def _is_valid_seconds(value: object) -> bool:
        # bool is an int subclass and must not count as a duration. NaN is
        # the only value unequal to itself; it would slip through `< 1` and,
        # as max_wait, make every `elapsed >= max_wait` check false forever.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False
        if value != value:
            return False
        return value >= 1

    validate_required_string(cohort_id, "cohort_id")
    validate_required_string(sql, "sql")

    if not _is_valid_seconds(poll_interval):
        raise CloudOSValidationError("poll_interval must be at least 1 second.")
    if not _is_valid_seconds(max_wait):
        raise CloudOSValidationError("max_wait must be at least 1 second.")
    if isinstance(page_size, bool) or not isinstance(page_size, int) or page_size < 1:
        raise CloudOSValidationError("page_size must be a positive integer.")

    logger.info("Submitting initial query...")
    task = query_submit_async(
        cohort_id, sql,
        pagination={"pageNumber": 0, "pageSize": page_size},
        profilename=profilename,
    )
    task_id = task["task_id"]

    # %g renders 600 as "600" but also copes with fractional/infinite values.
    logger.info("Polling for completion (max wait: %g seconds)...", max_wait)
    _poll_until_complete(task_id, profilename, poll_interval, max_wait, "Page 0")

    logger.info("Page 0 completed, fetching results...")
    first_page = query_results(task_id, profilename)
    total_pages = first_page.attrs.get("total_pages", 1)

    if not all_pages:
        if total_pages > 1:
            logger.info(
                "Note: Query has %d total rows across %d pages. "
                "Only page 0 (%d rows) returned. Use all_pages=True to fetch all.",
                first_page.attrs.get("total_rows", 0),
                total_pages,
                len(first_page),
            )
        logger.info("Query complete: %d rows", len(first_page))
        return first_page

    if total_pages <= 1:
        logger.info("Query complete: %d rows", len(first_page))
        return first_page

    return _fetch_remaining_pages(
        cohort_id, sql, page_size, poll_interval, max_wait,
        profilename, total_pages, first_page,
    )
cloudos_cb/utils.py ADDED
@@ -0,0 +1,44 @@
1
+ """Shared utility helpers for cloudos_cb."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ import pandas as pd
8
+
9
+ from .exceptions import CloudOSValidationError
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def validate_required_string(value: str, param_name: str) -> None:
    """Raise CloudOSValidationError when value is empty, blank or None.

    Non-string values are only checked for truthiness, as before; strings
    must additionally contain at least one non-whitespace character.

    Args:
        value: Value to validate.
        param_name (str): Parameter name used in the error message.

    Raises:
        CloudOSValidationError: If value is falsy or a whitespace-only string.
    """
    # A string of only spaces/newlines is truthy but is not a usable
    # identifier or SQL statement, so treat it the same as empty.
    is_blank = isinstance(value, str) and not value.strip()
    if not value or is_blank:
        raise CloudOSValidationError(
            f"{param_name} is required and cannot be empty."
        )
28
+
29
+
30
def convert_results_to_dataframe(data: list[dict], column_names: list[str]) -> pd.DataFrame:
    """Build a DataFrame from API row objects, keeping the given column order.

    Only the keys listed in ``column_names`` are read from each row; a key
    that a row does not have is filled with None.

    Args:
        data (list[dict]): Row objects from the API response.
        column_names (list[str]): Ordered list of column names.

    Returns:
        pandas.DataFrame: One row per input object. An empty frame with the
            requested columns when ``data`` is empty.
    """
    if data:
        records = []
        for row in data:
            record = {}
            for name in column_names:
                record[name] = row.get(name)
            records.append(record)
        return pd.DataFrame(records, columns=column_names)

    return pd.DataFrame(columns=column_names)
@@ -0,0 +1,400 @@
1
+ Metadata-Version: 2.4
2
+ Name: cloudos-cb-py
3
+ Version: 1.2.0
4
+ Summary: Python client for CloudOS Cohort Browser API
5
+ Author-email: David Pineyro <david.pineyro@lifebit.ai>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Lifebit
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/lifebit-ai/cloudos-cb-py
29
+ Project-URL: Repository, https://github.com/lifebit-ai/cloudos-cb-py
30
+ Project-URL: Issues, https://github.com/lifebit-ai/cloudos-cb-py/issues
31
+ Project-URL: Changelog, https://github.com/lifebit-ai/cloudos-cb-py/blob/main/CHANGELOG.md
32
+ Keywords: cloudos,cohort-browser,api-client,bioinformatics
33
+ Classifier: Development Status :: 5 - Production/Stable
34
+ Classifier: Intended Audience :: Science/Research
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.9
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Requires-Python: >=3.9
42
+ Description-Content-Type: text/markdown
43
+ License-File: LICENSE
44
+ Requires-Dist: requests>=2.28.0
45
+ Requires-Dist: pandas>=1.5.0
46
+ Provides-Extra: dev
47
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
48
+ Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
49
+ Requires-Dist: flake8>=6.0.0; extra == "dev"
50
+ Requires-Dist: responses>=0.23.0; extra == "dev"
51
+ Dynamic: license-file
52
+
53
+ # cloudos-cb-py
54
+
55
+ Python client for the CloudOS Cohort Browser API. Provides functions for schema discovery,
56
+ table exploration, and SQL query execution with team-based access control.
57
+
58
+ ## Requirements
59
+
60
+ - Python >= 3.9
61
+ - requests >= 2.28.0
62
+ - pandas >= 1.5.0
63
+
64
+ ## Prerequisites
65
+
66
+ **IMPORTANT:** Before using this package, ensure the following requirements are met:
67
+
68
+ - **Bastion must be enabled** for your workspace
69
+ - **You are running the package from within an interactive session**
70
+ - **The interactive session and the cohort queried must be in the same workspace**
71
+
72
+ Without these prerequisites, API calls will fail even with valid credentials.
73
+
74
+ ## Installation
75
+
76
+ ### From PyPI (recommended)
77
+
78
+ ```bash
79
+ pip install cloudos-cb-py
80
+ ```
81
+
82
+ ### From source
83
+
84
+ ```bash
85
+ git clone https://github.com/lifebit-ai/cloudos-cb-py
86
+ cd cloudos-cb-py
87
+ pip install .
88
+ ```
89
+
90
+ ### Development install (includes test dependencies)
91
+
92
+ ```bash
93
+ pip install -e ".[dev]"
94
+ ```
95
+
96
+ ## Quick Start
97
+
98
+ ### 1. Configure a profile
99
+
100
+ ```python
101
+ import cloudos_cb
102
+
103
+ cloudos_cb.configure(
104
+ profilename="production",
105
+ apikey="your-api-key-here",
106
+ workspace_id="953h453uhr73894hhr9348h9",
107
+ set_default=True,
108
+ )
109
+ ```
110
+
111
+ Credentials are stored in `~/.cloudos-cb/config.json` with 0600 permissions.
112
+ Set `CLOUDOS_CONFIG_DIR` to store the file elsewhere.
113
+
114
+ ### 2. List configured profiles
115
+
116
+ ```python
117
+ profiles = cloudos_cb.profile_list()
118
+ print(profiles)
119
+ # Returns a pandas DataFrame with columns:
120
+ # profile_name, workspace_id, base_url, default, created_at, updated_at
121
+ ```
122
+
123
+ ### 3. Discover cohort tables
124
+
125
+ ```python
126
+ tables = cloudos_cb.cohort_tables(cohort_id="1a2b3c4d5e6f7g8h9i10j11k")
127
+ print(tables)
128
+ # Cohort 1a2b3c4d5e6f7g8h9i10j11k:
129
+ # - omop_data.person
130
+ # - person_id (integer)
131
+ # - year_of_birth (integer)
132
+ # - gender_concept_id (integer)
133
+ # ...
134
+ # - omop_data.observation
135
+ # ...
136
+ #
137
+ # Total: 1 database(s), 5 table(s)
138
+
139
+ # Access raw data
140
+ schema_list = tables.schemas
141
+ ```
142
+
143
+ ### 4. Validate SQL (optional but recommended)
144
+
145
+ ```python
146
+ result = cloudos_cb.sql_validate(
147
+ sql="SELECT person_id FROM omop_data.person WHERE year_of_birth >= 1960"
148
+ )
149
+
150
+ if result["isValid"]:
151
+ print("SQL is valid")
152
+ else:
153
+ print("SQL invalid:", result["error"]["message"])
154
+ ```
155
+
156
+ ### 5. Execute a query (high-level)
157
+
158
+ ```python
159
+ df = cloudos_cb.query(
160
+ cohort_id="1a2b3c4d5e6f7g8h9i10j11k",
161
+ sql="SELECT person_id, gender_concept_id FROM omop_data.person LIMIT 100",
162
+ )
163
+ print(df.head())
164
+ print(f"Total rows: {df.attrs['total_rows']}")
165
+ ```
166
+
167
+ By default `query()` fetches all pages automatically. To return only the first page:
168
+
169
+ ```python
170
+ df = cloudos_cb.query(
171
+ cohort_id="1a2b3c4d5e6f7g8h9i10j11k",
172
+ sql="SELECT person_id FROM omop_data.person",
173
+ all_pages=False,
174
+ page_size=500,
175
+ )
176
+ ```
177
+
178
+ ### 6. Manual workflow
179
+
180
+ For fine-grained control over the submit / poll / fetch cycle:
181
+
182
+ ```python
183
+ # Step 1: Submit
184
+ task = cloudos_cb.query_submit_async(
185
+ cohort_id="1a2b3c4d5e6f7g8h9i10j11k",
186
+ sql="SELECT person_id FROM omop_data.person",
187
+ pagination={"pageNumber": 0, "pageSize": 100},
188
+ )
189
+ print("Task ID:", task["task_id"])
190
+
191
+ # Step 2: Poll
192
+ status = cloudos_cb.query_status(task_id=task["task_id"])
193
+ print("Status:", status["status"])
194
+ # status["status"] is one of: "pending", "running", "completed", "failed"
195
+
196
+ # Step 3: Fetch results when completed
197
+ df = cloudos_cb.query_results(task_id=task["task_id"])
198
+ print(df)
199
+ ```
200
+
201
+ ## API Reference
202
+
203
+ ### `configure(profilename, apikey, workspace_id, base_url=..., set_default=False)`
204
+
205
+ Create or update a named credential profile.
206
+
207
+ | Parameter | Type | Description |
208
+ |-----------|------|-------------|
209
+ | `profilename` | str | Profile name (required) |
210
+ | `apikey` | str | API key (required) |
211
+ | `workspace_id` | str | Workspace/team ID (required) |
212
+ | `base_url` | str | CloudOS base URL (default: `https://cloudos.lifebit.ai`) |
213
+ | `set_default` | bool | Mark this profile as the default |
214
+
215
+ ---
216
+
217
+ ### `profile_list()`
218
+
219
+ Return a `pandas.DataFrame` of all configured profiles.
220
+
221
+ ---
222
+
223
+ ### `cohort_tables(cohort_id, profilename="")`
224
+
225
+ Retrieve schemas, tables, and columns for a cohort.
226
+
227
+ Returns a `CohortTables` object. Print it for a human-readable tree, or
228
+ access `.schemas` for the raw list.
229
+
230
+ ---
231
+
232
+ ### `sql_validate(sql, profilename="")`
233
+
234
+ Validate SQL syntax and references before execution.
235
+
236
+ Returns a `dict` with `isValid` (bool), `tableReferences`, `columnReferences`,
237
+ and on failure an `error` dict with a `message` key.
238
+
239
+ ---
240
+
241
+ ### `query_submit_async(cohort_id, sql, pagination=None, profilename="")`
242
+
243
+ Submit an async SQL task. Returns a `dict` with:
244
+
245
+ | Key | Description |
246
+ |-----|-------------|
247
+ | `task_id` | Use this to poll status and fetch results |
248
+ | `status` | Initial status (typically `"pending"`) |
249
+ | `query` | Echo of the submitted SQL |
250
+ | `type` | Task type string |
251
+ | `sync_execution_timeout` | Server-side timeout hint in ms |
252
+ | `full_response` | Raw API response |
253
+
254
+ `pagination` is an optional `dict` with `pageNumber` (int >= 0) and
255
+ `pageSize` (int >= 1).
256
+
257
+ ---
258
+
259
+ ### `query_status(task_id, profilename="")`
260
+
261
+ Check task status. Returns a `dict` with `task_id`, `status`, `type`,
262
+ `count_of_results`, `query`, `created_at`, `started_at`, `ended_at`,
263
+ `user`, `full_response`.
264
+
265
+ ---
266
+
267
+ ### `query_results(task_id, profilename="")`
268
+
269
+ Fetch results for a completed task. Returns a `pandas.DataFrame` with
270
+ metadata in `.attrs`:
271
+
272
+ | Attribute | Description |
273
+ |-----------|-------------|
274
+ | `total_rows` | Total rows across all pages |
275
+ | `page` | Page index returned |
276
+ | `page_size` | Rows in this page |
277
+ | `total_pages` | Total number of pages available |
278
+
279
+ ---
280
+
281
+ ### `query(cohort_id, sql, poll_interval=2, max_wait=600, page_size=1000, all_pages=True, profilename="")`
282
+
283
+ High-level orchestrator. Submits, polls, and fetches results automatically.
284
+ When `all_pages=True`, submits one async task per page and concatenates them.
285
+
286
+ | Parameter | Default | Description |
287
+ |-----------|---------|-------------|
288
+ | `poll_interval` | 2 | Seconds between status checks (minimum 1) |
289
+ | `max_wait` | 600 | Maximum seconds to wait per task |
290
+ | `page_size` | 1000 | Rows per page |
291
+ | `all_pages` | True | Fetch all pages and combine them |
292
+
293
+ ---
294
+
295
+ ## Using multiple profiles
296
+
297
+ ```python
298
+ # Configure multiple profiles
299
+ cloudos_cb.configure(
300
+ profilename="production",
301
+ apikey="prod-key",
302
+ workspace_id="prod-workspace",
303
+ set_default=True,
304
+ )
305
+ cloudos_cb.configure(
306
+ profilename="staging",
307
+ apikey="stage-key",
308
+ workspace_id="stage-workspace",
309
+ )
310
+
311
+ # Use default profile (production)
312
+ df = cloudos_cb.query(cohort_id="cohort-prod", sql="SELECT 1")
313
+
314
+ # Explicitly use staging profile
315
+ df = cloudos_cb.query(
316
+ cohort_id="cohort-stage",
317
+ sql="SELECT 1",
318
+ profilename="staging",
319
+ )
320
+ ```
321
+
322
+ ## Configuration storage
323
+
324
+ The config file is located at:
325
+ - `$CLOUDOS_CONFIG_DIR/config.json` when the env var is set
326
+ - `~/.cloudos-cb/config.json` otherwise (home directory)
327
+
328
+ File permissions are set to 0600 (user read/write only). The default location
329
+ (`~/.cloudos-cb/`) is outside any repository. If you override `CLOUDOS_CONFIG_DIR`
330
+ to a path inside a project, add that directory to your `.gitignore`.
331
+
332
+ ## Error handling
333
+
334
+ ```python
335
+ from cloudos_cb import (
336
+ CloudOSAuthError,
337
+ CloudOSAccessError,
338
+ CloudOSServerError,
339
+ CloudOSConfigError,
340
+ CloudOSValidationError,
341
+ )
342
+
343
+ try:
344
+ df = cloudos_cb.query(cohort_id="...", sql="SELECT 1")
345
+ except CloudOSAuthError:
346
+ print("Authentication failed - check your API key.")
347
+ except CloudOSAccessError:
348
+ print("Access denied or resource not found.")
349
+ except CloudOSServerError:
350
+ print("Server error - try again later.")
351
+ except CloudOSConfigError:
352
+ print("Profile not configured - run configure() first.")
353
+ except CloudOSValidationError as e:
354
+ print(f"Invalid input: {e}")
355
+ ```
356
+
357
+ ## Logging
358
+
359
+ The package uses Python's standard `logging` module under the `cloudos_cb`
360
+ namespace. To see informational messages:
361
+
362
+ ```python
363
+ import logging
364
+ logging.basicConfig(level=logging.INFO)
365
+ ```
366
+
367
+ ## Running tests
368
+
369
+ ```bash
370
+ pip install -e ".[dev]"
371
+ pytest
372
+ ```
373
+
374
+ To check code style:
375
+
376
+ ```bash
377
+ flake8 cloudos_cb tests
378
+ ```
379
+
380
+ ## Package structure
381
+
382
+ ```
383
+ cloudos-cb-py/
384
+ ├── pyproject.toml # Package metadata and build config
385
+ ├── CHANGELOG.md
386
+ ├── README.md
387
+ ├── LICENSE
388
+ ├── cloudos_cb/ # Package source
389
+ │ ├── __init__.py # Public API
390
+ │ ├── exceptions.py # Custom exception classes
391
+ │ ├── config.py # Profile management
392
+ │ ├── http.py # Authenticated HTTP helpers
393
+ │ ├── utils.py # Shared utilities
394
+ │ └── queries.py # Cohort Browser query functions
395
+ └── tests/
396
+ ├── test_config.py
397
+ ├── test_http.py
398
+ ├── test_utils.py
399
+ └── test_query.py
400
+ ```
@@ -0,0 +1,11 @@
1
+ cloudos_cb/__init__.py,sha256=OCEFYxynZfj52wzsKrgo76sVQZ9SlZxBycrhJ5-KMYU,988
2
+ cloudos_cb/config.py,sha256=Z-KDHoNXehFqzY3eGZ2KgAMmxb_VC4Nj68znxd4IN5g,6606
3
+ cloudos_cb/exceptions.py,sha256=HVACrXQha6IuIY9xQB3UpSXHanaJgPoGpGb4exfEA-c,1260
4
+ cloudos_cb/http.py,sha256=Ty3miOLRGqWe9eyD987tJ5Tz6jQXMhJ_c1mv4Au8dvk,4779
5
+ cloudos_cb/queries.py,sha256=5WbAA4CX_RRob1p3xpnAeS7uudwG4_QZHSj1sDUI4pg,16302
6
+ cloudos_cb/utils.py,sha256=-31zGKyYiOrLF2M9WLNmsmcYOMao3kNoMj7IhnVAqIY,1222
7
+ cloudos_cb_py-1.2.0.dist-info/licenses/LICENSE,sha256=ACBpTnDEVaAfEQSdrypv9uiDoKSiovBenzkMYixrj_E,1064
8
+ cloudos_cb_py-1.2.0.dist-info/METADATA,sha256=C3sx1CE9TapMh7bIOtzgcOswjE8e81j0Xddp-B4e-Go,11276
9
+ cloudos_cb_py-1.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
10
+ cloudos_cb_py-1.2.0.dist-info/top_level.txt,sha256=pI-uF3e1PafDTdQXy3JbLahKa4Z_mNSmYlF503-OyTk,11
11
+ cloudos_cb_py-1.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Lifebit
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ cloudos_cb