pyvark 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,171 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyvark
3
+ Version: 0.1.0
4
+ Summary: Python REST client for the Anthive single-cell RNA-seq browser (sibling of the Go `vark` CLI)
5
+ Author-email: Mark Fiers <mark.fiers@kuleuven.be>
6
+ License: MIT
7
+ Project-URL: Homepage, https://codeberg.org/mfiers/pyvark
8
+ Project-URL: Repository, https://codeberg.org/mfiers/pyvark
9
+ Project-URL: Go CLI, https://codeberg.org/mfiers/vark
10
+ Project-URL: Bug Tracker, https://codeberg.org/mfiers/pyvark/issues
11
+ Keywords: bioinformatics,single-cell,rna-seq,anthive,rest-client
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Operating System :: OS Independent
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: requests>=2.25.0
26
+ Provides-Extra: pandas
27
+ Requires-Dist: pandas>=1.3.0; extra == "pandas"
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
30
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
31
+ Requires-Dist: pandas>=1.3.0; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ <p align="center"><img src="https://codeberg.org/mfiers/pyvark/raw/branch/main/doc/logo.png" alt="pyvark" width="200"></p>
35
+
36
+ # pyvark
37
+
38
+ Python client for the [Anthive](https://codeberg.org/mfiers/anthive4)
39
+ single-cell RNA-seq REST API. Sibling of the Go [`vark`](https://codeberg.org/mfiers/vark)
40
+ CLI — same backend, two front ends.
41
+
42
+ API surface verified against **anthive REST API 2.7.2** (2026-06-20).
43
+
44
+ ## Why the dual name?
45
+
46
+ The Go CLI ships as a binary called `vark`. To avoid clobbering it on
47
+ the user's `$PATH` and to keep the PyPI / Codeberg slug obvious, the
48
+ **distribution name** is `pyvark` but the **importable name** is `vark`.
49
+
50
+ ```sh
51
+ pip install pyvark # distribution
52
+ python -c "from vark import AnthiveClient; print('ok')" # usage
53
+ ```
54
+
55
+ (Both CLI and library live next to each other in the same Anthive setup
56
+ with no shell collision: `vark` = the Go binary, `vark` = the Python
57
+ import.)
58
+
59
+ ## Install
60
+
61
+ From Codeberg (no PyPI publish yet):
62
+
63
+ ```sh
64
+ pip install git+ssh://git@codeberg.org/mfiers/pyvark.git
65
+ ```
66
+
67
+ Editable from a local checkout:
68
+
69
+ ```sh
70
+ git clone ssh://git@codeberg.org/mfiers/pyvark.git
71
+ cd pyvark
72
+ pip install -e .
73
+ # with pandas for `format='dataframe'` support:
74
+ pip install -e ".[pandas]"
75
+ ```
76
+
77
+ Pyodide / JupyterLite:
78
+
79
+ ```python
80
+ import micropip
81
+ await micropip.install("pyvark")
82
+ from vark import AnthiveClient
83
+ client = AnthiveClient() # auto-detects {origin}/api/ in the browser
84
+ ```
85
+
86
+ ## Minimal example
87
+
88
+ ```python
89
+ from vark import AnthiveClient
90
+
91
+ client = AnthiveClient(
92
+ "https://my.anthive.example/api",
93
+ auth=("user", "password"),
94
+ )
95
+
96
+ # What's on this server?
97
+ print(client.get_version()["version"])
98
+ databases = client.get_databases()
99
+ print(f"{len(databases)} datasets available")
100
+
101
+ # Pick a dataset and show its metadata fields
102
+ info = client.get_database_info(databases[0]["id"])
103
+ print(info["title"], info["n_cells"], "cells")
104
+
105
+ # Render a UMAP scatter server-side and write the PNG
106
+ plot = client.get_plot(
107
+ info["id"], "scatter",
108
+ color="cell_type",
109
+ palette_categorical="tab20",
110
+ width=6, height=5, dpi=150,
111
+ )
112
+ open("umap.png", "wb").write(plot["bytes"])
113
+
114
+ # The X-Plot-Caption header carries anthive's prose figure legend —
115
+ # this is the ONLY place the multi-sentence caption exists.
116
+ print(plot["caption"])
117
+ ```
118
+
119
+ ## API coverage (highlights)
120
+
121
+ * `get_root`, `get_health`, `get_metrics`, `get_version`,
122
+ `get_changelog` — version + latency telemetry (`/health` exposes
123
+ `mean_response_ms` / `p50_response_ms` / `n_samples`).
124
+ * `get_databases`, `get_database_info`, `get_group(group_id)` —
125
+ catalog + per-collection landing-page data (API 2.5+).
126
+ * `get_plot(db_id, geom, ...)` — every server-side geom: `scatter`,
127
+ `hexbin`, `kde2d`, `violin`, `box`, `bar`, `histogram`, `ecdf`,
128
+ `kde`, `heatmap`, `rolling`, `volcano`, `ma`, `forest`, `de_heatmap`.
129
+ Captures the `X-Plot-Caption` response header (the multi-sentence
130
+ figure legend — API 2.7.2+). Supports `color_scale=auto|sequential|
131
+ divergent`, plot clamps (`log2fc_clip`, `neglog10p_clip`,
132
+ `logmean_clip`), bar `group_by`, hexbin auto-clip
133
+ (`vmin_quantile` / `vmax_quantile`), per-axis transforms
134
+ (`transform_x` / `transform_y`, `asinh_scale`), KDE knobs
135
+ (`kde_n`, `kde_bw`, `n_levels`, `iso_overlay`, `point_overlay`),
136
+ marginals / regline overlays. Data export via `format="csv"` /
137
+ `"tsv"` returns the dataframe the plot was built from (API 2.6+).
138
+ * `list_de_studies`, `get_de_study`, `list_de_contrasts`,
139
+ `get_de_rows`, `get_de_by_gene` — DE data flow (API 2.3+).
140
+ * `analytics_schema`, `analytics_query`, `analytics_viz` —
141
+ SELECT-only SQL sandbox + Parquet-backed visualisation.
142
+ * `module_score`, `list_module_scores` — on-the-fly and pre-computed
143
+ module scores.
144
+ * `list_genesets`, `get_geneset`, `rescan_genesets`.
145
+ * `pick_fastest(base_urls, ...)` — server-selection helper that
146
+ consumes `/health` latency telemetry.
147
+
148
+ ## Tests
149
+
150
+ ```sh
151
+ # Offline (no server needed):
152
+ uv run --with pytest --with requests python -m pytest tests/test_offline.py -v
153
+
154
+ # Live smoke (round-trip):
155
+ ANTHIVE_TEST_URL=https://my.anthive/api \
156
+ ANTHIVE_TEST_USER=user ANTHIVE_TEST_PASSWORD=pass \
157
+ uv run --with pytest --with requests --with pandas \
158
+ python -m pytest tests/test_smoke.py -v
159
+ ```
160
+
161
+ ## Versioning
162
+
163
+ `pyvark` starts at **0.1.0** as a clean break from the legacy
164
+ `antclient` 1.x history that previously lived under
165
+ `anthive4/antclient/`. The Anthive REST API uses its own semver
166
+ (`X.Y.Z`) — see `client.AnthiveClient.API_TARGET` for the version this
167
+ release was last verified against.
168
+
169
+ ## License
170
+
171
+ MIT — see `LICENSE`.
@@ -0,0 +1,8 @@
1
+ pyvark-0.1.0.dist-info/licenses/LICENSE,sha256=6kzRpf4IX-o9k2hWnGXLa4Hjq37utO3Cp4E4J4aUeaA,1067
2
+ vark/__init__.py,sha256=S9mKAwYHHKK-bANs6HYHD_8h5IeVkVfB856btcqY49c,1166
3
+ vark/client.py,sha256=Zp_yGhpnGzpnOXKVp_O8t_vFNFjHWBSozJqvTMKpyzU,39131
4
+ vark/helpers.py,sha256=-7fMJ6CSjZWJR7UqtZ7E0Zp8vZ5lRuL4s-pYWgCRl1E,3892
5
+ pyvark-0.1.0.dist-info/METADATA,sha256=PFE8tsghbzlKVn2pYElYB1jp6SjE_aSPpwrt7FAPsuk,5977
6
+ pyvark-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ pyvark-0.1.0.dist-info/top_level.txt,sha256=ZGRlLC2T_lgLJTk1QbIOmm826prOWR8sZXWCViPX42M,5
8
+ pyvark-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mark Fiers
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ vark
vark/__init__.py ADDED
@@ -0,0 +1,31 @@
1
+ """pyvark — Python client for the Anthive single-cell REST API.
2
+
3
+ This is the **Python** sibling of the Go `vark` CLI (codeberg.org/mfiers/vark).
4
+ Distributed on Codeberg as the `pyvark` repo to disambiguate from the Go
5
+ binary; the importable package name inside is just `vark`.
6
+
7
+ pip install pyvark # PyPI / Codeberg distribution name
8
+ from vark import AnthiveClient # importable name
9
+
10
+ The Go CLI and this Python client both talk to the same anthive REST API
11
+ (`bin/api/ant-serve` in anthive4). When the API version changes, both move
12
+ forward together — `vark.AnthiveClient.API_TARGET` records the API version
13
+ this release was last verified against.
14
+
15
+ Quick start::
16
+
17
+ from vark import AnthiveClient
18
+ c = AnthiveClient("https://my.anthive/api", auth=("user", "pass"))
19
+ print(c.get_version()["version"])
20
+
21
+ For the legacy `find_database` / `find_metadata` helpers from antclient,
22
+ use ``from vark.helpers import find_database, find_metadata``.
23
+ """
24
+
25
+ from .client import AnthiveClient
26
+
27
+ # Backward-compatibility alias (matches antclient 1.x).
28
+ AntClient = AnthiveClient
29
+
30
+ __version__ = "0.1.0"
31
+ __all__ = ["AnthiveClient", "AntClient", "__version__"]
vark/client.py ADDED
@@ -0,0 +1,910 @@
1
+ """AnthiveClient — Python REST client for Anthive (single-cell RNA-seq).
2
+
3
+ Targets the anthive REST API contract. See
4
+ `https://codeberg.org/mfiers/pyvark` for sources and
5
+ `https://codeberg.org/mfiers/vark` for the Go CLI that ships the same surface.
6
+
7
+ API surface verified against **REST API 2.7.2** (2026-06-20). Older
8
+ servers still work for every endpoint they expose; new params degrade
9
+ to "server ignores it" rather than client-side errors.
10
+
11
+ Works in standard Python 3.9+, in Pyodide / JupyterLite, and inside
12
+ Streamlit (auto-detects environment).
13
+
14
+ Requirements:
15
+ requests (required)
16
+ pandas (optional — for ``format='dataframe'``)
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import Any, Dict, List, Optional, Union
22
+
23
+ import requests
24
+
25
+
26
+ __version__ = "0.1.0"
27
+ __all__ = ["AnthiveClient"]
28
+
29
+
30
+ # ── Optional Streamlit caching ────────────────────────────────────────────────
31
+ #
32
+ # antclient supported a Streamlit caching path. Keep the shape so the
33
+ # legacy frontend/streamlit/ pages (now archived) and any third-party
34
+ # Streamlit users keep working, but default to a no-op decorator
35
+ # everywhere else.
36
try:
    import streamlit as _streamlit  # type: ignore[unused-import]
except ImportError:
    # Streamlit is not installed: both factories hand back a pass-through
    # decorator, so decorated callables are returned unchanged.
    _STREAMLIT_AVAILABLE = False

    def _cache_short(ttl: int = 60):
        """Return a no-op decorator (``ttl`` is accepted but unused)."""
        return lambda func: func

    def _cache_long(ttl: int = 3600):
        """Return a no-op decorator (``ttl`` is accepted but unused)."""
        return lambda func: func
else:
    # Streamlit is importable: delegate caching to ``cache_data``.
    _STREAMLIT_AVAILABLE = True

    def _cache_short(ttl: int = 60):
        """Return ``streamlit.cache_data`` configured with ``ttl`` seconds."""
        return _streamlit.cache_data(ttl=ttl)

    def _cache_long(ttl: int = 3600):
        """Return ``streamlit.cache_data`` configured with ``ttl`` seconds."""
        return _streamlit.cache_data(ttl=ttl)
57
+
58
+
59
+ # ── DataFrame coercion helper ─────────────────────────────────────────────────
60
+
61
+
62
def _to_dataframe(result: Dict[str, Any], fill_na_genes: bool = True):
    """Convert a ``{'data': [...], 'columns': [...]}`` API response to a
    pandas DataFrame.

    Args:
        result: Query response dict. ``data`` defaults to an empty list;
                when ``columns`` is absent pandas infers the labels from
                the records instead of discarding them.
        fill_na_genes: If True, replace NaN with 0 in float columns that
                       look like gene-expression columns (heuristic — skips
                       known metadata names, ``dim_*`` embeddings and a few
                       QC-style prefixes). Metadata NaN is preserved.

    Returns:
        The resulting ``pandas.DataFrame``.

    Raises:
        ImportError: pandas not installed.
    """
    try:
        import pandas as pd
    except ImportError as exc:
        raise ImportError(
            "pandas is required for format='dataframe'. "
            "Install with: pip install pandas"
        ) from exc

    # ``columns=None`` lets pandas derive labels from the data. Passing an
    # empty list (the old default) selects zero columns and silently drops
    # every record when the response carries no ``columns`` key.
    df = pd.DataFrame(result.get('data', []), columns=result.get('columns'))

    if not fill_na_genes:
        return df

    common_metadata = {
        'cell_name', 'cell_id', 'cell_type', 'celltype', 'tissue',
        'sample', 'donor', 'batch', 'condition', 'treatment',
        'cluster', 'seurat_clusters', 'n_genes', 'n_counts',
        'nCount_RNA', 'nFeature_RNA', 'percent.mt', 'percent_mito',
        'phase', 'doublet', 'orig.ident',
    }
    metadata_prefixes = ('n_', 'percent', 'pct_', 'log', 'total_')

    for col in df.columns:
        # Column labels are not guaranteed to be strings (e.g. positional
        # integer labels); compare on the textual form so the heuristic
        # never raises AttributeError.
        name = str(col)
        if name in common_metadata or name.startswith('dim_'):
            continue
        if name.lower().startswith(metadata_prefixes):
            continue
        if pd.api.types.is_float_dtype(df[col]):
            df[col] = df[col].fillna(0)
    return df
105
+
106
+
107
+ # ── Client ────────────────────────────────────────────────────────────────────
108
+
109
+
110
+ class AnthiveClient:
111
+ """Lightweight REST client for the Anthive single-cell data browser.
112
+
113
+ Args:
114
+ base_url: Base URL — ``"https://host/api"`` or ``"http://localhost:8080"``.
115
+ When ``None``, falls back to the ``ANTHIVE_API_URL`` env var, then
116
+ (in Pyodide/browser) to a ``js.ANTHIVE_API_URL`` global or else
117
+ ``{js.window.location.origin}/api``, and finally to ``http://localhost:8080``.
118
+ timeout: per-request timeout (seconds). Default 30.
119
+ auth: HTTP authentication, passed straight through to ``requests``.
120
+ Use ``(user, password)`` for HTTP Basic; or any
121
+ ``requests.auth.AuthBase`` subclass.
122
+ verify: TLS-verify flag forwarded to ``requests`` (default ``True``).
123
+ Set to ``False`` when hitting a server with a self-signed cert.
124
+
125
+ Example::
126
+
127
+ from vark import AnthiveClient
128
+ c = AnthiveClient("https://my.anthive.example/api",
129
+ auth=("user", "pass"))
130
+ info = c.get_version()
131
+ print(info["version"], info["plot_geoms"])
132
+ """
133
+
134
+ #: Anthive REST API version this client release was last verified against.
135
+ API_TARGET = "2.7.2"
136
+
137
+ #: Server-side plot geoms the client knows about. Validation prevents a
138
+ #: round-trip on typos. Update with every new server geom.
139
+ PLOT_GEOMS = frozenset({
140
+ 'scatter', 'hexbin', 'kde2d',
141
+ 'violin', 'box', 'bar',
142
+ 'histogram', 'ecdf', 'kde',
143
+ 'heatmap', 'rolling',
144
+ # DE-driven geoms (REST API 2.3+)
145
+ 'volcano', 'ma', 'forest',
146
+ # DE-heatmap (REST API 2.7+)
147
+ 'de_heatmap',
148
+ })
149
+
150
+ #: Supported output formats. ``csv`` / ``tsv`` were added in 2.6.0 and
151
+ #: return the same dataframe the plot was built from (not the image).
152
+ PLOT_OUTPUT_FORMATS = frozenset({'png', 'svg', 'pdf', 'csv', 'tsv'})
153
+
154
def __init__(
    self,
    base_url: Optional[str] = None,
    timeout: int = 30,
    auth: Optional[Any] = None,
    verify: Union[bool, str] = True,
):
    """Resolve the server base URL and remember the per-request settings.

    Resolution order when ``base_url`` is ``None``: the ``ANTHIVE_API_URL``
    environment variable, then (if a ``js`` module is importable) its
    ``ANTHIVE_API_URL`` attribute or ``{window.location.origin}/api/``,
    and finally ``http://localhost:8080``. Trailing slashes are stripped
    from whatever URL is chosen.
    """
    fallback_url = "http://localhost:8080"

    if base_url is None:
        import os
        base_url = os.environ.get('ANTHIVE_API_URL')

    if base_url is None:
        # Pyodide/browser: take origin from JS.
        try:
            import js  # type: ignore
            if hasattr(js, 'ANTHIVE_API_URL'):
                base_url = js.ANTHIVE_API_URL
            else:
                origin = str(js.window.location.origin)
                base_url = f"{origin}/api/"
        except Exception:
            # Best-effort probe: any failure (no ``js`` module, missing
            # attribute, ...) means the local default is used.
            base_url = fallback_url

    # An empty string (e.g. an empty env var) also lands on the default.
    self.base_url = base_url.rstrip('/') if base_url else fallback_url
    self.timeout = timeout
    self.headers: Dict[str, str] = {}
    self.auth = auth
    self.verify = verify
181
+
182
+ # ── HTTP primitives ──────────────────────────────────────────────────
183
+
184
def _get(self, path: str, params: Optional[Dict] = None) -> Any:
    """GET ``base_url + path`` and return the decoded JSON body.

    The client's headers, timeout, auth and TLS-verify settings are applied
    to the request; ``raise_for_status`` is called before decoding.
    """
    reply = requests.get(
        f"{self.base_url}{path}",
        params=params,
        headers=self.headers,
        timeout=self.timeout,
        auth=self.auth,
        verify=self.verify,
    )
    reply.raise_for_status()
    return reply.json()
192
+
193
def _get_raw(self, path: str,
             params: Optional[Dict] = None) -> 'requests.Response':
    """GET ``base_url + path`` and return the response object itself.

    Used where the body is not JSON (plots, changelog).
    ``raise_for_status`` is called before the response is returned.
    """
    reply = requests.get(
        f"{self.base_url}{path}",
        params=params,
        headers=self.headers,
        timeout=self.timeout,
        auth=self.auth,
        verify=self.verify,
    )
    reply.raise_for_status()
    return reply
204
+
205
def _post(self, path: str, json: Optional[Dict] = None) -> Any:
    """POST ``json`` to ``base_url + path`` and return the decoded JSON body.

    The client's headers, timeout, auth and TLS-verify settings are applied
    to the request; ``raise_for_status`` is called before decoding.
    """
    reply = requests.post(
        f"{self.base_url}{path}",
        json=json,
        headers=self.headers,
        timeout=self.timeout,
        auth=self.auth,
        verify=self.verify,
    )
    reply.raise_for_status()
    return reply.json()
213
+
214
+ # ── Info endpoints ───────────────────────────────────────────────────
215
+
216
def get_root(self) -> Dict:
    """``GET /`` — fetch the API root document (name, version, links)."""
    root_info = self._get('/')
    return root_info
219
+
220
def get_health(self) -> Dict:
    """``GET /health`` — health report plus latency telemetry.

    The payload includes ``mean_response_ms`` / ``p50_response_ms`` /
    ``n_samples`` (server-side wall-clock over the last 1024 non-probe
    requests). Use :meth:`pick_fastest` to choose between replicas.
    """
    health_report = self._get('/health')
    return health_report
228
+
229
def get_metrics(self) -> Dict:
    """``GET /metrics`` — full performance metrics.

    Covers request stats, connection-pool stats and memory throttling.
    """
    metrics_report = self._get('/metrics')
    return metrics_report
233
+
234
def get_version(self) -> Dict:
    """``GET /version`` — structured API contract (REST API 2.3+).

    Returns a dict with ``version``, ``released``, ``plot_geoms``,
    ``plot_output_formats``, ``deprecated_endpoints``, etc.

    When ``/version`` answers with an HTTP error status or a body that is
    not JSON (pre-2.3 servers), a synthesised dict of the same shape is
    built from ``GET /health`` instead, with ``version`` taken from the
    health payload's ``api_version``.

    Raises:
        Connection-level failures and unrelated errors from the
        ``/version`` request propagate unchanged rather than triggering a
        second request; errors from the ``/health`` fallback propagate too.
    """
    try:
        return self._get('/version')
    except ValueError:
        # Body was not valid JSON: treat the endpoint as unavailable.
        pass
    except requests.HTTPError:
        # The server responded, but not with a usable /version document.
        pass

    health = self._get('/health')
    return {
        'version': health.get('api_version'),
        'released': None,
        'compatible_antclient': None,
        'openapi_url': '/openapi.json',
        'swagger_ui_url': '/docs',
        'redoc_url': '/redoc',
        'changelog_url': None,
        'plot_geoms': [],
        'plot_output_formats': ['png', 'svg', 'pdf'],
        'deprecated_endpoints': [],
    }
257
+
258
def get_changelog(self) -> str:
    """``GET /changelog`` — the API changelog as a Markdown string.

    Available on REST API 2.3+. Raises ``HTTPError`` 404 on older servers.
    """
    raw_reply = self._get_raw('/changelog')
    return raw_reply.text
262
+
263
+ # ── Database discovery ──────────────────────────────────────────────
264
+
265
@_cache_short(ttl=60)
def get_databases(_self, refresh: bool = False,
                  format: str = "list") -> Union[List[Dict], Any]:
    """``GET /databases`` — list every available dataset.

    ``refresh=True`` sends ``refresh=true`` as a query parameter. When the
    reply is a dict, its ``databases`` entry is returned (or the dict
    itself if that key is missing); any other reply is returned as is.
    ``format='dataframe'`` wraps the result in a pandas DataFrame.
    """
    query = {'refresh': 'true'} if refresh else None
    payload = _self._get('/databases', params=query)
    if isinstance(payload, dict):
        catalog = payload.get('databases', payload)
    else:
        catalog = payload
    if format != 'dataframe':
        return catalog
    import pandas as pd
    return pd.DataFrame(catalog)
277
+
278
@_cache_short(ttl=60)
def get_database_info(_self, db_id: str) -> Dict:
    """``GET /databases/{db_id}/info`` — full dataset metadata.

    On REST API 2.5+ the ``collection`` block carries ``group_id``,
    ``doi``, ``pmid``, ``accession``, and ``description`` (markdown body
    of the nearest ``index.md``).
    """
    endpoint = f'/databases/{db_id}/info'
    return _self._get(endpoint)
287
+
288
@_cache_short(ttl=60)
def get_group(_self, group_id: str) -> Dict:
    """``GET /groups/{group_id}`` — collection-level metadata
    (REST API 2.5+).

    The reply holds the group prose body + frontmatter (group, experiment,
    authors, year, doi, pmid, accession, description) plus a list of
    ``{db_id, title, id}`` for every dataset in that group.

    Raises ``HTTPError`` 404 if the 3-char ``group_id`` is unknown.
    """
    endpoint = f'/groups/{group_id}'
    return _self._get(endpoint)
300
+
301
+ # ── Gene operations ─────────────────────────────────────────────────
302
+
303
@_cache_short(ttl=60)
def search_genes(_self,
                 db_id: str,
                 q: str = "",
                 limit: int = 100,
                 case_sensitive: bool = False,
                 exact: bool = False) -> List[str]:
    """``GET /databases/{db_id}/genes`` — gene-name search.

    ``exact=True`` requires a full match (so ``q='APOE'`` returns
    ``['APOE']`` not ``['APOE', 'APOER2']``).

    When the reply is a dict, its ``genes`` entry is returned (or the dict
    itself if that key is missing); any other reply is returned as is.
    """
    query = {
        'q': q,
        'limit': limit,
        'case_sensitive': case_sensitive,
        'exact': exact,
    }
    payload = _self._get(f'/databases/{db_id}/genes', params=query)
    if not isinstance(payload, dict):
        return payload
    return payload.get('genes', payload)
323
+
324
@_cache_long(ttl=3600)
def get_gene_info(_self, db_id: str, gene_id: str) -> Dict:
    """``GET /databases/{db_id}/genes/{gene_id}`` — gene info + layers."""
    endpoint = f'/databases/{db_id}/genes/{gene_id}'
    return _self._get(endpoint)
328
+
329
def get_gene_stats(self,
                   db_id: str,
                   genes: Union[str, List[str]],
                   layer: str = "X",
                   format: str = "json") -> Union[List[Dict], Any]:
    """``GET /databases/{db_id}/gene_stats`` — per-gene expression stats.

    Args:
        db_id: Database identifier.
        genes: A ready-made comma-separated string, or any iterable of
               gene names (list, tuple, ...) which is comma-joined.
        layer: Expression layer (default ``"X"``).
        format: ``"json"`` (default) returns the stats as received;
                ``"dataframe"`` wraps them in a pandas DataFrame.

    Returns:
        The ``stats`` entry of a dict reply (or the dict itself if that
        key is missing); any other reply is returned as is.
    """
    # Join every non-string iterable, not just ``list``: a tuple passed
    # straight through would be sent as repeated ``genes=`` parameters
    # instead of the comma-separated form used for lists.
    if genes is None or isinstance(genes, str):
        genes_str = genes
    else:
        genes_str = ','.join(genes)
    params = {'genes': genes_str, 'layer': layer}
    response = self._get(f'/databases/{db_id}/gene_stats', params=params)
    stats = (response.get('stats', response)
             if isinstance(response, dict) else response)
    if format == 'dataframe':
        import pandas as pd
        return pd.DataFrame(stats)
    return stats
344
+
345
def get_gene_stats_all(self,
                       genes: Union[str, List[str]],
                       case_sensitive: bool = False,
                       format: str = "json") -> Union[List[Dict], Any]:
    """``GET /gene_stats_all`` — per-gene stats across every dataset
    that has a default layer registered.

    Args:
        genes: A ready-made comma-separated string, or any iterable of
               gene names (list, tuple, ...) which is comma-joined.
        case_sensitive: Forwarded verbatim as a query parameter.
        format: ``"json"`` (default) returns the reply untouched;
                ``"dataframe"`` flattens it to one row per
                (dataset, gene stat).

    Returns:
        The raw reply, or a pandas DataFrame whose rows combine the
        dataset fields (``db_id``, ``group``, ``title``, ``n_cells_db``,
        ``layer``) with each entry of that dataset's ``stats`` list.
    """
    # Join every non-string iterable, not just ``list``, so tuples are not
    # sent as repeated ``genes=`` parameters.
    if genes is None or isinstance(genes, str):
        genes_str = genes
    else:
        genes_str = ','.join(genes)
    params = {'genes': genes_str, 'case_sensitive': case_sensitive}
    response = self._get('/gene_stats_all', params=params)
    if format != 'dataframe':
        return response

    import pandas as pd
    rows = [
        {
            'db_id': record['db_id'],
            'group': record.get('group', ''),
            'title': record['title'],
            'n_cells_db': record['n_cells'],
            'layer': record['layer'],
            **stat,  # stat keys win on a name clash
        }
        for record in response.get('results', [])
        for stat in record.get('stats', [])
    ]
    return pd.DataFrame(rows)
369
+
370
+ # ── Layers / metadata / embeddings ──────────────────────────────────
371
+
372
@_cache_long(ttl=3600)
def get_layers(_self, db_id: str) -> List[str]:
    """``GET /databases/{db_id}/layers`` — available data layers.

    When the reply is a dict, its ``layers`` entry is returned (or the
    dict itself if that key is missing); any other reply is returned as is.
    """
    payload = _self._get(f'/databases/{db_id}/layers')
    if not isinstance(payload, dict):
        return payload
    return payload.get('layers', payload)
378
+
379
@_cache_long(ttl=3600)
def get_metadata_fields(_self, db_id: str) -> Dict:
    """``GET /databases/{db_id}/metadata/fields`` — numerical +
    categorical obs columns."""
    endpoint = f'/databases/{db_id}/metadata/fields'
    return _self._get(endpoint)
384
+
385
@_cache_long(ttl=3600)
def get_embeddings(_self, db_id: str) -> List[str]:
    """``GET /databases/{db_id}/embeddings`` — embedding ids.

    When the reply is a dict, its ``embeddings`` entry is returned (or the
    dict itself if that key is missing); any other reply is returned as is.
    """
    payload = _self._get(f'/databases/{db_id}/embeddings')
    if not isinstance(payload, dict):
        return payload
    return payload.get('embeddings', payload)
391
+
392
def get_embedding_data(self,
                       db_id: str,
                       embedding_id: str,
                       n_dims: int = 2,
                       limit: Optional[int] = None,
                       format: str = "json") -> Union[Dict, Any]:
    """``GET /databases/{db_id}/embeddings/{embedding_id}`` — coords.

    ``n_dims`` is always sent; ``limit`` only when it is not ``None``.
    ``format='dataframe'`` converts the reply with ``_to_dataframe``;
    any other value returns the reply untouched.
    """
    query: Dict[str, Any] = {'n_dims': n_dims}
    if limit is not None:
        query['limit'] = limit
    endpoint = f'/databases/{db_id}/embeddings/{embedding_id}'
    payload = self._get(endpoint, params=query)
    return _to_dataframe(payload) if format == 'dataframe' else payload
408
+
409
+ # ── Cell-table retrieval ────────────────────────────────────────────
410
+
411
def get_cells(self,
              db_id: str,
              genes: Optional[List[str]] = None,
              metadata: Optional[Union[List[str], str]] = None,
              layer: str = "X",
              filters: Optional[List[str]] = None,
              limit: Optional[int] = None,
              format: str = "json",
              fill_na: bool = True) -> Union[Dict, Any, str, bytes]:
    """``GET /databases/{db_id}/cells`` — cell table with expression
    and/or metadata.

    Args:
        db_id: Database identifier.
        genes: Gene names — an iterable (comma-joined) or a ready-made
               comma-separated string. Omitted from the request when empty.
        metadata: Metadata columns — an iterable (comma-joined) or a
                  string such as ``"*"`` sent as is. Omitted when empty.
        layer: Expression layer (default ``"X"``).
        filters: Filter expressions, forwarded as the ``filter`` parameter.
        limit: Row limit; sent only when not ``None``.
        format: One of ``json`` (default), ``dataframe`` (requires
                pandas), ``csv`` (string), or ``parquet`` (bytes).
        fill_na: Passed to ``_to_dataframe`` as ``fill_na_genes`` when
                 ``format='dataframe'``.

    Returns:
        The decoded JSON reply, a DataFrame, the response bytes
        (``parquet``) or the response text (any other format).
    """
    def _comma_joined(values):
        # A plain string is already in wire form; joining it would split
        # it into characters ("APOE" -> "A,P,O,E").
        return values if isinstance(values, str) else ','.join(values)

    # The server has no 'dataframe' format: fetch JSON and convert locally.
    api_format = 'json' if format == 'dataframe' else format
    params: Dict[str, Any] = {'layer': layer, 'format': api_format}
    if genes:
        params['genes'] = _comma_joined(genes)
    if metadata:
        params['metadata'] = _comma_joined(metadata)
    if filters:
        params['filter'] = filters
    if limit is not None:
        params['limit'] = limit

    path = f'/databases/{db_id}/cells'
    if api_format == 'json':
        result = self._get(path, params=params)
        if format == 'dataframe':
            return _to_dataframe(result, fill_na_genes=fill_na)
        return result

    # Non-JSON bodies go through the shared raw-GET primitive instead of a
    # hand-rolled copy of the same request.
    response = self._get_raw(path, params=params)
    return response.content if format == 'parquet' else response.text
452
+
453
+ # ── Raw SQL ─────────────────────────────────────────────────────────
454
+
455
def execute_sql(self,
                db_id: str,
                query: str,
                limit: Optional[int] = None,
                format: str = "json",
                fill_na: bool = True) -> Union[Dict, Any]:
    """``POST /databases/{db_id}/query/sql`` — raw SQL.

    The JSON body carries ``query`` and, when not ``None``, ``limit``.
    ``format='dataframe'`` converts the reply with ``_to_dataframe``
    (``fill_na`` is passed as ``fill_na_genes``); any other value returns
    the reply untouched.

    For untrusted SQL prefer :meth:`analytics_query` (which is
    SELECT-only).
    """
    body: Dict[str, Any] = {'query': query}
    if limit is not None:
        body['limit'] = limit
    reply = self._post(f'/databases/{db_id}/query/sql', json=body)
    if format != 'dataframe':
        return reply
    return _to_dataframe(reply, fill_na_genes=fill_na)
473
+
474
+ # ── Plot endpoint (per-geom) ────────────────────────────────────────
475
+
476
+ def get_plot(self,
477
+ db_id: str,
478
+ geom: str,
479
+ x: Optional[str] = None,
480
+ y: Optional[str] = None,
481
+ color: Optional[str] = None,
482
+ layer: str = "X",
483
+ format: str = "png",
484
+ width: float = 8,
485
+ height: float = 6,
486
+ dpi: int = 150,
487
+ sample: Optional[int] = None,
488
+ subset: Optional[List[str]] = None,
489
+ # —— common visual knobs ——
490
+ palette_categorical: Optional[str] = None,
491
+ palette_continuous: Optional[str] = None,
492
+ palette_divergent: Optional[str] = None,
493
+ color_scale: Optional[str] = None,
494
+ font_family: Optional[str] = None,
495
+ font_size: Optional[float] = None,
496
+ **extra: Any) -> Dict[str, Any]:
497
+ """``GET /databases/{db_id}/plot/{geom}`` — render a figure server-side.
498
+
499
+ Hits the per-geom endpoint (REST API 2.2+) — each declares only
500
+ the params relevant to that geom, so a 422 references the param
501
+ that is actually wrong for that geom.
502
+
503
+ Args:
504
+ db_id: Database identifier (``group/file``).
505
+ geom: One of :attr:`PLOT_GEOMS`.
506
+ x, y, color: Field names (gene, obs column, embedding key).
507
+ layer: Expression layer (default ``"X"``).
508
+ format: Image (``png`` | ``svg`` | ``pdf``) **or** data export
509
+ (``csv`` | ``tsv`` — REST API 2.6+; returns the
510
+ dataframe the plot was built from).
511
+ width, height, dpi: Figure styling.
512
+ sample: Reservoir-sample at most N cells server-side.
513
+ subset: Repeated ``field:value`` / ``field:v1,v2`` /
514
+ ``field:min..max`` filters.
515
+ palette_categorical, palette_continuous, palette_divergent:
516
+ matplotlib colour maps.
517
+ color_scale: ``'auto'`` (default) | ``'sequential'`` |
518
+ ``'divergent'`` — overrides the heuristic for
519
+ continuous colour.
520
+ font_family, font_size: figure typography.
521
+ **extra: Geom-specific params, forwarded verbatim:
522
+
523
+ - ``bins`` (histogram)
524
+ - ``ci`` (bar)
525
+ - ``bar_mode`` ``'dodge'`` (default) | ``'stack'`` (bar)
526
+ - ``normalize`` (stacked bar: scale to 100 %)
527
+ - ``facet`` — 3rd categorical for small-multiples
528
+ - ``group_by`` — bar: cluster x-bars by this categorical
529
+ (REST API 2.6+)
530
+ - ``point_size``, ``alpha`` (scatter / hexbin)
531
+ - ``n_genes``, ``zscore`` (heatmap)
532
+ - ``gridsize``, ``mincnt``, ``vmin_quantile``,
533
+ ``vmax_quantile`` (hexbin)
534
+ - ``log_x``, ``log_y`` — legacy log1p alias for
535
+ ``transform_x`` / ``transform_y``
536
+ - ``transform_x``, ``transform_y``, ``asinh_scale``
537
+ (XY scatter / hexbin / kde2d)
538
+ - ``vmin``, ``vmax`` — continuous colour-scale clip
539
+ - ``window``, ``show_band`` (rolling)
540
+ - ``kde_n``, ``kde_bw``, ``n_levels``, ``iso_overlay``,
541
+ ``point_overlay`` (kde2d)
542
+ - ``marginals``, ``regline`` (XY scatter / hexbin / kde2d)
543
+ - ``study``, ``term``, ``contrast``, ``gene``,
544
+ ``padj_threshold``, ``log2fc_threshold``, ``n_label``
545
+ (DE geoms volcano / ma / forest)
546
+ - ``log2fc_clip`` (volcano / ma) — symmetric clamp of the
547
+ log2fc axis (REST API 2.7+)
548
+ - ``neglog10p_clip`` (volcano) — clamp upper -log10(padj)
549
+ axis (REST API 2.7+)
550
+ - ``logmean_clip`` (ma) — clamp upper log10(mean
551
+ expression) axis (REST API 2.7+)
552
+ - ``study``, ``term``, ``contrasts``, ``genes``,
553
+ ``value``, ``sig_overlay``, ``padj_threshold``
554
+ (de_heatmap — REST API 2.7+)
555
+
556
+ Returns:
557
+ A dict::
558
+
559
+ {
560
+ "bytes": <raw bytes>,
561
+ "format": "png" | "svg" | "pdf" | "csv" | "tsv",
562
+ "caption": "<figure legend from X-Plot-Caption>",
563
+ "source_url": "<original URL the server saw>",
564
+ "cache": "HIT" | "MISS",
565
+ "mime": "image/png" | ...,
566
+ "etag": '"<cache key>"' | "",
567
+ }
568
+
569
+ The ``caption`` field captures the **X-Plot-Caption** response
570
+ header — anthive's canonical multi-sentence figure-legend
571
+ description. Mirrors the Go vark v0.0.10+ behaviour: a
572
+ non-empty caption means the server emitted one; an empty
573
+ string means the server didn't (pre-API-2.7 or non-image
574
+ geoms). Always check this — it's the only place the prose
575
+ legend exists.
576
+
577
+ Raises:
578
+ ValueError: ``geom`` not in :attr:`PLOT_GEOMS`.
579
+ requests.HTTPError: on non-2xx.
580
+
581
+ Example::
582
+
583
+ r = client.get_plot("S25/V6W", "bar", x="patient_geno", dpi=80)
584
+ open("bar.png", "wb").write(r["bytes"])
585
+ print(r["caption"])
586
+ """
587
+ if geom not in self.PLOT_GEOMS:
588
+ raise ValueError(
589
+ f"Unknown geom '{geom}'. Valid: {sorted(self.PLOT_GEOMS)}"
590
+ )
591
+ if format not in self.PLOT_OUTPUT_FORMATS:
592
+ raise ValueError(
593
+ f"Unknown format '{format}'. "
594
+ f"Valid: {sorted(self.PLOT_OUTPUT_FORMATS)}"
595
+ )
596
+ params: Dict[str, Any] = {
597
+ 'format': format, 'layer': layer,
598
+ 'width': width, 'height': height, 'dpi': dpi,
599
+ }
600
+ if x is not None: params['x'] = x
601
+ if y is not None: params['y'] = y
602
+ if color is not None: params['color'] = color
603
+ if sample is not None: params['sample'] = sample
604
+ if subset:
605
+ # requests forwards lists as repeated query params.
606
+ params['subset'] = subset
607
+ if palette_categorical is not None:
608
+ params['palette_categorical'] = palette_categorical
609
+ if palette_continuous is not None:
610
+ params['palette_continuous'] = palette_continuous
611
+ if palette_divergent is not None:
612
+ params['palette_divergent'] = palette_divergent
613
+ if color_scale is not None:
614
+ params['color_scale'] = color_scale
615
+ if font_family is not None:
616
+ params['font_family'] = font_family
617
+ if font_size is not None:
618
+ params['font_size'] = font_size
619
+ for key, value in extra.items():
620
+ if value is not None:
621
+ params[key] = value
622
+ response = self._get_raw(
623
+ f'/databases/{db_id}/plot/{geom}', params=params,
624
+ )
625
+ return {
626
+ 'bytes': response.content,
627
+ 'format': response.headers.get('X-Plot-Format', format),
628
+ # X-Plot-Caption (REST API 2.7.2+): the multi-sentence figure
629
+ # legend. Empty when the server didn't emit one — clients
630
+ # should treat absence as "no caption available", not an
631
+ # error. Matches Go vark v0.0.10 capture semantics.
632
+ 'caption': response.headers.get('X-Plot-Caption', ''),
633
+ 'source_url': response.headers.get('X-Source-URL', ''),
634
+ 'cache': response.headers.get('X-Plot-Cache', ''),
635
+ 'mime': response.headers.get('Content-Type', ''),
636
+ 'etag': response.headers.get('ETag', ''),
637
+ }
638
+
639
+ # ── Differential expression (REST API 2.3+) ─────────────────────────
640
+
641
+ def list_de_studies(self, db_id: str) -> List[Dict]:
642
+ """``GET /databases/{db_id}/de`` — list DE studies on a dataset."""
643
+ return self._get(f'/databases/{db_id}/de').get('studies', [])
644
+
645
+ def get_de_study(self, db_id: str, study_id: str) -> Dict:
646
+ """``GET /databases/{db_id}/de/{study_id}`` — one study + its
647
+ ``(term, n_contrasts)`` list."""
648
+ return self._get(f'/databases/{db_id}/de/{study_id}')
649
+
650
+ def list_de_contrasts(self, db_id: str, study_id: str,
651
+ term: str) -> List[Dict]:
652
+ """``GET /databases/{db_id}/de/{study_id}/terms/{term}``."""
653
+ return self._get(
654
+ f'/databases/{db_id}/de/{study_id}/terms/{term}'
655
+ ).get('contrasts', [])
656
+
657
+ def get_de_rows(self,
658
+ db_id: str,
659
+ study_id: str,
660
+ term: str,
661
+ contrast: str,
662
+ *,
663
+ sort: str = 'padj',
664
+ direction: str = 'both',
665
+ padj_max: Optional[float] = None,
666
+ abs_log2fc_min: Optional[float] = None,
667
+ limit: int = 200,
668
+ offset: int = 0,
669
+ format: str = 'json') -> Union[List[Dict], Any]:
670
+ """``GET /databases/{db_id}/de/{study_id}/terms/{term}/contrasts/
671
+ {contrast}`` — paged DE rows."""
672
+ params: Dict[str, Any] = {
673
+ 'sort': sort, 'direction': direction,
674
+ 'limit': limit, 'offset': offset,
675
+ }
676
+ if padj_max is not None:
677
+ params['padj_max'] = padj_max
678
+ if abs_log2fc_min is not None:
679
+ params['abs_log2fc_min'] = abs_log2fc_min
680
+ result = self._get(
681
+ f'/databases/{db_id}/de/{study_id}/terms/{term}/contrasts/{contrast}',
682
+ params=params,
683
+ )
684
+ rows = result.get('rows', [])
685
+ if format == 'dataframe':
686
+ import pandas as pd
687
+ return pd.DataFrame(rows)
688
+ return rows
689
+
690
+ def get_de_by_gene(self, db_id: str, gene: str,
691
+ format: str = 'json') -> Union[List[Dict], Any]:
692
+ """``GET /databases/{db_id}/de/by-gene/{gene}`` — every DE row for
693
+ one gene across all studies/terms/contrasts. Empty list if the
694
+ gene isn't found."""
695
+ result = self._get(f'/databases/{db_id}/de/by-gene/{gene}')
696
+ rows = result.get('rows', [])
697
+ if format == 'dataframe':
698
+ import pandas as pd
699
+ return pd.DataFrame(rows)
700
+ return rows
701
+
702
+ # ── Analytics sandbox ───────────────────────────────────────────────
703
+
704
+ def analytics_schema(self, db_id: str) -> Dict:
705
+ """``GET /databases/{db_id}/analytics/schema`` — tables + columns +
706
+ 3-row samples visible to the analytics sandbox."""
707
+ return self._get(f'/databases/{db_id}/analytics/schema')
708
+
709
+ def analytics_query(self,
710
+ db_id: str,
711
+ sql: str,
712
+ limit: Optional[int] = None,
713
+ format: str = "json") -> Union[Dict, Any]:
714
+ """``POST /databases/{db_id}/analytics/query`` — SELECT-only SQL.
715
+
716
+ Result is saved as Parquet under a ``session_id`` (use with
717
+ :meth:`analytics_viz`).
718
+ """
719
+ payload: Dict[str, Any] = {'sql': sql}
720
+ if limit is not None:
721
+ payload['limit'] = limit
722
+ result = self._post(
723
+ f'/databases/{db_id}/analytics/query', json=payload,
724
+ )
725
+ if format == 'dataframe':
726
+ try:
727
+ import pandas as pd
728
+ except ImportError as exc:
729
+ raise ImportError(
730
+ "pandas is required for format='dataframe'. "
731
+ "Install with: pip install pandas"
732
+ ) from exc
733
+ return pd.DataFrame(result.get('preview', []),
734
+ columns=result.get('columns'))
735
+ return result
736
+
737
+ def analytics_viz(self,
738
+ session_id: str,
739
+ code: str,
740
+ output_format: str = "png") -> Dict[str, Any]:
741
+ """``POST /analytics/viz`` — render a matplotlib figure server-side
742
+ against the Parquet result of a prior :meth:`analytics_query`.
743
+
744
+ Pre-injected variables in the sandbox: ``df``, ``pd``, ``plt``,
745
+ ``sns``, ``np``, ``DATA_PATH``, ``OUTPUT_PATH``.
746
+ """
747
+ payload = {
748
+ 'session_id': session_id,
749
+ 'code': code,
750
+ 'output_format': output_format,
751
+ }
752
+ return self._post('/analytics/viz', json=payload)
753
+
754
+ # ── Module scores ───────────────────────────────────────────────────
755
+
756
+ def module_score(self,
757
+ db_id: str,
758
+ genes: List[str],
759
+ name: Optional[str] = None,
760
+ layer: str = "X",
761
+ format: str = "json") -> Union[Dict, Any]:
762
+ """``POST /databases/{db_id}/module_score`` — Seurat-style module
763
+ score for an arbitrary gene list, computed on the fly."""
764
+ payload: Dict[str, Any] = {'genes': genes, 'layer': layer}
765
+ if name:
766
+ payload['name'] = name
767
+ result = self._post(
768
+ f'/databases/{db_id}/module_score', json=payload,
769
+ )
770
+ if format == 'dataframe':
771
+ import pandas as pd
772
+ return pd.DataFrame(result.get('cells', result))
773
+ return result
774
+
775
+ @_cache_short(ttl=60)
776
+ def list_module_scores(_self, db_id: str) -> Dict:
777
+ """``GET /databases/{db_id}/module_scores`` — pre-computed
778
+ module-score columns already on the dataset (``obsnum``).
779
+
780
+ Returns the full discovery dict::
781
+
782
+ {
783
+ "db_id": "...",
784
+ "count": <N>,
785
+ "column_naming": {"separator": "::", "template": "..."},
786
+ "scored_genesets": [
787
+ {"study": ..., "experiment": ..., "name": ...,
788
+ "column": ..., "n_genes": ...}, ...
789
+ ]
790
+ }
791
+
792
+ Use ``response["scored_genesets"]`` for the list itself.
793
+ """
794
+ return _self._get(f'/databases/{db_id}/module_scores')
795
+
796
+ # ── Genesets ────────────────────────────────────────────────────────
797
+
798
+ @_cache_long(ttl=3600)
799
+ def list_genesets(_self) -> Dict:
800
+ """``GET /genesets`` — catalog (studies → experiments → genesets)."""
801
+ try:
802
+ return _self._get('/genesets')
803
+ except requests.HTTPError as exc:
804
+ status = (exc.response.status_code
805
+ if exc.response is not None else 0)
806
+ if status in (404, 503):
807
+ return {}
808
+ raise
809
+
810
+ @_cache_long(ttl=3600)
811
+ def get_geneset_experiment(_self, study: str, experiment: str) -> Dict:
812
+ """``GET /genesets/{study}/{experiment}``."""
813
+ return _self._get(f'/genesets/{study}/{experiment}')
814
+
815
+ @_cache_long(ttl=3600)
816
+ def get_geneset(_self, study: str, experiment: str, name: str) -> Dict:
817
+ """``GET /genesets/{study}/{experiment}/{name}``."""
818
+ return _self._get(f'/genesets/{study}/{experiment}/{name}')
819
+
820
+ def rescan_genesets(self) -> Dict:
821
+ """``POST /genesets/rescan`` — re-scan geneset store + rebuild."""
822
+ return self._post('/genesets/rescan')
823
+
824
+ # ── Plot cache admin ────────────────────────────────────────────────
825
+
826
+ def get_plot_cache_stats(self) -> Dict:
827
+ """``GET /admin/plot-cache/stats``."""
828
+ return self._get('/admin/plot-cache/stats')
829
+
830
+ def clear_plot_cache(self) -> Dict:
831
+ """``POST /admin/plot-cache/clear``."""
832
+ return self._post('/admin/plot-cache/clear')
833
+
834
+ # ── Skill (MCP collaborator install path) ───────────────────────────
835
+
836
+ def get_skill(self) -> str:
837
+ """``GET /skill`` — fetch the anthive Claude Code SKILL.md."""
838
+ return self._get_raw('/skill').text
839
+
840
+ # ── Admin ───────────────────────────────────────────────────────────
841
+
842
+ def rescan_databases(self) -> Dict:
843
+ """``POST /admin/rescan`` — re-scan data folder + reload caches."""
844
+ return self._post('/admin/rescan')
845
+
846
+ def admin_release(self, db_id: str) -> Dict:
847
+ """``POST /admin/release/{db_id}`` — drop ant-serve's r/o handle so
848
+ a writer can open the duckdb r/w."""
849
+ return self._post(f'/admin/release/{db_id}')
850
+
851
+ def admin_restore(self, db_id: str) -> Dict:
852
+ """``POST /admin/restore/{db_id}`` — inverse of admin_release."""
853
+ return self._post(f'/admin/restore/{db_id}')
854
+
855
+ # ── Convenience ─────────────────────────────────────────────────────
856
+
857
+ def list_database_ids(self) -> List[str]:
858
+ """Return just the IDs from :meth:`get_databases`."""
859
+ return [db['id'] for db in self.get_databases()]
860
+
861
+ def get_all_genes(self, db_id: str) -> List[str]:
862
+ """Return every gene name in a database (uses a single big
863
+ ``search_genes`` call)."""
864
+ return self.search_genes(db_id, q="", limit=100000)
865
+
866
+ @classmethod
867
+ def pick_fastest(cls,
868
+ base_urls: List[str],
869
+ auth: Optional[Any] = None,
870
+ timeout: float = 2.0,
871
+ prefer: str = "p50",
872
+ min_samples: int = 10,
873
+ verify: Union[bool, str] = True) -> Optional[str]:
874
+ """Probe several anthive servers' ``/health`` endpoints, return the
875
+ ``base_url`` of the one most likely to answer fastest.
876
+
877
+ Uses the server-side ``mean_response_ms`` / ``p50_response_ms``
878
+ once it has accumulated ``>=min_samples`` real requests; falls
879
+ back to the client-observed probe RTT otherwise.
880
+ """
881
+ import time
882
+ if prefer not in ('mean', 'p50'):
883
+ raise ValueError(
884
+ f"prefer must be 'mean' or 'p50', got {prefer!r}"
885
+ )
886
+ field = f'{prefer}_response_ms'
887
+ best, best_score = None, float('inf')
888
+ for raw in base_urls:
889
+ url = raw.rstrip('/')
890
+ try:
891
+ t0 = time.perf_counter()
892
+ response = requests.get(
893
+ f"{url}/health", auth=auth, timeout=timeout,
894
+ verify=verify,
895
+ )
896
+ rtt_ms = (time.perf_counter() - t0) * 1000.0
897
+ body = response.json() if response.status_code == 200 else {}
898
+ except (requests.RequestException, ValueError):
899
+ continue
900
+ if (body.get('n_samples', 0) >= min_samples
901
+ and body.get(field) is not None):
902
+ score = body[field]
903
+ else:
904
+ score = rtt_ms
905
+ if score < best_score:
906
+ best, best_score = url, score
907
+ return best
908
+
909
+ def __repr__(self) -> str:
910
+ return f"AnthiveClient(base_url={self.base_url!r})"
vark/helpers.py ADDED
@@ -0,0 +1,119 @@
1
+ """Notebook-friendly helpers around :class:`vark.AnthiveClient`.
2
+
3
+ Port of the legacy ``anthelper.py`` from the antclient era. These
4
+ functions exist purely for human ergonomics in Jupyter — they print to
5
+ stdout rather than returning structured data. Use the client methods
6
+ directly when you need values.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from functools import lru_cache
12
+ from typing import Optional
13
+
14
+
15
def find_database(client,
                  search_string: Optional[str] = None,
                  n: int = 20,
                  verbose: bool = True) -> None:
    """Search databases by name / title / group and print a summary.

    Deliberately not memoised: the function's only effect is printing, so
    a cached call would print nothing on repeat, would require ``client``
    to be hashable, and would hide datasets added since the first call.

    Args:
        client: :class:`vark.AnthiveClient`.
        search_string: Substring matched against ``id``, ``title``,
            ``group`` (case-insensitive). ``None`` shows the first ``n``.
        n: Maximum results to show.
        verbose: Show full per-dataset block.
    """
    databases = client.get_databases()

    if search_string is not None:
        query = search_string.lower()
        # ``or ''`` tolerates records whose title/group is present but null.
        databases = [
            db for db in databases
            if query in db['id'].lower()
            or query in (db.get('title') or '').lower()
            or query in (db.get('group') or '').lower()
        ]

    if not databases:
        print("No databases found")
        return

    # Newest first; records with a missing or null year sort as year 0.
    for record in sorted(databases,
                         key=lambda x: -(x.get('year') or 0))[:n]:
        if verbose:
            print(f"{record['id']}")
            print(f"  - group       : {record.get('group', '')}")
            print(f"  - title       : {record.get('title', '')} "
                  f"({record.get('year', '')})")
            print(f"  - size        : {record.get('n_cells', '?')} cells "
                  f"/ {record.get('n_genes', '?')} genes")
            # ``layers`` may be a mapping (keyed by name) or a plain list.
            layers = record.get('layers', {})
            layer_names = (
                ', '.join(layers.keys()) if isinstance(layers, dict)
                else ', '.join(layers)
            )
            print(f"  - layers      : {layer_names}")
            obs = record.get('obs', {})
            num_cols = obs.get('numerical', [])
            cat_cols = obs.get('categorical', [])
            # Show the count plus the first three column names as a teaser.
            print(
                f"  - categorical : {len(cat_cols)} — "
                f"{', '.join(cat_cols[:3])}"
                f"{', ...' if len(cat_cols) > 3 else ''}"
            )
            print(
                f"  - numerical   : {len(num_cols)} — "
                f"{', '.join(num_cols[:3])}"
                f"{', ...' if len(num_cols) > 3 else ''}"
            )
        else:
            print(record['id'])
73
+
74
+
75
def find_metadata(client,
                  db_id: str,
                  search_string: Optional[str] = None) -> None:
    """Print the numerical and categorical obs columns of a dataset.

    Args:
        client: :class:`vark.AnthiveClient`.
        db_id: ``group/file`` id.
        search_string: Case-insensitive substring filter applied to the
            column names. ``None`` lists every column.
    """
    import textwrap

    fields = client.get_metadata_fields(db_id)

    if search_string is not None:
        needle = search_string.lower()
        fields = {
            kind: [col for col in fields[kind] if needle in col.lower()]
            for kind in ('numerical', 'categorical')
        }

    numerical = fields['numerical']
    categorical = fields['categorical']

    if not (numerical or categorical):
        print("No results found")
        return

    sections = (
        ("# Numerical", numerical),
        ("# Categorical", categorical),
    )
    for heading, columns in sections:
        if not columns:
            continue
        print(heading)
        # Comma-join, wrap at 70 characters, indent every line by two spaces.
        wrapped = textwrap.wrap(", ".join(columns), width=70)
        print("  " + "\n  ".join(wrapped))
117
+
118
+
119
+ __all__ = ["find_database", "find_metadata"]