seashell-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seashell_sdk-0.1.0/PKG-INFO +16 -0
- seashell_sdk-0.1.0/pyproject.toml +24 -0
- seashell_sdk-0.1.0/seashell_sdk/__init__.py +84 -0
- seashell_sdk-0.1.0/seashell_sdk/client.py +506 -0
- seashell_sdk-0.1.0/seashell_sdk/config.py +43 -0
- seashell_sdk-0.1.0/seashell_sdk/exceptions.py +28 -0
- seashell_sdk-0.1.0/seashell_sdk.egg-info/PKG-INFO +16 -0
- seashell_sdk-0.1.0/seashell_sdk.egg-info/SOURCES.txt +10 -0
- seashell_sdk-0.1.0/seashell_sdk.egg-info/dependency_links.txt +1 -0
- seashell_sdk-0.1.0/seashell_sdk.egg-info/requires.txt +1 -0
- seashell_sdk-0.1.0/seashell_sdk.egg-info/top_level.txt +1 -0
- seashell_sdk-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: seashell-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Seashell Python SDK — programmatic access to compressed genomic data
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://seashell.bio
|
|
7
|
+
Project-URL: Documentation, https://seashell.bio/docs/sdk
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: requests>=2.28
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=42", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "seashell-sdk"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Seashell Python SDK — programmatic access to compressed genomic data"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
dependencies = ["requests>=2.28"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
Homepage = "https://seashell.bio"
|
|
24
|
+
Documentation = "https://seashell.bio/docs/sdk"
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Seashell Python SDK.
|
|
2
|
+
|
|
3
|
+
Quick start:
|
|
4
|
+
from seashell_sdk import connect
|
|
5
|
+
|
|
6
|
+
client = connect(
|
|
7
|
+
api_key="sk_...",
|
|
8
|
+
email="researcher@myinstitution.com",
|
|
9
|
+
password="...",
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
variants = client.find_variants("NA12878", gene="BRCA1", gnomad_af="<0.001")
|
|
13
|
+
for v in variants:
|
|
14
|
+
print(v["chrom"], v["pos"], v["ref"], v["alt"], v.get("gnomad_af"))
|
|
15
|
+
|
|
16
|
+
Or use the raw GQL interface:
|
|
17
|
+
result = client.query("FIND VARIANTS WHERE patient=NA12878 AND lof=true")
|
|
18
|
+
print(result["count"], result["results"])
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from seashell_sdk.client import SeashellClient, WakeJob
|
|
22
|
+
from seashell_sdk.config import DEFAULT_SERVER, load_config
|
|
23
|
+
from seashell_sdk.exceptions import (
|
|
24
|
+
SeashellError, AuthError, RateLimitError, QueryError,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__version__ = "0.1.0"
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"connect",
|
|
31
|
+
"SeashellClient",
|
|
32
|
+
"WakeJob",
|
|
33
|
+
"SeashellError",
|
|
34
|
+
"AuthError",
|
|
35
|
+
"RateLimitError",
|
|
36
|
+
"QueryError",
|
|
37
|
+
"__version__",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def connect(api_key=None, email=None, password=None, server=None):
    """Create a SeashellClient, log in when credentials are given, and return it.

    Args:
        api_key: Institution API key (sk_...). If None, reads from the
            SEASHELL_API_KEY environment variable.
        email: Researcher email or username.
        password: Researcher password.
        server: API server URL. Defaults to the "server" entry of the saved
            config, falling back to DEFAULT_SERVER.

    Returns:
        SeashellClient. It carries a session token only when both email and
        password were supplied; with an API key alone no login is attempted.

    Raises:
        AuthError: if no API key is available, if only one of email/password
            is supplied, or if the login call rejects the credentials.
        SeashellError: if the login request cannot reach the server.

    Example:
        import os
        client = connect(
            api_key=os.environ["SEASHELL_API_KEY"],
            email="researcher",
            password="...",
        )
    """
    import os

    import requests

    if api_key is None:
        api_key = os.environ.get("SEASHELL_API_KEY")
    if not api_key:
        raise AuthError(
            "No API key provided. Pass api_key= or set SEASHELL_API_KEY env var."
        )

    # Half a credential pair is almost certainly a mistake (e.g. an unset
    # password variable); fail here rather than hand back a client that
    # silently never logged in.
    if bool(email) != bool(password):
        raise AuthError(
            "Both email and password are required to log in; only one was provided."
        )

    if server is None:
        config = load_config()
        saved = config.get("server") if isinstance(config, dict) else None
        # `or` also covers a saved null/empty value, which would otherwise
        # reach SeashellClient and fail on .rstrip().
        server = saved or DEFAULT_SERVER

    client = SeashellClient(server, api_key)

    if email and password:
        try:
            client.login(email, password)
        except requests.exceptions.RequestException as exc:
            # Surface transport failures as the SDK's own error type, as the
            # Raises section promises.
            raise SeashellError(
                "Cannot reach server at %s: %s" % (client.server, exc)
            ) from exc

    return client
|
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
"""HTTP client for the Seashell API.
|
|
2
|
+
|
|
3
|
+
Low-level:
|
|
4
|
+
- SeashellClient.query(gql_string) → dict raw GQL passthrough
|
|
5
|
+
- SeashellClient.login(email, password) → dict
|
|
6
|
+
- SeashellClient.health() → dict
|
|
7
|
+
- SeashellClient.wake() → WakeJob
|
|
8
|
+
|
|
9
|
+
High-level convenience methods (build GQL, call query(), return results):
|
|
10
|
+
- find_variants(patient, gene=, **filters) → list[dict]
|
|
11
|
+
- count_variants(patient, **filters) → int
|
|
12
|
+
- list_patients() → list[dict]
|
|
13
|
+
- find_patients(**filters) → list[dict]
|
|
14
|
+
- count_patients(**filters) → int
|
|
15
|
+
- compare_patients(id1, id2) → dict
|
|
16
|
+
- diff_patients(id1, id2) → dict
|
|
17
|
+
- curate_variant(variant, classification, note=) → dict
|
|
18
|
+
- list_curations(**filters) → list[dict]
|
|
19
|
+
- show_curation_history(variant) → dict
|
|
20
|
+
- export_vcf(patient, region=) → dict
|
|
21
|
+
- export_cram(patient, region=) → dict
|
|
22
|
+
- find_similar(patient) → list[dict]
|
|
23
|
+
- pca_patients() → dict
|
|
24
|
+
- ancestry(patient) → dict
|
|
25
|
+
- qc(patient) → dict
|
|
26
|
+
- coverage(patient, region) → dict
|
|
27
|
+
- kinship(unexpected=False, limit=None) → dict
|
|
28
|
+
- mendelian_trio(mom, dad, child) → dict
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import sys
|
|
32
|
+
import threading
|
|
33
|
+
import time
|
|
34
|
+
|
|
35
|
+
import requests
|
|
36
|
+
|
|
37
|
+
from seashell_sdk.exceptions import (
|
|
38
|
+
AuthError, RateLimitError, QueryError, SeashellError,
|
|
39
|
+
PermissionError as SeashellPermissionError,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class WakeJob(object):
    """Background-pollable wake-on-key job.

    The CLI uses is_ready() and wait_until_ready() to block on the wake
    completing. The poll runs in a daemon thread so it doesn't block stdin
    while the user is typing a password.

    Attributes:
        client: the SeashellClient whose session and server URL are polled.
        state: last known state string ("unknown" until the server says more).
        eta: estimated seconds until ready, as reported in the initial state.
        error: last error message (request failure or poll timeout), or None.
    """

    def __init__(self, client, initial_state):
        """Build a job from the dict returned by the wake endpoint.

        Args:
            client: object exposing ``session`` and ``server``.
            initial_state: dict; the keys read are "status", "eta" and "mode".
        """
        self.client = client
        self.state = initial_state.get("status", "unknown")
        try:
            self.eta = int(initial_state.get("eta", 30))
        except (TypeError, ValueError):
            # A null or non-numeric eta should not prevent creating the job.
            self.eta = 30
        self.error = None
        self._lock = threading.Lock()
        self._done = threading.Event()
        self._thread = None
        if self.state == "running":
            self._done.set()
        if initial_state.get("mode") == "single_instance":
            # Single-instance mode is treated as already running.
            self.state = "running"
            self._done.set()

    def start_poll(self, max_seconds=120, interval=2.0):
        """Start background polling thread. Idempotent."""
        if self._thread is not None or self._done.is_set():
            return
        self._thread = threading.Thread(
            target=self._poll, args=(max_seconds, interval), daemon=True)
        self._thread.start()

    def _poll(self, max_seconds, interval):
        """Poll /wake_status until it reports "running", returns 404, or time runs out.

        The delay between polls starts at one second and grows by 1.5x up to
        ``interval``. When the deadline passes without success, ``error`` is
        filled in (unless a request error is already recorded) so callers can
        tell a timeout from a job that is still being polled.
        """
        deadline = time.time() + max_seconds
        wait = 1.0
        while time.time() < deadline:
            try:
                resp = self.client.session.get(
                    self.client.server + "/wake_status", timeout=5)
                if resp.status_code == 200:
                    payload = resp.json()
                    if isinstance(payload, dict):
                        with self._lock:
                            self.state = payload.get("state", self.state)
                        if payload.get("state") == "running":
                            self._done.set()
                            return
                elif resp.status_code == 404:
                    # The original code treats a missing endpoint as "done".
                    self._done.set()
                    return
            except (requests.exceptions.RequestException, ValueError) as e:
                with self._lock:
                    self.error = str(e)
            time.sleep(min(wait, interval))
            wait = min(wait * 1.5, interval)
        with self._lock:
            if self.error is None:
                self.error = (
                    "Timed out after %s seconds waiting for wake to complete"
                    % max_seconds
                )

    def is_ready(self):
        """Return True once the job has been marked done."""
        return self._done.is_set()

    def wait_until_ready(self, timeout=None):
        """Block until the job is done; return True if it is, False otherwise.

        Args:
            timeout: seconds to wait, or None to wait without a limit.

        With ``timeout=None`` and a poll thread started, this waits for that
        thread to finish instead of waiting on the event alone: the thread is
        the only thing that can mark the job done, so once it has given up
        the answer is final and waiting longer would hang forever.
        """
        poller = self._thread
        if timeout is None and poller is not None:
            poller.join()
            return self._done.is_set()
        return self._done.wait(timeout=timeout)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _build_filter_clause(filters):
|
|
105
|
+
"""Convert a dict of filters to GQL AND clauses.
|
|
106
|
+
|
|
107
|
+
Handles two forms:
|
|
108
|
+
gene="BRCA1" → "AND gene=BRCA1"
|
|
109
|
+
gnomad_af="<0.001" → "AND gnomad_af<0.001" (operator embedded in value)
|
|
110
|
+
lof=True → "AND lof=true"
|
|
111
|
+
"""
|
|
112
|
+
parts = []
|
|
113
|
+
for k, v in filters.items():
|
|
114
|
+
if v is None:
|
|
115
|
+
continue
|
|
116
|
+
v_str = str(v).lower() if isinstance(v, bool) else str(v)
|
|
117
|
+
# If value already starts with an operator, don't add =
|
|
118
|
+
if v_str and v_str[0] in ("<", ">", "!"):
|
|
119
|
+
parts.append("AND %s%s" % (k, v_str))
|
|
120
|
+
else:
|
|
121
|
+
parts.append("AND %s=%s" % (k, v_str))
|
|
122
|
+
return parts
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class SeashellClient(object):
    """Client for the Seashell HTTP API.

    Holds a ``requests.Session`` that carries the API key (and, after login,
    the session token) as headers. ``query`` sends a raw GQL string; the
    convenience methods build a GQL string and pass it to ``query``.

    Values are interpolated into GQL text as given; no quoting or escaping
    is applied (except the quote replacement in ``curate_variant``).
    """

    # A value starting with one of these carries its own comparison operator.
    _OPERATOR_PREFIXES = ("<", ">", "!")
    # Job statuses at which poll_job stops polling.
    _TERMINAL_JOB_STATES = ("complete", "complete_with_errors", "error", "failed")

    def __init__(self, server_url, api_key):
        """Create a session for ``server_url`` sending ``api_key`` as X-API-Key."""
        self.server = server_url.rstrip("/")
        self.api_key = api_key
        self.session_token = None
        self.session = requests.Session()
        self.session.headers["X-API-Key"] = api_key

    def set_session_token(self, token):
        """Attach per-researcher session token to all subsequent requests."""
        self.session_token = token
        self.session.headers["X-Session-Token"] = token

    # ─────────────────────────────────────────────────────────────────────────
    # Private helpers
    # ─────────────────────────────────────────────────────────────────────────

    @staticmethod
    def _error_detail(resp, default):
        """Return the "detail" field of a JSON error body, else ``default``."""
        try:
            return resp.json().get("detail", default)
        except Exception:
            # Body is not JSON, or not a JSON object.
            return default

    @staticmethod
    def _retry_after(resp, default=60):
        """Return Retry-After as whole seconds, or ``default`` if absent/unparseable."""
        try:
            return int(resp.headers.get("Retry-After", default))
        except (TypeError, ValueError):
            # e.g. the HTTP-date form of the header.
            return default

    @classmethod
    def _threshold_clause(cls, field, value):
        """Render "AND field<op>value" if value embeds an operator, else "AND field=value"."""
        text = str(value)
        # text[:1] (not text[0]) so an empty string cannot raise IndexError.
        if text[:1] in cls._OPERATOR_PREFIXES:
            return "AND %s%s" % (field, text)
        return "AND %s=%s" % (field, text)

    def _failed_wake(self, message):
        """Build a WakeJob in "error" state carrying ``message``."""
        job = WakeJob(self, {"status": "error", "eta": 0})
        job.error = message
        return job

    def _export(self, patient, fmt, region):
        """Send "EXPORT PATIENT <patient> FORMAT <fmt>" with an optional REGION."""
        q = "EXPORT PATIENT %s FORMAT %s" % (patient, fmt)
        if region:
            q += " REGION %s" % region
        return self.query(q)

    # ─────────────────────────────────────────────────────────────────────────
    # Core HTTP methods
    # ─────────────────────────────────────────────────────────────────────────

    def query(self, query_text):
        """Send a raw GQL query string and return the decoded JSON result.

        This is the lowest-level method. All high-level methods call this.

        Raises:
            AuthError: on HTTP 401.
            PermissionError (seashell_sdk.exceptions): on HTTP 403.
            RateLimitError: on HTTP 429; ``retry_after`` holds the delay.
            QueryError: on any other non-200 status, a non-JSON 200 body, or
                a result object containing an "error" key.

        Transport failures raised by ``requests`` are not wrapped here.
        """
        resp = self.session.post(
            self.server + "/query",
            json={"query": query_text},
            timeout=600,
        )
        status = resp.status_code
        if status == 401:
            raise AuthError("Session expired or invalid credentials. Please log in again.")
        if status == 403:
            raise SeashellPermissionError(self._error_detail(resp, "Access denied"))
        if status == 429:
            retry = self._retry_after(resp)
            raise RateLimitError("Rate limited. Retry in %d seconds." % retry, retry_after=retry)
        if status != 200:
            raise QueryError(
                "Query failed (%d): %s" % (status, self._error_detail(resp, resp.text)))
        try:
            result = resp.json()
        except ValueError:
            raise QueryError("Query failed: server returned a non-JSON response")
        if isinstance(result, dict) and "error" in result:
            raise QueryError(result["error"])
        return result

    def health(self):
        """Check server health.

        Returns the decoded /health JSON on HTTP 200, otherwise None. This is
        a best-effort probe: any failure is reported as None, not raised.
        """
        try:
            resp = self.session.get(self.server + "/health", timeout=10)
            if resp.status_code == 200:
                return resp.json()
        except Exception:
            # Deliberately broad: callers treat None as "not healthy".
            pass
        return None

    def wake(self):
        """POST /wake and return a WakeJob immediately (non-blocking).

        Never raises for HTTP or transport errors: failures come back as a
        WakeJob whose ``state`` is "error" and whose ``error`` holds the message.
        """
        try:
            resp = self.session.post(self.server + "/wake", timeout=10)
            if resp.status_code == 200:
                return WakeJob(self, resp.json())
        except requests.exceptions.RequestException as e:
            return self._failed_wake(
                "Cannot reach server at %s: %s" % (self.server, e))
        detail = self._error_detail(resp, resp.text)
        # str() first: "detail" may be a JSON object/list, which cannot be sliced.
        return self._failed_wake(
            "Wake failed (HTTP %d): %s" % (resp.status_code, str(detail)[:200]))

    def login(self, email, password):
        """Authenticate with email + password and attach the returned session token.

        Returns:
            The decoded login response body.

        Raises:
            AuthError: on HTTP 401, any other non-200 status, a body that is
                not valid JSON, or a response without a "token".
        """
        resp = self.session.post(
            self.server + "/auth/login",
            json={"email": email, "password": password},
            timeout=15,
        )
        if resp.status_code == 401:
            raise AuthError("Invalid username or password.")
        if resp.status_code != 200:
            raise AuthError(
                "Login failed (%d): %s"
                % (resp.status_code, self._error_detail(resp, resp.text)))
        try:
            body = resp.json()
        except ValueError:
            raise AuthError("Login response was not valid JSON")
        token = body.get("token", "") if isinstance(body, dict) else ""
        if not token:
            raise AuthError("Login response missing session token")
        self.set_session_token(token)
        return body

    def poll_job(self, endpoint, job_id, callback=None, timeout=None, interval=3):
        """Poll an async job (export/upload) until completion.

        Args:
            endpoint: path segment placed between the server URL and job id.
            job_id: job identifier.
            callback: optional callable invoked with each decoded job dict.
            timeout: overall limit in seconds, or None (default) for no limit.
            interval: seconds to sleep between polls (default 3).

        Returns:
            The job dict once its "status" is terminal; ``{"status": "error",
            "detail": <body>}`` on a non-200 response; or ``{"status":
            "timeout", ...}`` when ``timeout`` elapses first.

        Exceptions raised while polling (including from ``callback``) are
        written to stderr and the poll is retried.
        """
        url = "%s/%s/%s" % (self.server, endpoint, job_id)
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            try:
                resp = self.session.get(url, timeout=30)
                if resp.status_code != 200:
                    return {"status": "error", "detail": resp.text}
                job = resp.json()
                if callback:
                    callback(job)
                if job.get("status", "") in self._TERMINAL_JOB_STATES:
                    return job
            except Exception as e:
                sys.stderr.write("Poll error: %s\n" % str(e))
            if deadline is not None and time.monotonic() >= deadline:
                return {
                    "status": "timeout",
                    "detail": "Job %s did not finish within %s seconds" % (job_id, timeout),
                }
            time.sleep(interval)

    # ─────────────────────────────────────────────────────────────────────────
    # Patient queries
    # ─────────────────────────────────────────────────────────────────────────

    def list_patients(self):
        """Run "LIST PATIENTS" and return its "results" list (empty if absent)."""
        return self.query("LIST PATIENTS").get("results", [])

    def list_genes(self):
        """Run "LIST GENES" and return its "results" list (empty if absent)."""
        return self.query("LIST GENES").get("results", [])

    def find_patients(self, gene=None, disease=None, significance=None, limit=None, **filters):
        """Find patients matching criteria.

        ``gene`` and ``disease`` may be combined; both are sent. When neither
        is given, the first extra filter opens the WHERE clause. Extra filters
        use the _build_filter_clause formatting (embedded operators, lowercase
        booleans); filters whose value is None are ignored.

        Examples:
            find_patients(gene="BRCA1", significance="pathogenic")
            find_patients(disease="cardiomyopathy")
            find_patients(gene="CFTR")

        Raises:
            QueryError: if no gene, disease or extra filter is supplied.
        """
        lead = []
        if gene:
            lead.append("AND gene=%s" % gene)
        if disease:
            lead.append("AND disease=%s" % disease)
        # Work on a copy so the caller's keyword dict is never mutated.
        extra = {k: v for k, v in filters.items() if v is not None}
        if not lead and not extra:
            raise QueryError("find_patients requires at least one filter (gene=, disease=, etc.)")

        ordered = {}
        if not lead:
            # Keep the original ordering: first filter, then significance.
            first_key = next(iter(extra))
            ordered[first_key] = extra.pop(first_key)
        if significance is not None:
            ordered["significance"] = significance
        ordered.update(extra)

        clauses = list(lead)
        if ordered:
            clauses.extend(_build_filter_clause(ordered))
        # Every clause was built with a leading "AND "; the first must not have it.
        clauses[0] = clauses[0][len("AND "):]
        q = "FIND PATIENTS WHERE " + " ".join(clauses)
        if limit:
            q += " LIMIT %d" % limit
        return self.query(q).get("results", [])

    def count_patients(self, gene=None, significance=None, disease=None, **filters):
        """Count patients matching criteria; returns the "count" field (0 if absent).

        ``gene`` and ``disease`` may be combined; both are sent.

        Raises:
            QueryError: if neither gene nor disease is supplied.
        """
        terms = []
        if gene:
            terms.append("gene=%s" % gene)
        if disease:
            terms.append("disease=%s" % disease)
        if not terms:
            raise QueryError("count_patients requires gene= or disease=")
        if significance:
            terms.append("significance=%s" % significance)
        parts = ["COUNT PATIENTS WHERE " + " AND ".join(terms)]
        if filters:
            parts.extend(_build_filter_clause(filters))
        return self.query(" ".join(parts)).get("count", 0)

    def find_similar(self, patient, limit=10):
        """Run "FIND SIMILAR TO patient=<patient> LIMIT <limit>"; return "results"."""
        result = self.query("FIND SIMILAR TO patient=%s LIMIT %d" % (patient, limit))
        return result.get("results", [])

    def compare_patients(self, id1, id2):
        """Run "COMPARE PATIENTS <id1> AND <id2>" and return the result dict."""
        return self.query("COMPARE PATIENTS %s AND %s" % (id1, id2))

    def diff_patients(self, id1, id2):
        """Run "DIFF PATIENTS <id1> AND <id2>" and return the result dict."""
        return self.query("DIFF PATIENTS %s AND %s" % (id1, id2))

    def pca_patients(self):
        """Run "PCA PATIENTS" and return the result dict."""
        return self.query("PCA PATIENTS")

    def kinship(self, unexpected=False, limit=None):
        """Run "KINSHIP COHORT", optionally with UNEXPECTED and/or LIMIT <limit>."""
        q = "KINSHIP COHORT"
        if unexpected:
            q += " UNEXPECTED"
        if limit:
            q += " LIMIT %d" % limit
        return self.query(q)

    def mendelian_trio(self, mom, dad, child):
        """Run "MENDELIAN TRIO <mom> <dad> <child>" and return the result dict."""
        return self.query("MENDELIAN TRIO %s %s %s" % (mom, dad, child))

    def delete_patient(self, patient_id):
        """Run "DELETE PATIENT <patient_id>" and return the result dict."""
        return self.query("DELETE PATIENT %s" % patient_id)

    # ─────────────────────────────────────────────────────────────────────────
    # Variant queries
    # ─────────────────────────────────────────────────────────────────────────

    def find_variants(self, patient, gene=None, region=None, consequence=None,
                      gnomad_af=None, gnomad_popmax=None, significance=None,
                      lof=None, novel=None, rsid=None, curated=None,
                      limit=None, **filters):
        """Find variants for a patient with optional filters.

        ``gnomad_af`` / ``gnomad_popmax`` may embed an operator ("<0.001");
        otherwise "=" is used. ``lof`` / ``novel`` add a clause only when they
        are exactly True; False and None are both ignored.

        Examples:
            find_variants("NA12878", gene="BRCA1")
            find_variants("NA12878", gene="CFTR", gnomad_af="<0.001")
            find_variants("NA12878", consequence="missense_variant", gnomad_af="<0.001")
            find_variants("NA12878", lof=True)
            find_variants("NA12878", curated="pathogenic")

        Returns:
            The "results" list of the query response (empty if absent).
        """
        parts = ["FIND VARIANTS WHERE patient=%s" % patient]
        if gene:
            parts.append("AND gene=%s" % gene)
        if region:
            parts.append("AND region=%s" % region)
        if consequence:
            parts.append("AND consequence=%s" % consequence)
        if gnomad_af is not None:
            parts.append(self._threshold_clause("gnomad_af", gnomad_af))
        if gnomad_popmax is not None:
            parts.append(self._threshold_clause("gnomad_popmax", gnomad_popmax))
        if significance:
            parts.append("AND significance=%s" % significance)
        if lof is True:
            parts.append("AND lof=true")
        if novel is True:
            parts.append("AND novel=true")
        if rsid:
            parts.append("AND rsid=%s" % rsid)
        if curated:
            parts.append("AND curated=%s" % curated)
        if filters:
            parts.extend(_build_filter_clause(filters))
        q = " ".join(parts)
        if limit:
            q += " LIMIT %d" % limit
        return self.query(q).get("results", [])

    def count_variants(self, patient, gene=None, gnomad_af=None, gnomad_popmax=None,
                       consequence=None, lof=None, novel=None, significance=None,
                       **filters):
        """Count variants for a patient; returns the "count" field (0 if absent).

        Filter handling matches find_variants for the parameters both accept.

        Examples:
            count_variants("NA12878")
            count_variants("NA12878", gnomad_af="<0.001")
            count_variants("NA12878", lof=True)
            count_variants("NA12878", gene="BRCA1")
        """
        parts = ["COUNT VARIANTS WHERE patient=%s" % patient]
        if gene:
            parts.append("AND gene=%s" % gene)
        if gnomad_af is not None:
            parts.append(self._threshold_clause("gnomad_af", gnomad_af))
        if gnomad_popmax is not None:
            parts.append(self._threshold_clause("gnomad_popmax", gnomad_popmax))
        if consequence:
            parts.append("AND consequence=%s" % consequence)
        if lof is True:
            parts.append("AND lof=true")
        if novel is True:
            parts.append("AND novel=true")
        if significance:
            parts.append("AND significance=%s" % significance)
        if filters:
            parts.extend(_build_filter_clause(filters))
        return self.query(" ".join(parts)).get("count", 0)

    # ─────────────────────────────────────────────────────────────────────────
    # Curation
    # ─────────────────────────────────────────────────────────────────────────

    def curate_variant(self, variant, classification, note=None):
        """Classify a variant (pathogenic/likely_pathogenic/vus/likely_benign/benign).

        Args:
            variant: "chr17:43093011:G:GA" format
            classification: "pathogenic", "likely_pathogenic", "vus", "likely_benign", "benign"
            note: optional free-text note (e.g. ACMG criteria); double quotes
                in it are replaced with single quotes before sending.
        """
        q = "CURATE VARIANT %s AS %s" % (variant, classification)
        if note:
            q += ' NOTE "%s"' % note.replace('"', "'")
        return self.query(q)

    def list_curations(self, gene=None, classification=None):
        """Run "LIST CURATIONS", optionally filtered by gene and/or classification.

        Returns the "results" list of the query response (empty if absent).
        """
        terms = []
        if gene:
            terms.append("gene=%s" % gene)
        if classification:
            terms.append("classification=%s" % classification)
        q = "LIST CURATIONS"
        if terms:
            q += " WHERE " + " AND ".join(terms)
        return self.query(q).get("results", [])

    def show_curation_history(self, variant):
        """Run "SHOW CURATION HISTORY <variant>" and return the result dict."""
        return self.query("SHOW CURATION HISTORY %s" % variant)

    # ─────────────────────────────────────────────────────────────────────────
    # QC / analytics
    # ─────────────────────────────────────────────────────────────────────────

    def qc(self, patient):
        """Run "QC PATIENT <patient>" and return the result dict."""
        return self.query("QC PATIENT %s" % patient)

    def ancestry(self, patient):
        """Run "ANCESTRY PATIENT <patient>" and return the result dict."""
        return self.query("ANCESTRY PATIENT %s" % patient)

    def coverage(self, patient, region):
        """Run "COVERAGE PATIENT ... REGION ..." (region e.g. 'chr17:43044295-43125483')."""
        return self.query("COVERAGE PATIENT %s REGION %s" % (patient, region))

    def pileup(self, patient, position):
        """Run "PILEUP PATIENT ... POSITION ..." (position e.g. 'chr22:28698027')."""
        return self.query("PILEUP PATIENT %s POSITION %s" % (patient, position))

    def flagstat(self, patient):
        """Run "FLAGSTAT PATIENT <patient>" and return the result dict."""
        return self.query("FLAGSTAT PATIENT %s" % patient)

    def sexcheck(self, patient):
        """Run "SEXCHECK PATIENT <patient>" and return the result dict."""
        return self.query("SEXCHECK PATIENT %s" % patient)

    def insert_size(self, patient):
        """Run "INSERT_SIZE PATIENT <patient>" and return the result dict."""
        return self.query("INSERT_SIZE PATIENT %s" % patient)

    def cycle_quality(self, patient):
        """Run "CYCLE_QUALITY PATIENT <patient>" and return the result dict."""
        return self.query("CYCLE_QUALITY PATIENT %s" % patient)

    def contamination(self, patient):
        """Run "CONTAMINATION PATIENT <patient>" and return the result dict."""
        return self.query("CONTAMINATION PATIENT %s" % patient)

    # ─────────────────────────────────────────────────────────────────────────
    # Export
    # ─────────────────────────────────────────────────────────────────────────

    def export_vcf(self, patient, region=None):
        """Request a VCF export for a patient, optionally limited to ``region``."""
        return self._export(patient, "VCF", region)

    def export_cram(self, patient, region=None):
        """Request a CRAM export for a patient, optionally limited to ``region``."""
        return self._export(patient, "CRAM", region)

    def export_bam(self, patient, region=None):
        """Request a BAM export for a patient, optionally limited to ``region``."""
        return self._export(patient, "BAM", region)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Configuration management — stores server URL in ~/.seashell/config.json.
|
|
2
|
+
|
|
3
|
+
Shared between seashell-sdk and seashell-cli so both tools read from the
|
|
4
|
+
same location. Credentials (API key, session token) are never persisted —
|
|
5
|
+
they live only in process memory.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import stat
|
|
11
|
+
|
|
12
|
+
CONFIG_DIR = os.path.expanduser("~/.seashell")
|
|
13
|
+
CONFIG_FILE = os.path.join(CONFIG_DIR, "config.json")
|
|
14
|
+
|
|
15
|
+
DEFAULT_SERVER = "https://seashell.bio"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def load_config():
    """Load the saved config from CONFIG_FILE.

    Returns:
        The parsed JSON object as a dict, or an empty dict when the file is
        missing, unreadable, not valid JSON, or does not hold a JSON object
        at the top level (callers use ``.get`` on the result).
    """
    try:
        # Open directly instead of checking existence first: the file may
        # disappear between the check and the open.
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        return {}
    return config if isinstance(config, dict) else {}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def save_config(config):
    """Save config to CONFIG_FILE as indented JSON with owner-only permissions.

    Args:
        config: JSON-serializable object to persist.

    Raises:
        TypeError / ValueError: if ``config`` cannot be serialized. This is
            raised before the file is opened, so an existing config file is
            left untouched.
        OSError: if the directory or file cannot be created or written.
    """
    # Serialize first so a bad config cannot truncate the existing file.
    payload = json.dumps(config, indent=2)
    os.makedirs(CONFIG_DIR, exist_ok=True)
    owner_only = stat.S_IRUSR | stat.S_IWUSR
    # Create the file with restrictive permissions from the start, rather
    # than creating it with default permissions and tightening afterwards.
    fd = os.open(CONFIG_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_only)
    with os.fdopen(fd, "w") as f:
        f.write(payload)
    try:
        # Still needed for a file that already existed with looser permissions.
        os.chmod(CONFIG_FILE, owner_only)
    except OSError:
        pass  # Windows doesn't support chmod
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def clear_config():
    """Remove the saved config file; do nothing if it does not exist."""
    try:
        # Remove directly instead of checking existence first: the file may
        # be deleted by another process between the check and the removal.
        os.remove(CONFIG_FILE)
    except FileNotFoundError:
        pass
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Typed exceptions for the Seashell SDK."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class SeashellError(Exception):
    """Root of the Seashell SDK exception hierarchy.

    Every other exception defined in this module derives from it, so
    catching SeashellError catches them all.
    """
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AuthError(SeashellError):
    """Raised when authentication fails: bad credentials or an expired session token."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RateLimitError(SeashellError):
    """Raised when the rate limit is exceeded.

    Attributes:
        retry_after: value passed by the raiser for how long to wait before
            retrying (defaults to 60).
    """

    def __init__(self, message, retry_after=60):
        """Store ``message`` as the exception text and keep ``retry_after``."""
        super(RateLimitError, self).__init__(message)
        self.retry_after = retry_after
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class QueryError(SeashellError):
    """Raised when a query fails: syntax error, invalid filter, or engine error."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PermissionError(SeashellError):
    """Raised when access is denied: institution mismatch or insufficient role.

    NOTE: inside this module the name shadows the builtin PermissionError;
    it is a SeashellError subclass, not an OSError.
    """
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: seashell-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Seashell Python SDK — programmatic access to compressed genomic data
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://seashell.bio
|
|
7
|
+
Project-URL: Documentation, https://seashell.bio/docs/sdk
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: requests>=2.28
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
seashell_sdk/__init__.py
|
|
3
|
+
seashell_sdk/client.py
|
|
4
|
+
seashell_sdk/config.py
|
|
5
|
+
seashell_sdk/exceptions.py
|
|
6
|
+
seashell_sdk.egg-info/PKG-INFO
|
|
7
|
+
seashell_sdk.egg-info/SOURCES.txt
|
|
8
|
+
seashell_sdk.egg-info/dependency_links.txt
|
|
9
|
+
seashell_sdk.egg-info/requires.txt
|
|
10
|
+
seashell_sdk.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
requests>=2.28
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
seashell_sdk
|