seer-pas-sdk 0.1.3__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seer_pas_sdk/auth/auth.py +23 -1
- seer_pas_sdk/common/__init__.py +370 -72
- seer_pas_sdk/common/errors.py +5 -0
- seer_pas_sdk/common/groupanalysis.py +55 -0
- seer_pas_sdk/core/sdk.py +1855 -371
- seer_pas_sdk/core/unsupported.py +1634 -0
- seer_pas_sdk/objects/__init__.py +2 -0
- seer_pas_sdk/objects/groupanalysis.py +30 -0
- seer_pas_sdk/objects/platemap.py +67 -22
- seer_pas_sdk/objects/volcanoplot.py +290 -0
- seer_pas_sdk-3.0.0.dist-info/METADATA +231 -0
- seer_pas_sdk-3.0.0.dist-info/RECORD +18 -0
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-3.0.0.dist-info}/WHEEL +1 -1
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-3.0.0.dist-info}/top_level.txt +0 -1
- seer_pas_sdk-0.1.3.dist-info/METADATA +0 -50
- seer_pas_sdk-0.1.3.dist-info/RECORD +0 -19
- tests/__init__.py +0 -0
- tests/conftest.py +0 -17
- tests/test_auth.py +0 -48
- tests/test_common.py +0 -99
- tests/test_objects.py +0 -91
- tests/test_sdk.py +0 -11
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-3.0.0.dist-info/licenses}/LICENSE.txt +0 -0
seer_pas_sdk/core/sdk.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
from tqdm import tqdm
|
|
2
2
|
|
|
3
|
+
import deprecation
|
|
3
4
|
import os
|
|
4
5
|
import jwt
|
|
5
6
|
import requests
|
|
6
7
|
import urllib.request
|
|
7
8
|
import ssl
|
|
8
|
-
import shutil
|
|
9
9
|
|
|
10
|
-
from typing import List as _List
|
|
10
|
+
from typing import List as _List, Tuple as _Tuple
|
|
11
11
|
|
|
12
12
|
from ..common import *
|
|
13
13
|
from ..auth import Auth
|
|
14
|
-
from ..objects import
|
|
14
|
+
from ..objects.volcanoplot import VolcanoPlotBuilder
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class SeerSDK:
|
|
@@ -27,40 +27,205 @@ class SeerSDK:
|
|
|
27
27
|
>>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
|
-
def __init__(self, username, password, instance="US"):
|
|
30
|
+
def __init__(self, username, password, instance="US", tenant=None):
|
|
31
31
|
try:
|
|
32
32
|
self._auth = Auth(username, password, instance)
|
|
33
33
|
|
|
34
34
|
self._auth.get_token()
|
|
35
|
-
|
|
36
35
|
print(f"User '{username}' logged in.\n")
|
|
37
36
|
|
|
38
|
-
|
|
37
|
+
if not tenant:
|
|
38
|
+
tenant = self._auth.active_tenant_id
|
|
39
|
+
try:
|
|
40
|
+
self.switch_tenant(tenant)
|
|
41
|
+
except Exception as e:
|
|
42
|
+
print(
|
|
43
|
+
f"Encountered an error directing you to tenant {tenant}: {e}."
|
|
44
|
+
)
|
|
45
|
+
print("Logging into home tenant...")
|
|
46
|
+
# If an error occurs while directing the user to a tenant, default to home tenant.
|
|
47
|
+
print(f"You are now active in {self.get_active_tenant_name()}")
|
|
48
|
+
except Exception as e:
|
|
39
49
|
raise ValueError(
|
|
40
|
-
"Could not log in.\nPlease check your credentials and/or instance."
|
|
50
|
+
f"Could not log in.\nPlease check your credentials and/or instance: {e}."
|
|
41
51
|
)
|
|
42
52
|
|
|
43
|
-
def _get_auth_headers(self):
|
|
53
|
+
def _get_auth_headers(self, use_multi_tenant=True):
|
|
44
54
|
id_token, access_token = self._auth.get_token()
|
|
45
|
-
|
|
55
|
+
header = {
|
|
46
56
|
"Authorization": id_token,
|
|
47
|
-
"
|
|
57
|
+
"Access-Token": access_token,
|
|
48
58
|
}
|
|
59
|
+
if use_multi_tenant:
|
|
60
|
+
multi_tenant = {
|
|
61
|
+
"Tenant-Id": self._auth.active_tenant_id,
|
|
62
|
+
"Role": self._auth.active_role,
|
|
63
|
+
}
|
|
64
|
+
header.update(multi_tenant)
|
|
65
|
+
return header
|
|
49
66
|
|
|
50
|
-
def _get_auth_session(self):
|
|
67
|
+
def _get_auth_session(self, use_multi_tenant=True):
|
|
51
68
|
sess = requests.Session()
|
|
52
69
|
|
|
53
|
-
sess.headers.update(self._get_auth_headers())
|
|
70
|
+
sess.headers.update(self._get_auth_headers(use_multi_tenant))
|
|
54
71
|
|
|
55
72
|
return sess
|
|
56
73
|
|
|
74
|
+
def get_user_tenant(self, index=True):
    """
    Fetches the tenant metadata for the authenticated user.

    Parameters
    ----------
    index : bool, optional
        If True (default), group the tenant objects by their "institution" value.
        If False, return the decoded API response unchanged.

    Returns
    -------
    response : dict[str, list[dict]] or list[dict]
        When `index` is True, a dictionary mapping each institution name to the list of
        tenant objects sharing that name. When `index` is False, the list of tenant
        objects as returned by the API.

    Raises
    ------
    ValueError
        If the request does not come back with HTTP status 200.
    """
    with self._get_auth_session() as s:
        response = s.get(f"{self._auth.url}api/v1/usertenants")

        if response.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )

        tenants = response.json()

    if not index:
        return tenants

    # Several tenants may share one institution name, so every key maps to a list.
    grouped = {}
    for tenant in tenants:
        grouped.setdefault(tenant["institution"], []).append(tenant)
    return grouped
|
|
102
|
+
|
|
103
|
+
def list_tenants(self, reverse=False):
    """
    Lists the institution names and the tenant ids for the authenticated user.

    Parameters
    ----------
    reverse: bool
        Boolean denoting whether the user wants the result dictionary indexed by tenant id (True) or institution name (False).

    Returns
    -------
    tenants : dict[str, str]
        A dictionary containing the institution names and tenant ids for the authenticated user.
        When indexed by institution name and several tenants share that name, only the
        last tenant id listed for it is kept.
    """
    grouped = self.get_user_tenant()
    # get_user_tenant() groups tenants by institution, so every value is a list of
    # tenant objects; flatten the groups before building the lookup.
    rows = [row for group in grouped.values() for row in group]
    if reverse:
        return {row["tenantId"]: row["institution"] for row in rows}
    return {row["institution"]: row["tenantId"] for row in rows}
|
|
122
|
+
|
|
123
|
+
def switch_tenant(self, identifier: str):
    """
    Switches the tenant for the authenticated user.

    Parameters
    ----------
    identifier: str
        Tenant ID or organization name to switch to. A matching tenant ID takes
        precedence over a matching organization name.

    Returns
    -------
    tenant_id, role : tuple
        The active tenant id and the active role after the operation.

    Raises
    ------
    ValueError
        If `identifier` matches neither a tenant ID nor an institution name, or if it is
        an institution name shared by more than one tenant.
    ServerError
        If the server does not answer the tenant update with HTTP status 200.
    """
    tenants_by_institution = self.get_user_tenant()
    id_matches = [
        tenant
        for group in tenants_by_institution.values()
        for tenant in group
        if tenant["tenantId"] == identifier
    ]

    if id_matches:
        tenant_id = identifier
        row = id_matches[0]
    elif identifier in tenants_by_institution:
        candidates = tenants_by_institution[identifier]
        if len(candidates) > 1:
            raise ValueError(
                "Multiple tenants found for the given institution name. Please specify a tenant ID."
            )
        row = candidates[0]
        tenant_id = row["tenantId"]
    else:
        raise ValueError(
            "Invalid tenant identifier. Tenant was not switched."
        )

    with self._get_auth_session() as s:
        response = s.put(
            self._auth.url + "api/v1/users/tenant",
            json={
                "currentTenantId": tenant_id,
                "username": self._auth.username,
            },
        )
        if response.status_code != 200:
            raise ServerError(
                "Could not update current tenant for user. Tenant was not switched."
            )

    # Local auth state is updated only after the server accepted the change.
    self._auth.active_tenant_id = tenant_id
    self._auth.active_role = row["role"]
    print(f"You are now active in {row['institution']}")
    return self._auth.active_tenant_id, self._auth.active_role
|
|
182
|
+
|
|
183
|
+
def get_active_tenant(self):
    """
    Looks up the tenant object whose id is the currently active tenant id.

    Returns
    -------
    tenant: dict[str, str] or None
        Tenant metadata for the authenticated user containing "institution" and "tenantId" keys,
        or None when no tenant of the user carries the active tenant id.
    """
    candidates = self.get_user_tenant(index=False)
    # First match wins; fall back to None when nothing matches.
    return next(
        (
            entry
            for entry in candidates
            if entry["tenantId"] == self._auth.active_tenant_id
        ),
        None,
    )
|
|
197
|
+
|
|
198
|
+
def get_active_tenant_id(self):
    """
    Fetches the active tenant ID for the authenticated user.

    Returns
    -------
    tenant_id: str or None
        The "tenantId" of the active tenant object, or None when no active tenant
        object could be resolved.
    """
    active = self.get_active_tenant()
    if not active:
        return None
    return active["tenantId"]
|
|
209
|
+
|
|
210
|
+
def get_active_tenant_name(self):
    """
    Fetches the active tenant name for the authenticated user.

    Returns
    -------
    tenant: str or None
        The "institution" of the active tenant object, or None when no active tenant
        object could be resolved.
    """
    active = self.get_active_tenant()
    if not active:
        return None
    return active["institution"]
|
|
221
|
+
|
|
57
222
|
def get_spaces(self):
|
|
58
223
|
"""
|
|
59
224
|
Fetches a list of spaces for the authenticated user.
|
|
60
225
|
|
|
61
226
|
Returns
|
|
62
227
|
-------
|
|
63
|
-
spaces: list
|
|
228
|
+
spaces: list[dict]
|
|
64
229
|
List of space objects for the authenticated user.
|
|
65
230
|
|
|
66
231
|
Examples
|
|
@@ -86,7 +251,7 @@ class SeerSDK:
|
|
|
86
251
|
)
|
|
87
252
|
return spaces.json()
|
|
88
253
|
|
|
89
|
-
def
|
|
254
|
+
def get_plates(self, plate_id: str = None, as_df: bool = False):
|
|
90
255
|
"""
|
|
91
256
|
Fetches a list of plates for the authenticated user. If no `plate_id` is provided, returns all plates for the authenticated user. If `plate_id` is provided, returns the plate with the given `plate_id`, provided it exists.
|
|
92
257
|
|
|
@@ -94,25 +259,25 @@ class SeerSDK:
|
|
|
94
259
|
----------
|
|
95
260
|
plate_id : str, optional
|
|
96
261
|
ID of the plate to be fetched, defaulted to None.
|
|
97
|
-
|
|
98
|
-
|
|
262
|
+
as_df: bool
|
|
263
|
+
whether the result should be converted to a DataFrame, defaulted to False.
|
|
99
264
|
|
|
100
265
|
Returns
|
|
101
266
|
-------
|
|
102
|
-
plates: list or DataFrame
|
|
267
|
+
plates: list[dict] or DataFrame
|
|
103
268
|
List/DataFrame of plate objects for the authenticated user.
|
|
104
269
|
|
|
105
270
|
Examples
|
|
106
271
|
-------
|
|
107
272
|
>>> from seer_pas_sdk import SeerSDK
|
|
108
273
|
>>> seer_sdk = SeerSDK()
|
|
109
|
-
>>> seer_sdk.
|
|
274
|
+
>>> seer_sdk.get_plates()
|
|
110
275
|
>>> [
|
|
111
276
|
{ "id": ... },
|
|
112
277
|
{ "id": ... },
|
|
113
278
|
...
|
|
114
279
|
]
|
|
115
|
-
>>> seer_sdk.
|
|
280
|
+
>>> seer_sdk.get_plates(as_df=True)
|
|
116
281
|
>>> id ... user_group
|
|
117
282
|
0 a7c12190-15da-11ee-bdf1-bbaa73585acf ... None
|
|
118
283
|
1 8c3b1480-15da-11ee-bdf1-bbaa73585acf ... None
|
|
@@ -151,9 +316,9 @@ class SeerSDK:
|
|
|
151
316
|
for entry in res:
|
|
152
317
|
del entry["tenant_id"]
|
|
153
318
|
|
|
154
|
-
return res if not
|
|
319
|
+
return res if not as_df else dict_to_df(res)
|
|
155
320
|
|
|
156
|
-
def
|
|
321
|
+
def get_projects(self, project_id: str = None, as_df: bool = False):
|
|
157
322
|
"""
|
|
158
323
|
Fetches a list of projects for the authenticated user. If no `project_id` is provided, returns all projects for the authenticated user. If `project_id` is provided, returns the project with the given `project_id`, provided it exists.
|
|
159
324
|
|
|
@@ -161,26 +326,26 @@ class SeerSDK:
|
|
|
161
326
|
----------
|
|
162
327
|
project_id: str, optional
|
|
163
328
|
Project ID of the project to be fetched, defaulted to None.
|
|
164
|
-
|
|
165
|
-
|
|
329
|
+
as_df: bool
|
|
330
|
+
whether the result should be converted to a DataFrame, defaulted to False.
|
|
166
331
|
|
|
167
332
|
Returns
|
|
168
333
|
-------
|
|
169
|
-
projects: list or DataFrame
|
|
334
|
+
projects: list[dict] or DataFrame
|
|
170
335
|
DataFrame or list of project objects for the authenticated user.
|
|
171
336
|
|
|
172
337
|
Examples
|
|
173
338
|
-------
|
|
174
339
|
>>> from seer_pas_sdk import SeerSDK
|
|
175
340
|
>>> seer_sdk = SeerSDK()
|
|
176
|
-
>>> seer_sdk.
|
|
341
|
+
>>> seer_sdk.get_projects()
|
|
177
342
|
>>> [
|
|
178
343
|
{ "project_name": ... },
|
|
179
344
|
{ "project_name": ... },
|
|
180
345
|
...
|
|
181
346
|
]
|
|
182
347
|
|
|
183
|
-
>>> seer_sdk.
|
|
348
|
+
>>> seer_sdk.get_projects(as_df=True)
|
|
184
349
|
>>> id ... user_group
|
|
185
350
|
0 a7c12190-15da-11ee-bdf1-bbaa73585acf ... None
|
|
186
351
|
1 8c3b1480-15da-11ee-bdf1-bbaa73585acf ... None
|
|
@@ -194,7 +359,7 @@ class SeerSDK:
|
|
|
194
359
|
938 5b05d440-6610-11ea-96e3-d5a4dab4ebf6 ... None
|
|
195
360
|
939 9872e3f0-544e-11ea-ad9e-1991e0725494 ... None
|
|
196
361
|
|
|
197
|
-
>>> seer_sdk.
|
|
362
|
+
>>> seer_sdk.get_projects(project_id="YOUR_PROJECT_ID_HERE")
|
|
198
363
|
>>> [{ "project_name": ... }]
|
|
199
364
|
"""
|
|
200
365
|
|
|
@@ -228,19 +393,18 @@ class SeerSDK:
|
|
|
228
393
|
entry["raw_file_path"] = entry["raw_file_path"][
|
|
229
394
|
location(entry["raw_file_path"]) :
|
|
230
395
|
]
|
|
231
|
-
return res if not
|
|
396
|
+
return res if not as_df else dict_to_df(res)
|
|
232
397
|
|
|
233
|
-
def
|
|
234
|
-
self,
|
|
398
|
+
def get_samples(
|
|
399
|
+
self,
|
|
400
|
+
plate_id: str = None,
|
|
401
|
+
project_id: str = None,
|
|
402
|
+
analysis_id: str = None,
|
|
403
|
+
analysis_name: str = None,
|
|
404
|
+
as_df: bool = False,
|
|
235
405
|
):
|
|
236
406
|
"""
|
|
237
|
-
|
|
238
|
-
[UNEXPOSED METHOD CALL]
|
|
239
|
-
****************
|
|
240
|
-
|
|
241
|
-
Fetches a list of samples for the authenticated user, filtered by `plate_id`. Returns all samples for the plate with the given `plate_id`, provided it exists.
|
|
242
|
-
|
|
243
|
-
If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated first.
|
|
407
|
+
Fetches a list of samples for the authenticated user with relation to a specified plate, project, or analysis. Exactly one of `plate_id`, `project_id`, `analysis_id`, or `analysis_name` must be provided; otherwise a ValueError is raised. If `plate_id` or `project_id` is provided, returns samples associated with that plate or project. If `analysis_id` or `analysis_name` is provided, returns samples associated with that analysis.
|
|
244
408
|
|
|
245
409
|
Parameters
|
|
246
410
|
----------
|
|
@@ -248,12 +412,16 @@ class SeerSDK:
|
|
|
248
412
|
ID of the plate for which samples are to be fetched, defaulted to None.
|
|
249
413
|
project_id : str, optional
|
|
250
414
|
ID of the project for which samples are to be fetched, defaulted to None.
|
|
251
|
-
|
|
252
|
-
|
|
415
|
+
analysis_id : str, optional
|
|
416
|
+
ID of the analysis for which samples are to be fetched, defaulted to None.
|
|
417
|
+
analysis_name : str, optional
|
|
418
|
+
Name of the analysis for which samples are to be fetched, defaulted to None.
|
|
419
|
+
as_df: bool
|
|
420
|
+
whether the result should be converted to a DataFrame, defaulted to False.
|
|
253
421
|
|
|
254
422
|
Returns
|
|
255
423
|
-------
|
|
256
|
-
samples: list or DataFrame
|
|
424
|
+
samples: list[dict] or DataFrame
|
|
257
425
|
List/DataFrame of samples for the authenticated user.
|
|
258
426
|
|
|
259
427
|
Examples
|
|
@@ -261,14 +429,14 @@ class SeerSDK:
|
|
|
261
429
|
>>> from seer_pas_sdk import SeerSDK
|
|
262
430
|
>>> seer_sdk = SeerSDK()
|
|
263
431
|
|
|
264
|
-
>>> seer_sdk.
|
|
432
|
+
>>> seer_sdk.get_samples(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
|
|
265
433
|
>>> [
|
|
266
434
|
{ "id": ... },
|
|
267
435
|
{ "id": ... },
|
|
268
436
|
...
|
|
269
437
|
]
|
|
270
438
|
|
|
271
|
-
>>> seer_sdk.
|
|
439
|
+
>>> seer_sdk.get_samples(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf", as_df=True)
|
|
272
440
|
>>> id ... control
|
|
273
441
|
0 812139c0-15e0-11ee-bdf1-bbaa73585acf ...
|
|
274
442
|
1 803e05b0-15e0-11ee-bdf1-bbaa73585acf ... MPE Control
|
|
@@ -283,48 +451,68 @@ class SeerSDK:
|
|
|
283
451
|
3628 dd607ef0-654c-11ea-8eb2-25a1cfd1163c ... C132
|
|
284
452
|
"""
|
|
285
453
|
|
|
286
|
-
if
|
|
287
|
-
|
|
454
|
+
# Raise an error if none or more than one of the primary key parameters are passed in.
|
|
455
|
+
if (
|
|
456
|
+
sum(
|
|
457
|
+
[
|
|
458
|
+
True if x else False
|
|
459
|
+
for x in [plate_id, project_id, analysis_id, analysis_name]
|
|
460
|
+
]
|
|
461
|
+
)
|
|
462
|
+
!= 1
|
|
463
|
+
):
|
|
464
|
+
raise ValueError(
|
|
465
|
+
"You must pass in exactly one of plate_id, project_id, analysis_id, analysis_name."
|
|
466
|
+
)
|
|
288
467
|
|
|
289
468
|
res = []
|
|
290
469
|
URL = f"{self._auth.url}api/v1/samples"
|
|
291
470
|
sample_params = {"all": "true"}
|
|
292
471
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
)
|
|
302
|
-
sample_params["plateId"] = plate_id
|
|
472
|
+
if project_id or plate_id:
|
|
473
|
+
with self._get_auth_session() as s:
|
|
474
|
+
if plate_id:
|
|
475
|
+
try:
|
|
476
|
+
self.get_plates(plate_id)
|
|
477
|
+
except:
|
|
478
|
+
raise ValueError("Plate ID is invalid.")
|
|
479
|
+
sample_params["plateId"] = plate_id
|
|
303
480
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
"Project ID is invalid. Please check your parameters and see if the backend is running."
|
|
310
|
-
)
|
|
481
|
+
else:
|
|
482
|
+
try:
|
|
483
|
+
self.get_projects(project_id)
|
|
484
|
+
except:
|
|
485
|
+
raise ValueError("Project ID is invalid.")
|
|
311
486
|
|
|
312
|
-
|
|
487
|
+
sample_params["projectId"] = project_id
|
|
313
488
|
|
|
314
489
|
samples = s.get(URL, params=sample_params)
|
|
315
490
|
if samples.status_code != 200:
|
|
316
491
|
raise ValueError(
|
|
317
|
-
"
|
|
492
|
+
f"Failed to fetch sample data for plate ID: {plate_id}."
|
|
318
493
|
)
|
|
319
494
|
res = samples.json()["data"]
|
|
495
|
+
res_df = dict_to_df(res)
|
|
320
496
|
|
|
321
|
-
for
|
|
322
|
-
|
|
497
|
+
# API returns empty strings if not a control, replace with None for filtering purposes
|
|
498
|
+
res_df["control"] = res_df["control"].apply(
|
|
499
|
+
lambda x: x if x else None
|
|
500
|
+
)
|
|
501
|
+
else:
|
|
502
|
+
if analysis_id:
|
|
503
|
+
res_df = self._get_analysis_samples(
|
|
504
|
+
analysis_id=analysis_id, as_df=True
|
|
505
|
+
)
|
|
506
|
+
else:
|
|
507
|
+
res_df = self._get_analysis_samples(
|
|
508
|
+
analysis_name=analysis_name, as_df=True, is_name=True
|
|
509
|
+
)
|
|
510
|
+
|
|
511
|
+
# apply post processing
|
|
512
|
+
res_df.drop(["tenant_id"], axis=1, inplace=True)
|
|
323
513
|
|
|
324
|
-
# Exclude custom fields that don't belong to the tenant
|
|
325
|
-
res_df = dict_to_df(res)
|
|
326
514
|
custom_columns = [
|
|
327
|
-
x["field_name"] for x in self.
|
|
515
|
+
x["field_name"] for x in self._get_sample_custom_fields()
|
|
328
516
|
]
|
|
329
517
|
res_df = res_df[
|
|
330
518
|
[
|
|
@@ -334,9 +522,73 @@ class SeerSDK:
|
|
|
334
522
|
]
|
|
335
523
|
]
|
|
336
524
|
|
|
337
|
-
return res_df.to_dict(orient="records") if not
|
|
525
|
+
return res_df.to_dict(orient="records") if not as_df else res_df
|
|
526
|
+
|
|
527
|
+
def _filter_samples_metadata(
|
|
528
|
+
self,
|
|
529
|
+
project_id: str,
|
|
530
|
+
filter: str,
|
|
531
|
+
sample_ids: list = None,
|
|
532
|
+
):
|
|
533
|
+
"""
|
|
534
|
+
****************
|
|
535
|
+
[UNEXPOSED METHOD CALL]
|
|
536
|
+
****************
|
|
537
|
+
Get samples given a filter and project_id.
|
|
538
|
+
|
|
539
|
+
Parameters
|
|
540
|
+
----------
|
|
541
|
+
project_id : str
|
|
542
|
+
The project id.
|
|
543
|
+
filter : str
|
|
544
|
+
The filter to be applied. Acceptable values are 'control' or 'sample'.
|
|
545
|
+
sample_ids : list, optional
|
|
546
|
+
List of user provided sample ids
|
|
547
|
+
|
|
548
|
+
Returns
|
|
549
|
+
-------
|
|
550
|
+
res : list[str]
|
|
551
|
+
A list of sample ids
|
|
552
|
+
|
|
553
|
+
Examples
|
|
554
|
+
-------
|
|
555
|
+
>>> from core import SeerSDK
|
|
556
|
+
>>> seer_sdk = SeerSDK()
|
|
557
|
+
>>> seer_sdk._get_samples_filter("FILTER", "PROJECT_ID")
|
|
558
|
+
>>> {
|
|
559
|
+
"samples": [
|
|
560
|
+
{
|
|
561
|
+
"id": "SAMPLE_ID",
|
|
562
|
+
"plate_id": "PLATE_ID",
|
|
563
|
+
"sample_name": "SAMPLE_NAME",
|
|
564
|
+
...
|
|
565
|
+
...
|
|
566
|
+
},
|
|
567
|
+
...
|
|
568
|
+
...
|
|
569
|
+
]
|
|
570
|
+
}
|
|
571
|
+
"""
|
|
572
|
+
|
|
573
|
+
if filter and filter not in ["control", "sample"]:
|
|
574
|
+
raise ValueError(
|
|
575
|
+
"Invalid filter. Please choose between 'control' or 'sample'."
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
df = self.get_samples(project_id=project_id, as_df=True)
|
|
579
|
+
|
|
580
|
+
if filter == "control":
|
|
581
|
+
df = df[~df["control"].isna()]
|
|
582
|
+
elif filter == "sample":
|
|
583
|
+
df = df[df["control"].isna()]
|
|
584
|
+
|
|
585
|
+
valid_samples = df["id"].tolist()
|
|
586
|
+
if sample_ids:
|
|
587
|
+
valid_samples = list(set(valid_samples) & set(sample_ids))
|
|
588
|
+
|
|
589
|
+
return valid_samples
|
|
338
590
|
|
|
339
|
-
def
|
|
591
|
+
def _get_sample_custom_fields(self):
|
|
340
592
|
"""
|
|
341
593
|
Fetches a list of custom fields defined for the authenticated user.
|
|
342
594
|
"""
|
|
@@ -356,7 +608,7 @@ class SeerSDK:
|
|
|
356
608
|
del entry["tenant_id"]
|
|
357
609
|
return res
|
|
358
610
|
|
|
359
|
-
def
|
|
611
|
+
def get_msruns(self, sample_ids: list, as_df: bool = False):
|
|
360
612
|
"""
|
|
361
613
|
Fetches MS data files for passed in `sample_ids` (provided they are valid and contain relevant files) for an authenticated user.
|
|
362
614
|
|
|
@@ -366,12 +618,12 @@ class SeerSDK:
|
|
|
366
618
|
----------
|
|
367
619
|
sample_ids : list
|
|
368
620
|
List of unique sample IDs.
|
|
369
|
-
|
|
370
|
-
|
|
621
|
+
as_df: bool
|
|
622
|
+
whether the result should be converted to a DataFrame, defaulted to False.
|
|
371
623
|
|
|
372
624
|
Returns
|
|
373
625
|
-------
|
|
374
|
-
res: list or DataFrame
|
|
626
|
+
res: list[dict] or DataFrame
|
|
375
627
|
List/DataFrame of MS data file objects for the passed in sample IDs.
|
|
376
628
|
|
|
377
629
|
Examples
|
|
@@ -380,13 +632,13 @@ class SeerSDK:
|
|
|
380
632
|
>>> seer_sdk = SeerSDK()
|
|
381
633
|
>>> sample_ids = ["812139c0-15e0-11ee-bdf1-bbaa73585acf", "803e05b0-15e0-11ee-bdf1-bbaa73585acf"]
|
|
382
634
|
|
|
383
|
-
>>> seer_sdk.
|
|
635
|
+
>>> seer_sdk.get_msruns(sample_ids)
|
|
384
636
|
>>> [
|
|
385
637
|
{"id": "SAMPLE_ID_1_HERE" ... },
|
|
386
638
|
{"id": "SAMPLE_ID_2_HERE" ... }
|
|
387
639
|
]
|
|
388
640
|
|
|
389
|
-
>>> seer_sdk.
|
|
641
|
+
>>> seer_sdk.get_msruns(sample_ids, as_df=True)
|
|
390
642
|
>>> id ... gradient
|
|
391
643
|
0 81c6a180-15e0-11ee-bdf1-bbaa73585acf ... None
|
|
392
644
|
1 816a9ed0-15e0-11ee-bdf1-bbaa73585acf ... None
|
|
@@ -405,10 +657,10 @@ class SeerSDK:
|
|
|
405
657
|
|
|
406
658
|
if msdatas.status_code != 200 or not msdatas.json()["data"]:
|
|
407
659
|
raise ValueError(
|
|
408
|
-
"Failed to fetch MS data for
|
|
660
|
+
f"Failed to fetch MS data for sample ID={sample_id}."
|
|
409
661
|
)
|
|
410
662
|
|
|
411
|
-
res
|
|
663
|
+
res += [x for x in msdatas.json()["data"]]
|
|
412
664
|
|
|
413
665
|
for entry in res:
|
|
414
666
|
if "tenant_id" in entry:
|
|
@@ -421,189 +673,7 @@ class SeerSDK:
|
|
|
421
673
|
entry["raw_file_path"] = entry["raw_file_path"][
|
|
422
674
|
location(entry["raw_file_path"]) :
|
|
423
675
|
]
|
|
424
|
-
return res if not
|
|
425
|
-
|
|
426
|
-
def get_plate(self, plate_id: str, df: bool = False):
|
|
427
|
-
"""
|
|
428
|
-
Fetches MS data files for a `plate_id` (provided that the `plate_id` is valid and has samples associated with it) for an authenticated user.
|
|
429
|
-
|
|
430
|
-
The function returns a dict containing DataFrame objects if the `df` flag is passed in as True, otherwise a nested dict object is returned instead.
|
|
431
|
-
|
|
432
|
-
Parameters
|
|
433
|
-
----------
|
|
434
|
-
plate_id : str, optional
|
|
435
|
-
ID of the plate for which samples are to be fetched, defaulted to None.
|
|
436
|
-
df: bool
|
|
437
|
-
Boolean denoting whether the user wants the response back in JSON or a DataFrame object
|
|
438
|
-
|
|
439
|
-
Returns
|
|
440
|
-
-------
|
|
441
|
-
res: list or DataFrame
|
|
442
|
-
List/DataFrame of MS data file objects for the authenticated user.
|
|
443
|
-
|
|
444
|
-
Examples
|
|
445
|
-
-------
|
|
446
|
-
>>> from seer_pas_sdk import SeerSDK
|
|
447
|
-
>>> seer_sdk = SeerSDK()
|
|
448
|
-
>>> plate_id = "7ec8cad0-15e0-11ee-bdf1-bbaa73585acf"
|
|
449
|
-
|
|
450
|
-
>>> seer_sdk.get_plate(plate_id)
|
|
451
|
-
>>> [
|
|
452
|
-
{"id": "PLATE_ID_1_HERE" ... },
|
|
453
|
-
{"id": "PLATE_ID_2_HERE" ... }
|
|
454
|
-
]
|
|
455
|
-
|
|
456
|
-
>>> seer_sdk.get_plate(plate_id, df=True)
|
|
457
|
-
>>> id ... volume
|
|
458
|
-
0 PLATE_ID_1_HERE ... None
|
|
459
|
-
1 PLATE_ID_2_HERE ... None
|
|
460
|
-
|
|
461
|
-
[2 rows x 26 columns]
|
|
462
|
-
"""
|
|
463
|
-
plate_samples = self._get_samples_metadata(plate_id=plate_id)
|
|
464
|
-
sample_ids = [sample["id"] for sample in plate_samples]
|
|
465
|
-
return self.get_msdata(sample_ids, df)
|
|
466
|
-
|
|
467
|
-
def get_project(
|
|
468
|
-
self, project_id: str, msdata: bool = False, df: bool = False
|
|
469
|
-
):
|
|
470
|
-
"""
|
|
471
|
-
Fetches samples (and MS data files) for a `project_id` (provided that the `project_id` is valid and has samples associated with it) for an authenticated user.
|
|
472
|
-
|
|
473
|
-
The function returns a DataFrame object if the `df` flag is passed in as True, otherwise a nested dict object is returned instead. If the both the `df` and `msdata` flags are passed in as True, then a nested DataFrame object is returned instead.
|
|
474
|
-
|
|
475
|
-
Parameters
|
|
476
|
-
----------
|
|
477
|
-
project_id : str
|
|
478
|
-
ID of the project for which samples are to be fetched.
|
|
479
|
-
msdata: bool, optional
|
|
480
|
-
Boolean flag denoting whether the user wants relevant MS data files associated with the samples.
|
|
481
|
-
df: bool, optional
|
|
482
|
-
Boolean denoting whether the user wants the response back in JSON or a DataFrame object.
|
|
483
|
-
|
|
484
|
-
Returns
|
|
485
|
-
-------
|
|
486
|
-
res: list or DataFrame
|
|
487
|
-
List/DataFrame of plate objects for the authenticated user.
|
|
488
|
-
|
|
489
|
-
Examples
|
|
490
|
-
-------
|
|
491
|
-
>>> from seer_pas_sdk import SeerSDK
|
|
492
|
-
>>> seer_sdk = SeerSDK()
|
|
493
|
-
>>> project_id = "7e48e150-8a47-11ed-b382-bf440acece26"
|
|
494
|
-
|
|
495
|
-
>>> seer_sdk.get_project(project_id=project_id, msdata=False, df=False)
|
|
496
|
-
>>> {
|
|
497
|
-
"project_samples": [
|
|
498
|
-
{
|
|
499
|
-
"id": "SAMPLE_ID_1_HERE",
|
|
500
|
-
"sample_type": "Plasma",
|
|
501
|
-
...
|
|
502
|
-
...
|
|
503
|
-
},
|
|
504
|
-
{
|
|
505
|
-
"id": "SAMPLE_ID_2_HERE",
|
|
506
|
-
"sample_type": "Plasma",
|
|
507
|
-
...
|
|
508
|
-
...
|
|
509
|
-
}
|
|
510
|
-
]
|
|
511
|
-
}
|
|
512
|
-
|
|
513
|
-
>>> seer_sdk.get_project(project_id=project_id, msdata=True, df=False)
|
|
514
|
-
>>> [
|
|
515
|
-
{
|
|
516
|
-
"id": "SAMPLE_ID_1_HERE",
|
|
517
|
-
"sample_type": "Plasma",
|
|
518
|
-
...
|
|
519
|
-
...
|
|
520
|
-
"ms_data_files": [
|
|
521
|
-
{
|
|
522
|
-
"id": MS_DATA_FILE_ID_1_HERE,
|
|
523
|
-
"tenant_id": "TENANT_ID_HERE",
|
|
524
|
-
...
|
|
525
|
-
...
|
|
526
|
-
},
|
|
527
|
-
{
|
|
528
|
-
"id": MS_DATA_FILE_ID_1_HERE,
|
|
529
|
-
"tenant_id": "TENANT_ID_HERE",
|
|
530
|
-
...
|
|
531
|
-
...
|
|
532
|
-
}
|
|
533
|
-
]
|
|
534
|
-
},
|
|
535
|
-
{
|
|
536
|
-
"id": "SAMPLE_ID_2_HERE",
|
|
537
|
-
"sample_type": "Plasma",
|
|
538
|
-
...
|
|
539
|
-
...
|
|
540
|
-
"ms_data_files": [
|
|
541
|
-
{
|
|
542
|
-
"id": MS_DATA_FILE_ID_2_HERE,
|
|
543
|
-
"tenant_id": "TENANT_ID_HERE",
|
|
544
|
-
...
|
|
545
|
-
...
|
|
546
|
-
},
|
|
547
|
-
{
|
|
548
|
-
"id": MS_DATA_FILE_ID_2_HERE,
|
|
549
|
-
"tenant_id": "TENANT_ID_HERE",
|
|
550
|
-
...
|
|
551
|
-
...
|
|
552
|
-
}
|
|
553
|
-
]
|
|
554
|
-
}
|
|
555
|
-
]
|
|
556
|
-
|
|
557
|
-
>>> seer_sdk.get_project(project_id=project_id, msdata=True, df=True)
|
|
558
|
-
>>> id ... ms_data_files
|
|
559
|
-
0 829509f0-8a47-11ed-b382-bf440acece26 ... id ... g...
|
|
560
|
-
1 828d41c0-8a47-11ed-b382-bf440acece26 ... id ... g...
|
|
561
|
-
2 8294e2e0-8a47-11ed-b382-bf440acece26 ... id ... g...
|
|
562
|
-
3 8285eec0-8a47-11ed-b382-bf440acece26 ... id ... g...
|
|
563
|
-
|
|
564
|
-
[4 rows x 60 columns]
|
|
565
|
-
"""
|
|
566
|
-
if not project_id:
|
|
567
|
-
return ValueError("No project ID specified.")
|
|
568
|
-
|
|
569
|
-
sample_ids = []
|
|
570
|
-
project_samples = self._get_samples_metadata(
|
|
571
|
-
project_id=project_id, df=False
|
|
572
|
-
)
|
|
573
|
-
|
|
574
|
-
if msdata:
|
|
575
|
-
sample_ids = [
|
|
576
|
-
sample["id"] for sample in project_samples
|
|
577
|
-
] # will always contain unique values
|
|
578
|
-
ms_data_files = self.get_msdata(sample_ids=sample_ids, df=False)
|
|
579
|
-
|
|
580
|
-
for ms_data_file in ms_data_files:
|
|
581
|
-
for sample_index in range(len(project_samples)):
|
|
582
|
-
if (
|
|
583
|
-
project_samples[sample_index]["id"]
|
|
584
|
-
== ms_data_file["sample_id"]
|
|
585
|
-
):
|
|
586
|
-
if "ms_data_file" not in project_samples[sample_index]:
|
|
587
|
-
project_samples[sample_index]["ms_data_files"] = [
|
|
588
|
-
ms_data_file
|
|
589
|
-
]
|
|
590
|
-
else:
|
|
591
|
-
project_samples[sample_index][
|
|
592
|
-
"ms_data_files"
|
|
593
|
-
].append(ms_data_file)
|
|
594
|
-
|
|
595
|
-
if df:
|
|
596
|
-
for sample_index in range(len(project_samples)):
|
|
597
|
-
if "ms_data_files" in project_samples[sample_index]:
|
|
598
|
-
project_samples[sample_index]["ms_data_files"] = (
|
|
599
|
-
dict_to_df(
|
|
600
|
-
project_samples[sample_index]["ms_data_files"]
|
|
601
|
-
)
|
|
602
|
-
)
|
|
603
|
-
|
|
604
|
-
project_samples = dict_to_df(project_samples)
|
|
605
|
-
|
|
606
|
-
return project_samples
|
|
676
|
+
return res if not as_df else dict_to_df(res)
|
|
607
677
|
|
|
608
678
|
def get_analysis_protocols(
|
|
609
679
|
self,
|
|
@@ -623,7 +693,7 @@ class SeerSDK:
|
|
|
623
693
|
|
|
624
694
|
Returns
|
|
625
695
|
-------
|
|
626
|
-
protocols: list
|
|
696
|
+
protocols: list[dict]
|
|
627
697
|
List of analysis protocol objects for the authenticated user.
|
|
628
698
|
|
|
629
699
|
Examples
|
|
@@ -694,11 +764,16 @@ class SeerSDK:
|
|
|
694
764
|
self,
|
|
695
765
|
analysis_id: str = None,
|
|
696
766
|
folder_id: str = None,
|
|
697
|
-
show_folders=True,
|
|
698
|
-
analysis_only=True,
|
|
767
|
+
show_folders: bool = True,
|
|
768
|
+
analysis_only: bool = True,
|
|
769
|
+
project_id: str = None,
|
|
770
|
+
plate_name: str = None,
|
|
771
|
+
**kwargs,
|
|
699
772
|
):
|
|
700
773
|
"""
|
|
701
774
|
Returns a list of analyses objects for the authenticated user. If no id is provided, returns all analyses for the authenticated user.
|
|
775
|
+
Search parameters may be passed in as keyword arguments to filter the results. Acceptable values are 'analysis_name', 'folder_name', 'description', 'notes', or 'number_msdatafile'.
|
|
776
|
+
Only search on a single field is supported.
|
|
702
777
|
|
|
703
778
|
Parameters
|
|
704
779
|
----------
|
|
@@ -709,16 +784,25 @@ class SeerSDK:
|
|
|
709
784
|
ID of the folder to be fetched, defaulted to None.
|
|
710
785
|
|
|
711
786
|
show_folders : bool, optional
|
|
712
|
-
Mark True if folder contents are to be returned in the response, defaulted to True.
|
|
787
|
+
Mark True if folder contents are to be returned in the response, i.e. recursive search, defaulted to True.
|
|
713
788
|
Will be disabled if an analysis id is provided.
|
|
714
789
|
|
|
715
790
|
analysis_only : bool, optional
|
|
716
791
|
Mark True if only analyses objects are to be returned in the response, defaulted to True.
|
|
717
792
|
If marked false, folder objects will also be included in the response.
|
|
718
793
|
|
|
794
|
+
project_id : str, optional
|
|
795
|
+
ID of the project to be fetched, defaulted to None.
|
|
796
|
+
|
|
797
|
+
plate_name : str, optional
|
|
798
|
+
Name of the plate to be fetched, defaulted to None.
|
|
799
|
+
|
|
800
|
+
**kwargs : dict, optional
|
|
801
|
+
Search keyword parameters to be passed in. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
|
|
802
|
+
|
|
719
803
|
Returns
|
|
720
804
|
-------
|
|
721
|
-
analyses: dict
|
|
805
|
+
analyses: list[dict]
|
|
722
806
|
Contains a list of analyses objects for the authenticated user.
|
|
723
807
|
|
|
724
808
|
Examples
|
|
@@ -732,19 +816,67 @@ class SeerSDK:
|
|
|
732
816
|
{id: "YOUR_ANALYSIS_ID_HERE", ...}
|
|
733
817
|
]
|
|
734
818
|
|
|
735
|
-
>>> seer_sdk.
|
|
819
|
+
>>> seer_sdk.get_analysis("YOUR_ANALYSIS_ID_HERE")
|
|
820
|
+
>>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
|
|
821
|
+
|
|
822
|
+
>>> seer_sdk.get_analysis(folder_name="YOUR_FOLDER_NAME_HERE")
|
|
823
|
+
>>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
|
|
824
|
+
|
|
825
|
+
>>> seer_sdk.get_analysis(analysis_name="YOUR_ANALYSIS")
|
|
826
|
+
>>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
|
|
827
|
+
|
|
828
|
+
>>> seer_sdk.get_analysis(description="YOUR_DESCRIPTION")
|
|
736
829
|
>>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
|
|
737
830
|
"""
|
|
738
831
|
|
|
739
832
|
URL = f"{self._auth.url}api/v1/analyses"
|
|
740
833
|
res = []
|
|
741
834
|
|
|
835
|
+
search_field = None
|
|
836
|
+
search_item = None
|
|
837
|
+
if kwargs:
|
|
838
|
+
if len(kwargs.keys()) > 1:
|
|
839
|
+
raise ValueError("Please include only one search parameter.")
|
|
840
|
+
search_field = list(kwargs.keys())[0]
|
|
841
|
+
search_item = kwargs[search_field]
|
|
842
|
+
|
|
843
|
+
if not search_item:
|
|
844
|
+
raise ValueError(
|
|
845
|
+
f"Please provide a non null value for {search_field}"
|
|
846
|
+
)
|
|
847
|
+
|
|
848
|
+
if search_field and search_field not in [
|
|
849
|
+
"analysis_name",
|
|
850
|
+
"folder_name",
|
|
851
|
+
"analysis_protocol_name",
|
|
852
|
+
"description",
|
|
853
|
+
"notes",
|
|
854
|
+
"number_msdatafile",
|
|
855
|
+
]:
|
|
856
|
+
raise ValueError(
|
|
857
|
+
"Invalid search field. Please choose between 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'."
|
|
858
|
+
)
|
|
859
|
+
|
|
742
860
|
with self._get_auth_session() as s:
|
|
743
861
|
|
|
744
862
|
params = {"all": "true"}
|
|
745
863
|
if folder_id:
|
|
746
864
|
params["folder"] = folder_id
|
|
747
865
|
|
|
866
|
+
if search_field:
|
|
867
|
+
params["searchFields"] = search_field
|
|
868
|
+
params["searchItem"] = search_item
|
|
869
|
+
del params["all"]
|
|
870
|
+
|
|
871
|
+
if search_field == "folder_name":
|
|
872
|
+
params["searchFields"] = "analysis_name"
|
|
873
|
+
|
|
874
|
+
if project_id:
|
|
875
|
+
params["projectId"] = project_id
|
|
876
|
+
|
|
877
|
+
if plate_name:
|
|
878
|
+
params["plateName"] = plate_name
|
|
879
|
+
|
|
748
880
|
analyses = s.get(
|
|
749
881
|
f"{URL}/{analysis_id}" if analysis_id else URL, params=params
|
|
750
882
|
)
|
|
@@ -790,100 +922,706 @@ class SeerSDK:
|
|
|
790
922
|
]
|
|
791
923
|
return res
|
|
792
924
|
|
|
793
|
-
|
|
925
|
+
@deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
def get_analysis_result_protein_data(
    self, analysis_id: str, link: bool = False, pg: str = None
):
    """
    Given an analysis id, this function returns the protein data for the analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    link : bool
        When True, return the download links instead of loading the files
        into dataframes. Defaults to False.

    pg : str
        Protein group ID to filter dataframe results. Defaults to None.

    Returns
    -------
    dict
        With `link=True`: the keys 'npLink' and 'panelLink', each mapping to
        a dictionary holding the file 'url' (an empty string when the server
        reported no such link).
        Otherwise: the keys 'protein_np' and 'protein_panel', each holding
        the object returned by `url_to_df`, restricted to the rows of the
        requested protein group when `pg` is given.

    Raises
    ------
    ValueError
        If the links cannot be fetched, if no protein result file is listed,
        or if `pg` is given and matches no row in either file.
    """
    # The link lookup is shared with the non-deprecated code path.
    protein_data = self._get_search_result_protein_data(analysis_id)
    # Keep both keys present so the lookups below never raise KeyError.
    protein_data.setdefault("npLink", {"url": ""})
    protein_data.setdefault("panelLink", {"url": ""})

    if link:
        return protein_data

    protein_np = url_to_df(protein_data["npLink"]["url"])
    protein_panel = url_to_df(protein_data["panelLink"]["url"])

    if not pg:
        return {"protein_np": protein_np, "protein_panel": protein_panel}

    def _only_group(frame):
        # A boolean mask instead of a query string: quotes or backticks in
        # `pg` cannot change the expression, and a frame that lacks the
        # column simply yields no rows.
        if "Protein Group" not in frame.columns:
            return frame.iloc[0:0]
        return frame[frame["Protein Group"] == str(pg)]

    protein_np = _only_group(protein_np)
    protein_panel = _only_group(protein_panel)

    if protein_np.empty and protein_panel.empty:
        raise ValueError(
            f"Protein group {pg} not found in analysis {analysis_id}."
        )

    return {
        "protein_np": protein_np,
        "protein_panel": protein_panel,
    }
|
|
822
997
|
|
|
823
|
-
|
|
824
|
-
|
|
998
|
+
@deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
def get_analysis_result_peptide_data(
    self, analysis_id: str, link: bool = False, peptide: str = None
):
    """
    Given an analysis id, this function returns the peptide data for the analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    link : bool
        When True, return the download links instead of loading the files
        into dataframes. Defaults to False.

    peptide : str
        Peptide sequence to filter dataframe results. Defaults to None.

    Returns
    -------
    dict
        With `link=True`: the keys 'npLink' and 'panelLink', each mapping to
        a dictionary holding the file 'url' (an empty string when the server
        reported no such link).
        Otherwise: the keys 'peptide_np' and 'peptide_panel', each holding
        the object returned by `url_to_df`, restricted to the rows of the
        requested peptide when `peptide` is given.

    Raises
    ------
    ValueError
        If the links cannot be fetched, if no peptide result file is listed,
        or if `peptide` is given and matches no row in either file.
    """
    # The link lookup is shared with the non-deprecated code path.
    peptide_data = self._get_search_result_peptide_data(analysis_id)
    # Keep both keys present so the lookups below never raise KeyError.
    peptide_data.setdefault("npLink", {"url": ""})
    peptide_data.setdefault("panelLink", {"url": ""})

    if link:
        return peptide_data

    peptide_np = url_to_df(peptide_data["npLink"]["url"])
    peptide_panel = url_to_df(peptide_data["panelLink"]["url"])

    if not peptide:
        return {"peptide_np": peptide_np, "peptide_panel": peptide_panel}

    def _only_peptide(frame):
        # A boolean mask instead of a query string: quotes in `peptide`
        # cannot change the expression, and a frame that lacks the column
        # simply yields no rows.
        if "Peptide" not in frame.columns:
            return frame.iloc[0:0]
        return frame[frame["Peptide"] == str(peptide)]

    peptide_np = _only_peptide(peptide_np)
    peptide_panel = _only_peptide(peptide_panel)

    if peptide_np.empty and peptide_panel.empty:
        raise ValueError(
            f"Peptide {peptide} not found in analysis {analysis_id}."
        )

    return {
        "peptide_np": peptide_np,
        "peptide_panel": peptide_panel,
    }
|
|
1073
|
+
|
|
1074
|
+
def _get_search_result_protein_data(self, analysis_id: str):
    """
    Fetch the download links of the protein result files of an analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    Returns
    -------
    protein_data : dict[str, dict[str, str]]
        Dictionary with the keys 'npLink' and 'panelLink'. Each value is a
        dictionary holding the file 'url'; a link the server did not report
        (or reported as null) has an empty string as its url.

    Raises
    ------
    ValueError
        If the server does not answer with status 200, or if the response
        lists neither an 'npLink' nor a 'panelLink' entry.
    """
    with self._get_auth_session() as s:
        # Let the session build the query string so that unusual characters
        # in the id are encoded instead of corrupting the request.
        response = s.get(
            f"{self._auth.url}api/v1/data/protein",
            params={"analysisId": analysis_id, "retry": "false"},
        )

        if response.status_code != 200:
            raise ValueError(
                "Could not fetch protein data. Please verify that your analysis completed."
            )
        rows = response.json()

        protein_data = {}
        for row in rows:
            name = row.get("name")
            if name in ("npLink", "panelLink"):
                # "link" can be present but null; treat it like a missing link.
                protein_data[name] = {
                    "url": (row.get("link") or {}).get("url", "")
                }

        if not protein_data:
            raise ValueError("No protein result files found.")

        # Callers index both keys directly, so make sure both exist.
        for name in ("npLink", "panelLink"):
            protein_data.setdefault(name, {"url": ""})

        return protein_data
|
|
1111
|
+
|
|
1112
|
+
def _get_search_result_peptide_data(self, analysis_id: str):
    """
    Fetch the download links of the peptide result files of an analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    Returns
    -------
    peptide_data : dict[str, dict[str, str]]
        Dictionary with the keys 'npLink' and 'panelLink'. Each value is a
        dictionary holding the file 'url'; a link the server did not report
        (or reported as null) has an empty string as its url.

    Raises
    ------
    ValueError
        If the server does not answer with status 200, or if the response
        lists neither an 'npLink' nor a 'panelLink' entry.
    """
    with self._get_auth_session() as s:
        # Let the session build the query string so that unusual characters
        # in the id are encoded instead of corrupting the request.
        response = s.get(
            f"{self._auth.url}api/v1/data/peptide",
            params={"analysisId": analysis_id, "retry": "false"},
        )

        if response.status_code != 200:
            raise ValueError(
                "Could not fetch peptide data. Please verify that your analysis completed."
            )
        rows = response.json()

        peptide_data = {}
        for row in rows:
            name = row.get("name")
            if name in ("npLink", "panelLink"):
                # "link" can be present but null; treat it like a missing link.
                peptide_data[name] = {
                    "url": (row.get("link") or {}).get("url", "")
                }

        if not peptide_data:
            raise ValueError("No peptide result files found.")

        # Callers index both keys directly, so make sure both exist.
        for name in ("npLink", "panelLink"):
            peptide_data.setdefault(name, {"url": ""})

        return peptide_data
|
|
1158
|
+
|
|
1159
|
+
def list_search_result_files(self, analysis_id: str):
    """
    Given an analysis id, this function returns a list of files associated with the analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    Returns
    -------
    files: list[str]
        List of files associated with the analysis.

    Raises
    ------
    ValueError
        If the analysis metadata cannot be fetched, or if the analysis
        status is 'Failed' or missing.
    ServerError
        If the result file listing does not answer with status 200.
    """
    try:
        analysis_metadata = self.get_analysis(analysis_id)[0]
    except (IndexError, ServerError) as err:
        raise ValueError("Invalid analysis ID.") from err
    except Exception as err:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not turned into a ValueError.
        raise ValueError("Could not fetch analysis metadata.") from err

    if analysis_metadata.get("status") in ["Failed", None]:
        raise ValueError("Cannot find files for a failed analysis.")

    with self._get_auth_session() as s:
        response = s.get(
            f"{self._auth.url}api/v2/analysisResultFiles/{analysis_id}"
        )
        if response.status_code != 200:
            raise ServerError(
                "Could not fetch analysis result files. Please verify that your analysis completed."
            )
        payload = response.json()

    return [row["filename"] for row in payload["data"]]
|
|
1195
|
+
|
|
1196
|
+
def get_search_result(
    self, analysis_id: str, analyte_type: str, rollup: str
):
    """
    Load one of the files available via the "Download result files" button on the PAS UI.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis.

    analyte_type : str
        Type of the data. Acceptable options are one of ['protein', 'peptide', 'precursor'].

    rollup : str
        The desired file. Acceptable options are one of ['np', 'panel'].
        Precursor data is only available with 'np'.

    Returns
    -------
    pd.DataFrame
        The requested file, as loaded by `url_to_df`.

    Raises
    ------
    ValueError
        If `analysis_id` is empty, if `analyte_type` or `rollup` is not one
        of the accepted options, or if 'precursor' is combined with 'panel'.
    """
    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if analyte_type not in ["protein", "peptide", "precursor"]:
        raise ValueError(
            "Invalid data type. Please choose between 'protein', 'peptide', or 'precursor'."
        )

    if rollup not in ["np", "panel"]:
        raise ValueError(
            "Invalid file. Please choose between 'np', 'panel'."
        )

    if analyte_type == "precursor" and rollup == "panel":
        raise ValueError(
            "Precursor data is not available for panel rollup, please select np rollup."
        )

    if analyte_type == "precursor":
        # Precursor data is read from the report.tsv result file.
        report = self.get_search_result_file_url(
            analysis_id, filename="report.tsv"
        )
        return url_to_df(report["url"])

    if analyte_type == "protein":
        result_links = self._get_search_result_protein_data(analysis_id)
    else:
        result_links = self._get_search_result_peptide_data(analysis_id)

    # 'np' -> 'npLink', 'panel' -> 'panelLink'
    return url_to_df(result_links[f"{rollup}Link"]["url"])
|
|
1260
|
+
|
|
1261
|
+
def download_search_output_file(
    self, analysis_id: str, filename: str, download_path: str = ""
):
    """
    Given an analysis id and a analysis result filename, this function downloads the file to the specified path.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    filename : str
        Name of the file to be fetched. Files can be case insensitive and without file extensions.

    download_path : str
        Folder the file is downloaded to. Can be local or absolute as long as the path is valid.
        Defaults to the current working directory.

    Returns
    -------
    None
        Downloads the file to the specified path.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or `download_path` does not exist.
    Exception
        If the download fails twice, the error of the second attempt is
        re-raised.
    """

    if not download_path:
        download_path = os.getcwd()

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if not os.path.exists(download_path):
        raise ValueError(
            "Please specify a valid folder path as download path."
        )

    file = self.get_search_result_file_url(analysis_id, filename)
    file_url = file["url"]
    filename = file["filename"]

    # A result filename may contain sub-folders; create them up front so the
    # download cannot fail on a missing directory.
    target = os.path.join(download_path, filename)
    target_dir = os.path.dirname(target)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    print("Downloading file:", filename)
    attempts = 2
    for attempt in range(1, attempts + 1):
        try:
            with tqdm(
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                miniters=1,
                desc="Progress",
            ) as t:
                # NOTE(review): this turns off TLS certificate verification
                # for every later urllib request in the process. Kept for
                # backward compatibility; confirm it is still required.
                ssl._create_default_https_context = (
                    ssl._create_unverified_context
                )
                urllib.request.urlretrieve(
                    file_url,
                    target,
                    reporthook=download_hook(t),
                    data=None,
                )
            break
        except Exception:
            # One retry, then surface the real error instead of reporting
            # a download that never happened.
            if attempt == attempts:
                raise

    print(f"File {filename} downloaded successfully to {download_path}.")
    return
|
|
1329
|
+
|
|
1330
|
+
def get_search_result_file_url(self, analysis_id: str, filename: str):
    """
    Given an analysis id and a analysis result filename, this function returns the signed URL for the file.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    filename : str
        Name of the file to be fetched. Matching is case insensitive. A
        full filename is matched exactly first; otherwise the name is
        matched with its file extension ignored, so the extension may be
        left out.

    Returns
    -------
    file_url: dict[str, str]
        Dictionary containing the 'url' and 'filename' of the file.

    Raises
    ------
    ValueError
        If the name matches none of the analysis result files, if the
        analysis status is 'Failed' or missing, or if the server returns no
        url for the file.
    """

    def _stem(name):
        # Drop only the last extension; a name without a dot is kept whole.
        return name.rsplit(".", 1)[0] if "." in name else name

    requested = filename
    key = requested.casefold()

    analysis_result_files = self.list_search_result_files(analysis_id)
    by_name = {x.casefold(): x for x in analysis_result_files}
    by_stem = {_stem(x).casefold(): x for x in analysis_result_files}

    if key in by_name:
        # An exact match wins, so "report.tsv" is never answered with another
        # file that merely shares the stem.
        filename = by_name[key]
    elif _stem(key) in by_stem:
        filename = by_stem[_stem(key)]
    elif key in by_stem:
        # Extension left out of a name that itself contains a dot.
        filename = by_stem[key]
    else:
        raise ValueError(
            f"Filename {requested} not among the available analysis result files. Please use SeerSDK.list_search_result_files('{analysis_id}') to see available files for this analysis."
        )

    analysis_metadata = self.get_analysis(analysis_id)[0]
    if analysis_metadata.get("status") in ["Failed", None]:
        raise ValueError("Cannot generate links for failed searches.")

    with self._get_auth_session() as s:
        file_url = s.post(
            f"{self._auth.url}api/v1/analysisResultFiles/getUrl",
            json={
                "analysisId": analysis_id,
                "projectId": analysis_metadata["project_id"],
                "filename": filename,
            },
        )
        response = file_url.json()
        if not response.get("url"):
            raise ValueError(f"File {filename} not found.")

        response["filename"] = filename
        return response
|
|
1382
|
+
|
|
1383
|
+
@deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
def get_analysis_result_files(
    self,
    analysis_id: str,
    filenames: _List[str],
    download_path: str = "",
    protein_all: bool = False,
    peptide_all: bool = False,
):
    """
    Given an analysis id and a list of file names, this function returns the file in form of downloadable content, if applicable.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    filenames : list
        List of filenames to be fetched. Only csv and tsv files are supported.

    download_path : str
        String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid. Defaults to an empty string.

    protein_all : bool
        Boolean flag denoting whether the user wants the default protein data. Defaults to False.

    peptide_all : bool
        Boolean flag denoting whether the user wants the default peptide data. Defaults to False.

    Returns
    -------
    links: dict[str, pd.DataFrame]
        Contains dataframe objects for the requested files. If a filename is not found, it is skipped.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or `download_path` is given but does not exist.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK()
    >>> analysis_id = "YOUR_ANALYSIS_ID_HERE"
    >>> filenames = ["protein_np.tsv", "peptide_np.tsv"]
    >>> seer_sdk.get_analysis_result_files(analysis_id, filenames)
        {
            "protein_np.tsv": <protein_np dataframe object>,
            "peptide_np.tsv": <peptide_np dataframe object>
        }
    >>> seer_sdk.get_analysis_result_files(analysis_id, [], protein_all=True, peptide_all=True)
        {
            "protein_np.tsv": <protein_np dataframe object>,
            "protein_panel.tsv": <protein_panel dataframe object>,
            "peptide_np.tsv": <peptide_np dataframe object>,
            "peptide_panel.tsv": <peptide_panel dataframe object>
        }
    >>> seer_sdk.get_analysis_result_files(analysis_id, ["report.tsv"], download_path="/Users/Downloads")
        { "report.tsv": <report.tsv dataframe object> }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if download_path and not os.path.exists(download_path):
        raise ValueError(
            "Please specify a valid folder path as download path."
        )

    # Each link set is requested at most once, and only when it is needed.
    link_sets = {}

    def _protein_links():
        if "protein" not in link_sets:
            link_sets["protein"] = self.get_analysis_result_protein_data(
                analysis_id, link=True
            )
        return link_sets["protein"]

    def _peptide_links():
        if "peptide" not in link_sets:
            link_sets["peptide"] = self.get_analysis_result_peptide_data(
                analysis_id, link=True
            )
        return link_sets["peptide"]

    # Default result files and where their url comes from.
    default_files = {
        "protein_np.tsv": (_protein_links, "npLink"),
        "protein_panel.tsv": (_protein_links, "panelLink"),
        "peptide_np.tsv": (_peptide_links, "npLink"),
        "peptide_panel.tsv": (_peptide_links, "panelLink"),
    }

    links = {}
    if protein_all:
        links["protein_np.tsv"] = _protein_links()["npLink"]["url"]
        links["protein_panel.tsv"] = _protein_links()["panelLink"]["url"]
    if peptide_all:
        links["peptide_np.tsv"] = _peptide_links()["npLink"]["url"]
        links["peptide_panel.tsv"] = _peptide_links()["panelLink"]["url"]

    # Allow user to pass in filenames without an extension.
    analysis_result_files = self.list_search_result_files(analysis_id)
    analysis_result_files_prefix_mapper = {
        ".".join(x.split(".")[:-1]): x for x in analysis_result_files
    }

    # dict.fromkeys removes duplicates like set() but keeps the caller's order.
    for filename in dict.fromkeys(filenames):
        filename = analysis_result_files_prefix_mapper.get(filename, filename)

        if filename in default_files:
            if filename in links:
                # Already collected through protein_all / peptide_all.
                continue
            fetch_links, link_key = default_files[filename]
            links[filename] = fetch_links()[link_key]["url"]
            continue

        try:
            # NOTE(review): the original called a private
            # `_get_search_result_file_url`, which is not defined in the
            # visible part of the class; the public method takes the same
            # arguments and returns a dict with 'url'.
            links[filename] = self.get_search_result_file_url(
                analysis_id, filename
            )["url"]
        except Exception as e:
            # Best effort: report the problem and skip the file.
            print(e)
            continue

    links = {
        k: url_to_df(v, is_tsv=k.endswith(".tsv"))
        for k, v in links.items()
    }
    if download_path:
        name = f"{download_path}/downloads/{analysis_id}"
        print(f"Start download to path {name}")
        os.makedirs(name, exist_ok=True)
        for filename, content in links.items():
            separator = "\t" if filename.endswith(".tsv") else ","
            content.to_csv(f"{name}/{filename}", sep=separator)
        print("Download complete.")

    return links
|
|
1525
|
+
|
|
1526
|
+
@deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
def get_analysis_result(
    self,
    analysis_id: str,
    download_path: str = "",
    diann_report: bool = False,
):
    """
    Given an `analysis_id`, this function returns all relevant analysis data files in form of downloadable content, if applicable.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    download_path : str
        String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid. Defaults to an empty string.

    diann_report : bool
        Boolean flag denoting whether the user wants the DIANN report to be included in the response. Defaults to False.

    Returns
    -------
    links: dict
        Contains dataframe objects for the `analysis_id`, given that the analysis has been complete.
        When `download_path` is given, the files are written to
        `<download_path>/downloads/<analysis_id>` and a status dictionary is
        returned instead.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or `download_path` is given but does not exist.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK()

    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
    >>> {
            "peptide_np": <peptide_np dataframe object>,
            "peptide_panel": <peptide_panel dataframe object>,
            "protein_np": <protein_np dataframe object>,
            "protein_panel": <protein_panel dataframe object>
        }

    >>> seer_sdk.get_analysis_result("YOUR_DIANN_ANALYSIS_ID_HERE", diann_report=True)
    >>> {
            "peptide_np": <peptide_np dataframe object>,
            "peptide_panel": <peptide_panel dataframe object>,
            "protein_np": <protein_np dataframe object>,
            "protein_panel": <protein_panel dataframe object>,
            "diann_report": <report.tsv dataframe object>
        }

    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
    >>> { "status": "Download complete." }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if download_path and not os.path.exists(download_path):
        raise ValueError("The download path you entered is invalid.")

    protein_data = self.get_analysis_result_protein_data(
        analysis_id, link=True
    )
    peptide_data = self.get_analysis_result_peptide_data(
        analysis_id, link=True
    )
    links = {
        "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
        "peptide_panel": url_to_df(peptide_data["panelLink"]["url"]),
        "protein_np": url_to_df(protein_data["npLink"]["url"]),
        "protein_panel": url_to_df(protein_data["panelLink"]["url"]),
    }

    if diann_report:
        # NOTE(review): the original called a private
        # `_get_search_result_file_url`, which is not defined in the visible
        # part of the class; the public method takes the same arguments and
        # returns a dict with 'url'.
        diann_report_url = self.get_search_result_file_url(
            analysis_id, "report.tsv"
        )
        links["diann_report"] = url_to_df(diann_report_url["url"])

    if download_path:
        name = f"{download_path}/downloads/{analysis_id}"
        os.makedirs(name, exist_ok=True)

        # Files keep the historical ".csv" name although they are written
        # tab-separated, so existing consumers keep finding them.
        for key, frame in links.items():
            frame.to_csv(f"{name}/{key}.csv", sep="\t")

        return {"status": "Download complete."}

    return links
|
|
887
1625
|
|
|
888
1626
|
def analysis_complete(self, analysis_id: str):
|
|
889
1627
|
"""
|
|
@@ -932,7 +1670,7 @@ class SeerSDK:
|
|
|
932
1670
|
|
|
933
1671
|
Returns
|
|
934
1672
|
-------
|
|
935
|
-
list
|
|
1673
|
+
list[str]
|
|
936
1674
|
Contains the list of files in the folder.
|
|
937
1675
|
|
|
938
1676
|
Examples
|
|
@@ -987,8 +1725,8 @@ class SeerSDK:
|
|
|
987
1725
|
|
|
988
1726
|
Returns
|
|
989
1727
|
-------
|
|
990
|
-
message: dict
|
|
991
|
-
Contains the message whether the files were downloaded or not.
|
|
1728
|
+
message: dict[str, str]
|
|
1729
|
+
Contains the 'message' whether the files were downloaded or not.
|
|
992
1730
|
"""
|
|
993
1731
|
|
|
994
1732
|
urls = []
|
|
@@ -1013,9 +1751,7 @@ class SeerSDK:
|
|
|
1013
1751
|
print(f'Downloading files to "{name}"\n')
|
|
1014
1752
|
|
|
1015
1753
|
URL = f"{self._auth.url}api/v1/msdataindex/download/getUrl"
|
|
1016
|
-
tenant_id =
|
|
1017
|
-
"custom:tenantId"
|
|
1018
|
-
]
|
|
1754
|
+
tenant_id = self._auth.active_tenant_id
|
|
1019
1755
|
|
|
1020
1756
|
for path in paths:
|
|
1021
1757
|
with self._get_auth_session() as s:
|
|
@@ -1033,7 +1769,6 @@ class SeerSDK:
|
|
|
1033
1769
|
"Could not download file. Please check if the backend is running."
|
|
1034
1770
|
)
|
|
1035
1771
|
urls.append(download_url.text)
|
|
1036
|
-
|
|
1037
1772
|
for i in range(len(urls)):
|
|
1038
1773
|
filename = paths[i].split("/")[-1]
|
|
1039
1774
|
url = urls[i]
|
|
@@ -1077,22 +1812,80 @@ class SeerSDK:
|
|
|
1077
1812
|
|
|
1078
1813
|
return {"message": f"Files downloaded successfully to '{name}'"}
|
|
1079
1814
|
|
|
1080
|
-
def
|
|
1815
|
+
def get_group_analysis(self, analysis_id, group_analysis_id=None, **kwargs):
    """
    Fetch group analysis records for an analysis.

    Parameters
    ----------
    analysis_id : str
        The analysis id, sent to the server as the 'analysisid' query parameter.

    group_analysis_id : str, optional
        The group analysis id, defaulted to None. When provided, it is appended
        to the endpoint URL and sent as the 'id' query parameter; any search
        keyword is ignored in that case.

    **kwargs : dict, optional
        A single search keyword, either 'name' or 'description', with a
        non-empty value. Only used when no group_analysis_id is given.

    Returns
    -------
    res
        The parsed JSON body of the server response.

    Raises
    ------
    ValueError
        If more than one search keyword is supplied, the keyword is not
        'name' or 'description', or its value is empty.
    ServerError
        If the server does not answer with status code 200.
    """
    query = {"analysisid": analysis_id}

    # Search keywords only apply when listing; a specific id takes precedence.
    if kwargs and not group_analysis_id:
        if len(kwargs) > 1:
            raise ValueError("Please include only one search parameter.")

        ((field, term),) = kwargs.items()
        if field not in ("name", "description"):
            raise ValueError(
                "Invalid search field. Please choose between 'name' or 'description'."
            )
        if not term:
            raise ValueError(f"Please provide a non null value for {field}")

        query.update(searchFields=field, searchItem=term)

    endpoint = f"{self._auth.url}api/v1/groupanalysis/groupanalyses"
    if group_analysis_id:
        endpoint = f"{endpoint}/{group_analysis_id}"
        query["id"] = group_analysis_id

    with self._get_auth_session() as session:
        reply = session.get(endpoint, params=query)
        if reply.status_code != 200:
            raise ServerError("Request failed. Please check your parameters.")
        return reply.json()
|
|
1870
|
+
|
|
1871
|
+
def group_analysis_results(self, analysis_id: str, group_analysis_id=None):
|
|
1081
1872
|
"""
|
|
1082
1873
|
Returns the group analysis data for the given analysis id, provided it exists.
|
|
1083
1874
|
|
|
1875
|
+
If no group analysis id is provided, the function will return the most recent group analysis data for the given analysis id.
|
|
1876
|
+
|
|
1084
1877
|
Parameters
|
|
1085
1878
|
----------
|
|
1086
1879
|
analysis_id : str
|
|
1087
1880
|
The analysis id.
|
|
1088
1881
|
|
|
1089
|
-
|
|
1090
|
-
The
|
|
1882
|
+
group_analysis_id : str, optional
|
|
1883
|
+
The group analysis id, defaulted to None.
|
|
1091
1884
|
|
|
1092
1885
|
Returns
|
|
1093
1886
|
-------
|
|
1094
1887
|
res : dict
|
|
1095
|
-
A dictionary containing the group analysis
|
|
1888
|
+
A dictionary containing the group analysis object.
|
|
1096
1889
|
|
|
1097
1890
|
Examples
|
|
1098
1891
|
-------
|
|
@@ -1142,7 +1935,6 @@ class SeerSDK:
|
|
|
1142
1935
|
"peptide_processed_long_form_file_url": "",
|
|
1143
1936
|
},
|
|
1144
1937
|
},
|
|
1145
|
-
"box_plot": [],
|
|
1146
1938
|
}
|
|
1147
1939
|
|
|
1148
1940
|
# Pre-GA data call
|
|
@@ -1153,7 +1945,7 @@ class SeerSDK:
|
|
|
1153
1945
|
json={"analysisId": analysis_id, "grouping": "condition"},
|
|
1154
1946
|
)
|
|
1155
1947
|
if protein_pre_data.status_code != 200:
|
|
1156
|
-
raise
|
|
1948
|
+
raise ServerError(
|
|
1157
1949
|
"Invalid request. Could not fetch group analysis protein pre data. Please check your parameters."
|
|
1158
1950
|
)
|
|
1159
1951
|
|
|
@@ -1161,15 +1953,15 @@ class SeerSDK:
|
|
|
1161
1953
|
|
|
1162
1954
|
res["pre"]["protein"] = protein_pre_data
|
|
1163
1955
|
|
|
1164
|
-
with
|
|
1165
|
-
s.headers.update(HEADERS)
|
|
1956
|
+
with self._get_auth_session() as s:
|
|
1166
1957
|
|
|
1167
1958
|
peptide_pre_data = s.post(
|
|
1168
1959
|
url=f"{URL}api/v2/groupanalysis/peptide",
|
|
1169
1960
|
json={"analysisId": analysis_id, "grouping": "condition"},
|
|
1170
1961
|
)
|
|
1962
|
+
|
|
1171
1963
|
if peptide_pre_data.status_code != 200:
|
|
1172
|
-
raise
|
|
1964
|
+
raise ServerError(
|
|
1173
1965
|
"Invalid request. Could not fetch group analysis peptide pre data. Please check your parameters."
|
|
1174
1966
|
)
|
|
1175
1967
|
|
|
@@ -1177,18 +1969,21 @@ class SeerSDK:
|
|
|
1177
1969
|
res["pre"]["peptide"] = peptide_pre_data
|
|
1178
1970
|
|
|
1179
1971
|
# Post-GA data call
|
|
1180
|
-
with
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
"Invalid request. Could not fetch group analysis post data. Please check your parameters."
|
|
1972
|
+
with self._get_auth_session() as s:
|
|
1973
|
+
if group_analysis_id:
|
|
1974
|
+
get_saved_result = self.get_group_analysis(
|
|
1975
|
+
analysis_id=analysis_id,
|
|
1976
|
+
group_analysis_id=group_analysis_id,
|
|
1977
|
+
)
|
|
1978
|
+
else:
|
|
1979
|
+
get_saved_result = s.get(
|
|
1980
|
+
f"{URL}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
|
|
1190
1981
|
)
|
|
1191
|
-
|
|
1982
|
+
if get_saved_result.status_code != 200:
|
|
1983
|
+
raise ServerError(
|
|
1984
|
+
"Could not fetch saved results. Please check your analysis id."
|
|
1985
|
+
)
|
|
1986
|
+
get_saved_result = get_saved_result.json()
|
|
1192
1987
|
|
|
1193
1988
|
# Protein data
|
|
1194
1989
|
if "pgResult" in get_saved_result:
|
|
@@ -1198,6 +1993,13 @@ class SeerSDK:
|
|
|
1198
1993
|
if "peptideResult" in get_saved_result:
|
|
1199
1994
|
res["post"]["peptide"] = get_saved_result["peptideResult"]
|
|
1200
1995
|
|
|
1996
|
+
# require that either protein or peptide data exists
|
|
1997
|
+
# Error handling is necessary for volcano plot calculations downstream
|
|
1998
|
+
if not (res["post"].get("protein") or res["post"].get("peptide")):
|
|
1999
|
+
raise ValueError(
|
|
2000
|
+
"No group analysis data returned from server."
|
|
2001
|
+
)
|
|
2002
|
+
|
|
1201
2003
|
# Protein URLs
|
|
1202
2004
|
if "pgProcessedFileUrl" in get_saved_result:
|
|
1203
2005
|
res["post"]["protein_url"]["protein_processed_file_url"] = (
|
|
@@ -1219,32 +2021,714 @@ class SeerSDK:
|
|
|
1219
2021
|
"peptide_processed_long_form_file_url"
|
|
1220
2022
|
] = get_saved_result["peptideProcessedLongFormFileUrl"]
|
|
1221
2023
|
|
|
1222
|
-
|
|
1223
|
-
if not box_plot:
|
|
1224
|
-
del res["box_plot"]
|
|
1225
|
-
return res
|
|
2024
|
+
return res
|
|
1226
2025
|
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
2026
|
+
def get_box_plot_data(
    self,
    analysis_id: str,
    group_analysis_id: str = None,
    feature_ids: _List[str] = None,
    show_significant_only: bool = False,
    as_df=False,
    volcano_plot=False,
    cached=False,
):
    """Get box plot data for a given analysis, formatted as a DataFrame or a list of dictionaries.

    Args:
        analysis_id (str): ID of the analysis.
        group_analysis_id (str, optional): ID of the group analysis. When given, the filters stored on that group analysis are forwarded with the intensity request. Defaults to None.
        feature_ids (list[str], optional): Restrict the request to these feature ids. Defaults to None (no restriction).
        show_significant_only (bool, optional): Mark true if only significant results are to be returned. Defaults to False.
        as_df (bool, optional): whether the result should be converted to a DataFrame. Defaults to False.
        volcano_plot (bool, optional): Mark true to include the volcano plot data in the return object. Defaults to False.
        cached (bool, optional): Mark true to return volcano plot data as a VolcanoPlotBuilder object. No effect if volcano_plot flag is marked false. Defaults to False.

    Raises:
        ServerError: Could not fetch box plot data.

    Returns:
        list[dict] | pd.DataFrame : the intensity rows returned by the server, restricted to
            features present in the volcano plot gene map, each extended with 'condition' and 'gene'.
        dict : when volcano_plot is True, a dictionary with keys 'box_plot' (the value above)
            and 'volcano_plot' (a VolcanoPlotBuilder, DataFrame, or list depending on cached / as_df).
    """

    with self._get_auth_session() as s:

        # API call 1 - get volcano plot data for filtered results and gene mapping
        builder = self.get_volcano_plot_data(
            analysis_id, cached=True, group_analysis_id=group_analysis_id
        )
        gene_map = builder.protein_gene_map

        # API call 2 - get analysis samples to get condition
        samples_metadata = self._get_analysis_samples(analysis_id=analysis_id)

        payload = {"analysisId": analysis_id}
        if feature_ids:
            payload["featureIds"] = ",".join(feature_ids)

        # API call 3 - get group analysis data. This gives us the filters for the group analysis
        if group_analysis_id:
            ga = self.get_group_analysis(
                analysis_id, group_analysis_id=group_analysis_id
            )
            filters = ga["parameters"]["filters"]
            if filters:
                payload["filters"] = filters

        # The builder reports either "peptide" or a protein type; the endpoint
        # is sent "proteingroup" for anything that is not "peptide".
        payload["featureType"] = (
            builder.type if builder.type == "peptide" else "proteingroup"
        )

        # API call 4 - get intensities
        response = s.post(
            url=f"{self._auth.url}api/v1/groupanalysis/rawdata", json=payload
        )
        if response.status_code != 200:
            raise ServerError("Could not fetch box plot data.")
        box_plot_data = response.json()

    # Rows are keyed by "peptide" for peptide analyses and "proteinId" otherwise.
    feature_key = "peptide" if builder.type == "peptide" else "proteinId"

    # Keep only features the volcano plot knows about, so the gene lookup
    # below cannot miss.
    box_plot_data = [row for row in box_plot_data if row[feature_key] in gene_map]

    if show_significant_only:
        significant_rows = set(builder.get_significant_rows())
        box_plot_data = [
            row for row in box_plot_data if row[feature_key] in significant_rows
        ]

    sample_id_condition = {x["id"]: x["condition"] for x in samples_metadata}
    for row in box_plot_data:
        row["condition"] = sample_id_condition.get(row["sampleId"], None)
        row["gene"] = gene_map[row[feature_key]]

    if as_df:
        box_plot_data = pd.DataFrame(box_plot_data)

    if not volcano_plot:
        return box_plot_data

    if cached:
        vplot = builder
    elif as_df:
        vplot = pd.DataFrame(builder.volcano_plot)
    else:
        vplot = builder.volcano_plot

    return {"box_plot": box_plot_data, "volcano_plot": vplot}
|
|
2135
|
+
|
|
2136
|
+
def get_all_volcano_plot_data(self, analysis_id: str, box_plot=False):
    """
    Collect volcano plot data for every group analysis of an analysis.

    Args:
        analysis_id (str): ID of the analysis.
        box_plot (bool, optional): Mark true to include box plot data in the return object. Defaults to False.

    Returns:
        dict: maps each group analysis id to a dictionary. With box_plot False the inner
            dictionary holds a single 'volcano_plot' entry; with box_plot True it holds
            whatever get_box_plot_data returns for that group analysis. Empty when the
            analysis has no group analyses with an id.
    """
    listing = self.get_group_analysis(analysis_id).get("data", [])
    ga_ids = [entry["id"] for entry in listing if entry.get("id")]

    plots = {}
    for ga_id in ga_ids:
        if box_plot:
            # Shallow copy of the combined box plot / volcano plot result.
            plots[ga_id] = dict(
                self.get_box_plot_data(
                    analysis_id, ga_id, as_df=True, volcano_plot=True
                )
            )
        else:
            plots[ga_id] = {
                "volcano_plot": self.get_volcano_plot_data(
                    analysis_id, group_analysis_id=ga_id, as_df=True
                )
            }

    return plots
|
|
2177
|
+
|
|
2178
|
+
def _get_analysis_pca(
|
|
2179
|
+
self,
|
|
2180
|
+
analysis_ids: _List[str],
|
|
2181
|
+
sample_ids: _List[str],
|
|
2182
|
+
type: str,
|
|
2183
|
+
hide_control: bool = False,
|
|
2184
|
+
):
|
|
2185
|
+
"""
|
|
2186
|
+
****************
|
|
2187
|
+
[UNEXPOSED METHOD CALL]
|
|
2188
|
+
****************
|
|
2189
|
+
Get PCA data for given analyses and samples.
|
|
2190
|
+
Args:
|
|
2191
|
+
analysis_ids (list[str]): IDs of the analyses of interest.
|
|
2192
|
+
sample_ids (list[str]): IDs of the samples of interest.
|
|
2193
|
+
type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
|
|
2194
|
+
hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
|
|
2195
|
+
Raises:
|
|
2196
|
+
ValueError: No analysis IDs provided.
|
|
2197
|
+
ValueError: No sample IDs provided.
|
|
2198
|
+
ValueError: Invalid type provided.
|
|
2199
|
+
ServerError: Could not fetch PCA data.
|
|
2200
|
+
Returns:
|
|
2201
|
+
dict[str, list|float]
|
|
2202
|
+
Returns response object containing 'xContributionRatio' (float), 'yContributionRatio' (float), 'samples' (list[dict]), and 'points' (list[float]).
|
|
2203
|
+
"""
|
|
2204
|
+
if not analysis_ids:
|
|
2205
|
+
raise ValueError("Analysis IDs cannot be empty.")
|
|
2206
|
+
if type not in ["protein", "peptide"]:
|
|
2207
|
+
raise ValueError("Type must be either 'protein' or 'peptide'.")
|
|
2208
|
+
|
|
2209
|
+
URL = f"{self._auth.url}api/v1/analysisqcpca"
|
|
2210
|
+
|
|
2211
|
+
with self._get_auth_session() as s:
|
|
2212
|
+
json = {
|
|
2213
|
+
"analysisIds": ",".join(analysis_ids),
|
|
2214
|
+
"type": type,
|
|
2215
|
+
}
|
|
2216
|
+
if sample_ids:
|
|
2217
|
+
json["sampleIds"] = ",".join(sample_ids)
|
|
2218
|
+
|
|
2219
|
+
# specify hideControl as a string - unexpected behavior occurs if a boolean is passed
|
|
2220
|
+
if hide_control:
|
|
2221
|
+
json["hideControl"] = "true"
|
|
2222
|
+
else:
|
|
2223
|
+
json["hideControl"] = "false"
|
|
2224
|
+
|
|
2225
|
+
pca_data = s.post(URL, json=json)
|
|
2226
|
+
|
|
2227
|
+
if pca_data.status_code != 200:
|
|
2228
|
+
raise ServerError("Could not fetch PCA data.")
|
|
2229
|
+
|
|
2230
|
+
return pca_data.json()
|
|
2231
|
+
|
|
2232
|
+
def get_analysis_pca_data(
    self,
    analysis_ids: _List[str],
    type: str,
    sample_ids: _List[str] = None,
    hide_control: bool = False,
    as_df=False,
):
    """
    Get PCA data for given analyses and samples formatted in a DataFrame or a dictionary.
    Args:
        analysis_ids (list[str]): IDs of the analyses of interest.
        type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
        sample_ids (list[str], optional): IDs of the samples of interest. Defaults to None (no restriction).
        hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
        as_df (bool, optional): whether the result should be converted to a DataFrame. Defaults to False.
    Raises:
        ValueError: No analysis IDs provided.
        ValueError: Invalid type parameter provided.
        ServerError: Could not fetch PCA data.
    Returns:
        A dictionary with the following keys:
            - x_contribution_ratio (float): Proportion of variance explained by the x-axis.
            - y_contribution_ratio (float | None): Proportion of variance explained by the y-axis. None when the server omits it.
            - data (list[dict] | pd.DataFrame): A list of dictionaries or a dataframe with each row containing the following keys/columns:
                - sample_name (str): Name of the sample.
                - plate_name (str): Name of the plate.
                - sample_id (int): ID of the sample.
                - condition (str): Condition.
                - PC1 (float): X-value of the PCA point.
                - PC2 (float): Y-value of the PCA point.
                - custom_* (str): Custom fields. Included if meaningful, i.e., not null, in the data.
              When the server returns zero points, data is an empty list (or a
              DataFrame with zero rows and only the generic columns).
    Examples
    --------
    >>> from seer_pas_sdk import *
    >>> sdk = SeerSDK()
    >>> sdk.get_analysis_pca_data(
            analysis_ids=["analysis_id"],
            sample_ids=["sample_id"],
            type="protein",
            hide_control=False
        )
    """
    pca_data = self._get_analysis_pca(
        analysis_ids, sample_ids or [], type, hide_control
    )

    # common columns returned by the API
    generic_columns = [
        "sample_name",
        "plate_name",
        "sample_id",
        "condition",
        "PC1",
        "PC2",
    ]

    # edge case where yContributionRatio is absent when zero points are returned.
    y_contribution_ratio = pca_data.get("yContributionRatio")
    x_contribution_ratio = pca_data["xContributionRatio"]
    samples = pca_data["samples"]
    points = pca_data["points"]

    df = pd.DataFrame(
        [
            {**sample, "PC1": point[0], "PC2": point[1]}
            for sample, point in zip(samples, points)
        ]
    )

    # Keep custom columns only when at least one sample carries a value.
    custom_columns = [
        col
        for col in df.columns
        if col not in generic_columns
        and col.startswith("custom_")
        and df[col].notna().any()
    ]

    # reindex (rather than drop / label selection) so that an empty response,
    # which yields a frame with no columns at all, does not raise KeyError.
    # Column order is custom columns first, then the generic ones.
    df = df.reindex(columns=custom_columns + generic_columns)

    # Return the data as a DataFrame only if as_df is True
    if not as_df:
        df = df.to_dict(orient="records")

    return dict(
        x_contribution_ratio=x_contribution_ratio,
        y_contribution_ratio=y_contribution_ratio,
        data=df,
    )
|
|
2334
|
+
|
|
2335
|
+
def get_analysis_hierarchical_clustering(
    self,
    analysis_ids: _List[str],
    sample_ids: _List[str] = None,
    hide_control: bool = False,
):
    """
    Get hierarchical clustering data for given analyses and samples.
    Args:
        analysis_ids (list[str]): IDs of the analyses.
        sample_ids (list[str], optional): IDs of the samples. Omitted from the request when empty. Defaults to None.
        hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
    Raises:
        ValueError: No analysis IDs provided.
        ValueError: Response status code is not 200.
        ValueError: Response does not contain a 'samples' entry.
    Returns:
        dict
            Hierarchical clustering data returned by the API.
    """
    if not analysis_ids:
        raise ValueError("Analysis IDs cannot be empty.")

    URL = f"{self._auth.url}api/v1/analysishcluster"

    with self._get_auth_session() as s:
        payload = {
            "analysisIds": ",".join(analysis_ids),
        }
        if sample_ids:
            payload["sampleIds"] = ",".join(sample_ids)

        # specify hideControl as a string
        # Python bool values are not recognized by the API
        payload["hideControl"] = "true" if hide_control else "false"

        hc_data = s.post(URL, json=payload)

        if hc_data.status_code != 200:
            raise ValueError("Invalid request. Please check your parameters.")

        data = hc_data.json()

    if "samples" not in data:
        raise ValueError("No sample data returned from server.")

    # Shallow-copy each sample record; no fields are filtered out here.
    data["samples"] = [dict(sample) for sample in data["samples"]]

    return data
|
|
2396
|
+
|
|
2397
|
+
def get_ppi_network_data(self, significant_pgs: _List[str], species: str = None):
    """
    Request PPI network data for a set of significant protein groups.
    Args:
        significant_pgs (_List[str]): Significant protein groups; sent as a comma-separated string.
        species (str, optional): Species of interest. Omitted from the request when empty. Defaults to None.
    Raises:
        ValueError: No significant protein groups provided.
        ValueError: Response status code is not 200.
    Returns:
        dict
            Response returned by the API.
    """
    if not significant_pgs:
        raise ValueError("Significant protein groups cannot be empty.")

    endpoint = f"{self._auth.url}api/v1/groupanalysis/stringdb"

    with self._get_auth_session() as session:
        body = {"significantPGs": ",".join(significant_pgs)}
        if species:
            body["species"] = species

        reply = session.post(endpoint, json=body)
        if reply.status_code != 200:
            raise ValueError("Server error - bad response")

        return reply.json()
|
|
2430
|
+
|
|
2431
|
+
# groups are user defined by the sample description file
|
|
2432
|
+
def get_cluster_heatmap_data(
    self,
    analysis_id: str,
    grouping: str,
    groups: _List[str],
    contrasts: _List[_Tuple[int, ...]],
    stat_test: str,
    feature_type: str,
    significant_pgs: _List[str] = None,
):
    """Get cluster heatmap data for the given analysis.

    Args:
        analysis_id (str): ID of the analysis
        grouping (str): Category of sample groups
        groups (_List[str]): sample groups
        contrasts (_List[_Tuple[int, ...]]): Indicate which groups are compared against each other. e.g. [(0, 1, -1, 0), (1, 0, 0, -1)]
        stat_test (str): Statistical test to be used
        feature_type (str): Type of feature to be used, either proteingroup or peptide
        significant_pgs (_List[str], optional): significant protein group IDs. Defaults to None, which sends an empty list.

    Raises:
        ValueError: "Feature type must be either 'proteingroup' or 'peptide'."
        ValueError: "Stat test must be either 'ttest' or 'wilcoxon'."
        ValueError: Invalid contrast value.
        ValueError: Server error

    Returns:
        dict: the response object
            clusterProtein: List of protein clusters
                clusters:
                    indexes: list[int], List of indexes
                    height: int, Height of the cluster
                    children: list[dict] | None, Children of the cluster
            clusterSample: List of sample clusters
                clusters:
                    indexes: list[int], List of indexes
                    height: int, Height of the cluster
                    children: list[dict] | None, Children of the cluster
            data: List of data

    """
    if feature_type not in ["proteingroup", "peptide"]:
        raise ValueError(
            "Feature type must be either 'proteingroup' or 'peptide'."
        )

    if stat_test not in ["ttest", "wilcoxon"]:
        raise ValueError("Stat test must be either 'ttest' or 'wilcoxon'.")

    # Validation is called only for its side effect (raising on a bad
    # contrast), so use a plain loop instead of building a throwaway list.
    for contrast in contrasts:
        validate_contrast(contrast, len(groups))

    # Wire format: values within a contrast joined by ",", contrasts by ";".
    formatted_contrasts = ";".join(
        ",".join(map(str, contrast)) for contrast in contrasts
    )

    payload = dict(
        analysisId=analysis_id,
        grouping=grouping,
        groups=",".join(groups),
        contrasts=formatted_contrasts,
        statTest=stat_test,
        featureType=feature_type,
        significantPGs=",".join(significant_pgs or []),
    )

    with self._get_auth_session() as s:
        URL = f"{self._auth.url}api/v2/clusterheatmap"
        response = s.post(URL, json=payload)
        if response.status_code != 200:
            raise ValueError("Server error. Bad response.")
        return response.json()
|
|
2504
|
+
|
|
2505
|
+
def get_enrichment_plot(
    self,
    analysis_id: str,
    significant_pgs: _List[str],
    summarize_output: bool = False,
    exclude_singleton: bool = False,
    cutoff: float = None,
    species: str = None,
):
    """
    Get enrichment plot data for a given analysis ID.

    Args:
        analysis_id (str): ID of the analysis.
        significant_pgs (_List[str]): List of significant protein/peptide groups.
        summarize_output (bool, optional): Summarize the output. Defaults to False.
        exclude_singleton (bool, optional): Exclude singleton values. Defaults to False.
        cutoff (float, optional): Cutoff value for the p-value to determine significance. Sent whenever it is not None, including 0. Defaults to None.
        species (str, optional): Species to filter the data by. Defaults to None.

    Raises:
        ValueError: No significant protein groups provided.
        ValueError: Could not fetch enrichment plot data (response status code is not 200).

    Returns:
        dict: A dictionary containing the enrichment plot data.
    """
    # Validate before touching the auth object so bad input fails fast.
    if not significant_pgs:
        raise ValueError("Significant pgs cannot be empty.")

    URL = f"{self._auth.url}api/v1/groupanalysis/enrichmentgo"

    with self._get_auth_session() as s:
        payload = {
            "analysisId": analysis_id,
            "significantPGs": significant_pgs,
            "summarizeOutput": summarize_output,
            "excludeSingleton": exclude_singleton,
        }
        # Compare against None: a plain truthiness test would silently drop
        # an explicit cutoff of 0.
        if cutoff is not None:
            payload["cutoff"] = cutoff
        if species:
            payload["species"] = species

        enrichment_data = s.post(URL, json=payload)

        if enrichment_data.status_code != 200:
            raise ValueError("Could not fetch enrichment plot data.")

        return enrichment_data.json()
|
|
2555
|
+
|
|
2556
|
+
def get_volcano_plot_data(
    self,
    analysis_id,
    group_analysis_id=None,
    significance_threshold=0.05,
    fold_change_threshold=1,
    label_by="fold_change",
    cached=False,
    as_df=False,
):
    """Get volcano plot data for a given analysis ID.

    Args:
        analysis_id (str): ID of the analysis.
        group_analysis_id (str, optional): ID of the group analysis; forwarded to group_analysis_results. Defaults to None.
        significance_threshold (float, optional): Cutoff value for the p-value to determine significance. Defaults to 0.05.
        fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance. Defaults to 1.
        label_by (str, optional): Metric to sort result data. Defaults to "fold_change".
        cached (bool, optional): Return a VolcanoPlotBuilder object for calculation reuse. Takes precedence over as_df. Defaults to False.
        as_df (bool, optional): whether the result should be converted to a DataFrame. Defaults to False.

    Raises:
        ServerError - could not fetch group analysis results.
    Returns:
        list[dict] | pd.DataFrame | VolcanoPlotBuilder: A list of dictionaries, a DataFrame, or a VolcanoPlotBuilder object containing the volcano plot data.
        Object contains the following columns: 'logFD', 'negativeLog10P', 'dataIndex', 'rowID', 'gene', 'protein',
                                               'group', 'significant', 'euclideanDistance'
    """
    try:
        response = self.group_analysis_results(
            analysis_id, group_analysis_id=group_analysis_id
        )
    except Exception as err:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate; chain the cause for debugging.
        raise ServerError(
            f"Could not fetch group analysis results. Please check that group analysis has completed for analysis {analysis_id}."
        ) from err

    obj = VolcanoPlotBuilder(
        response, significance_threshold, fold_change_threshold, label_by
    )

    if cached:
        return obj
    if as_df:
        return pd.DataFrame(obj.volcano_plot)
    return obj.volcano_plot
|
|
2603
|
+
|
|
2604
|
+
def _get_analysis_samples(
    self, analysis_id: str = None, analysis_name: str = None, as_df=False
):
    """
    Get the samples associated with a given analysis.

    At least one of `analysis_id` or `analysis_name` must be provided. When
    `analysis_id` is given it is used directly; otherwise the analyses
    returned by `self.get_analysis(analysis_name=...)` are queried one by
    one and their samples are merged, keeping one entry per sample `id`.

    Args:
        analysis_id (str): UUID identifier of the analysis. Defaults to None.
        analysis_name (str): Name of the analysis. Defaults to None.
        as_df (bool) : whether the result should be converted to a DataFrame. Defaults to False.

    Raises:
        ValueError - neither analysis_id nor analysis_name was provided.
        ServerError - could not retrieve samples for analysis.

    Returns:
        list[dict] | pd.DataFrame : the samples associated with the analysis,
            as a DataFrame when `as_df` is True.
    """

    if not analysis_id and not analysis_name:
        raise ValueError("Analysis cannot be empty.")

    if analysis_id:
        rows = [{"id": analysis_id}]
    else:
        rows = self.get_analysis(analysis_name=analysis_name)

    resp = []
    for row in rows:
        URL = f"{self._auth.url}api/v1/analyses/samples/{row['id']}"
        with self._get_auth_session() as s:
            samples = s.get(URL)
            try:
                samples.raise_for_status()
                resp += samples.json()[0]["samples"]
            except Exception:
                # Best effort: an analysis whose samples cannot be fetched or
                # parsed is skipped. Only `Exception` is caught so that
                # KeyboardInterrupt / SystemExit still propagate.
                continue

    if not resp:
        raise ServerError(
            f"Could not retrieve samples for analysis {analysis_id or analysis_name}."
        )

    resp = pd.DataFrame(resp)
    # The same sample can be reported by more than one analysis.
    resp.drop_duplicates(subset=["id"], inplace=True)
    return resp if as_df else resp.to_dict(orient="records")
|
|
2649
|
+
|
|
2650
|
+
def get_analysis_protocol_fasta(self, analysis_id, download_path=None):
    """Download the FASTA file(s) referenced by an analysis' protocol.

    Looks up the analysis to find its analysis protocol and that protocol's
    engine, reads the protocol's editable parameters to find the FASTA file
    path(s), requests a download URL for each path and saves every file into
    `download_path` under its base name.

    Args:
        analysis_id (str): ID of the analysis.
        download_path (str, optional): Directory to save the file(s) to. It is
            created if it does not exist. Defaults to the current working directory.

    Raises:
        ValueError - analysis ID is empty, the analysis or analysis protocol
            lookup did not have the expected shape, or the protocol's engine
            is not supported for fasta download.
        ServerError - a request failed, the server returned no fasta file
            name, or a file could not be downloaded.

    Returns:
        None
    """
    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if not download_path:
        download_path = os.getcwd()

    try:
        analysis_protocol_id = self.get_analysis(analysis_id)[0][
            "analysis_protocol_id"
        ]
    except (IndexError, KeyError) as err:
        raise ValueError("Could not parse server response.") from err

    try:
        analysis_protocol_engine = self.get_analysis_protocols(
            analysis_protocol_id=analysis_protocol_id
        )[0]["analysis_engine"]
    except (IndexError, KeyError) as err:
        raise ValueError("Could not parse server response.") from err

    analysis_protocol_engine = analysis_protocol_engine.lower()

    # Engine name -> path segment of the editableParameters endpoint.
    # MaxQuant is intentionally absent: a change is needed on the backend to
    # get the s3 file path for it.
    engine_routes = {
        "diann": "diann",
        "encyclopedia": "dia",
        "msfragger": "msfragger",
        "proteogenomics": "proteogenomics",
    }
    if analysis_protocol_engine not in engine_routes:
        raise ValueError(
            f"Analysis protocol engine {analysis_protocol_engine} not supported for fasta download."
        )
    URL = (
        f"{self._auth.url}api/v1/analysisProtocols/editableParameters/"
        f"{engine_routes[analysis_protocol_engine]}/{analysis_protocol_id}"
    )

    with self._get_auth_session() as s:
        response = s.get(URL)
        if response.status_code != 200:
            raise ServerError("Request failed.")
        response = response.json()

    # The payload is either the parameter list itself or a dict wrapping it.
    if isinstance(response, dict):
        response = response["editableParameters"]
    fasta_filenames = [
        x["Value"]
        for x in response
        if x["Key"] in ("fasta", "fastaFilePath", "referencegenome")
    ]
    if not fasta_filenames:
        raise ServerError("No fasta file name returned from server.")

    # Create the target directory up front instead of relying on a failed
    # first download attempt to notice that it is missing.
    os.makedirs(download_path, exist_ok=True)

    URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
    for file in fasta_filenames:
        with self._get_auth_session() as s:
            response = s.post(URL, json={"filepath": file})
            if response.status_code != 200:
                raise ServerError("Request failed.")
            url = response.json()["url"]

        filename = os.path.basename(file)
        destination = f"{download_path}/{filename}"
        print(f"Downloading {filename}")

        last_error = None
        for _ in range(2):  # one retry on failure
            try:
                with tqdm(
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    desc="Progress",
                ) as t:
                    # NOTE(review): this disables TLS certificate verification
                    # for every later urllib HTTPS request in the process, not
                    # just this download. Kept to preserve existing behavior;
                    # confirm whether it is still required.
                    ssl._create_default_https_context = (
                        ssl._create_unverified_context
                    )
                    urllib.request.urlretrieve(
                        url,
                        destination,
                        reporthook=download_hook(t),
                        data=None,
                    )
                break
            except Exception as err:
                last_error = err
        else:
            # Both attempts failed: report it instead of claiming success.
            raise ServerError(
                f"Could not download {filename}."
            ) from last_error

        print(f"Downloaded file to {destination}")
|