seer-pas-sdk 0.1.3__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- seer_pas_sdk/auth/auth.py +23 -1
- seer_pas_sdk/common/__init__.py +370 -72
- seer_pas_sdk/common/errors.py +5 -0
- seer_pas_sdk/common/groupanalysis.py +55 -0
- seer_pas_sdk/core/sdk.py +1580 -198
- seer_pas_sdk/core/unsupported.py +1634 -0
- seer_pas_sdk/objects/__init__.py +2 -0
- seer_pas_sdk/objects/groupanalysis.py +30 -0
- seer_pas_sdk/objects/platemap.py +67 -22
- seer_pas_sdk/objects/volcanoplot.py +290 -0
- seer_pas_sdk-0.2.1.dist-info/METADATA +230 -0
- seer_pas_sdk-0.2.1.dist-info/RECORD +18 -0
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-0.2.1.dist-info}/WHEEL +1 -1
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-0.2.1.dist-info}/top_level.txt +0 -1
- seer_pas_sdk-0.1.3.dist-info/METADATA +0 -50
- seer_pas_sdk-0.1.3.dist-info/RECORD +0 -19
- tests/__init__.py +0 -0
- tests/conftest.py +0 -17
- tests/test_auth.py +0 -48
- tests/test_common.py +0 -99
- tests/test_objects.py +0 -91
- tests/test_sdk.py +0 -11
- {seer_pas_sdk-0.1.3.dist-info → seer_pas_sdk-0.2.1.dist-info/licenses}/LICENSE.txt +0 -0
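The bulk of the release is the rewritten `seer_pas_sdk/core/sdk.py` shown below. As a quick orientation, here is a minimal usage sketch of the new multi-tenant surface, assuming only the signatures visible in this diff; the credentials, instance, and IDs are placeholders:

from seer_pas_sdk import SeerSDK

# New in 0.2.1: __init__ accepts an optional `tenant` argument and falls back
# to the home tenant if the switch fails (per __init__ in the diff below).
sdk = SeerSDK("USERNAME", "PASSWORD", instance="US")

# Enumerate available tenants, switch by tenant ID or institution name,
# and confirm the active tenant (list_tenants / switch_tenant are new).
print(sdk.list_tenants())
sdk.switch_tenant("YOUR_TENANT_ID_OR_INSTITUTION_NAME")
print(sdk.get_active_tenant_name())

# New analysis-result helpers: list result files for a completed analysis,
# then fetch selected files as DataFrames.
files = sdk.list_analysis_result_files("YOUR_ANALYSIS_ID")
tables = sdk.get_analysis_result_files("YOUR_ANALYSIS_ID", ["protein_np.tsv"])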
seer_pas_sdk/core/sdk.py
CHANGED
@@ -5,13 +5,12 @@ import jwt
 import requests
 import urllib.request
 import ssl
-import shutil
 
-from typing import List as _List
+from typing import List as _List, Tuple as _Tuple
 
 from ..common import *
 from ..auth import Auth
-from ..objects import
+from ..objects.volcanoplot import VolcanoPlotBuilder
 
 
 class SeerSDK:
@@ -27,33 +26,185 @@
     >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
     """
 
-    def __init__(self, username, password, instance="US"):
+    def __init__(self, username, password, instance="US", tenant=None):
        try:
             self._auth = Auth(username, password, instance)
 
             self._auth.get_token()
-
             print(f"User '{username}' logged in.\n")
 
-
+            if not tenant:
+                tenant = self._auth.active_tenant_id
+            try:
+                self.switch_tenant(tenant)
+            except Exception as e:
+                print(
+                    f"Encountered an error directing you to tenant {tenant}: {e}."
+                )
+                print("Logging into home tenant...")
+                # If an error occurs while directing the user to a tenant, default to home tenant.
+            print(f"You are now active in {self.get_active_tenant_name()}")
+        except Exception as e:
             raise ValueError(
-                "Could not log in.\nPlease check your credentials and/or instance."
+                f"Could not log in.\nPlease check your credentials and/or instance: {e}."
             )
 
-    def _get_auth_headers(self):
+    def _get_auth_headers(self, use_multi_tenant=True):
         id_token, access_token = self._auth.get_token()
-
+        header = {
             "Authorization": id_token,
-            "
+            "Access-Token": access_token,
         }
+        if use_multi_tenant:
+            multi_tenant = {
+                "Tenant-Id": self._auth.active_tenant_id,
+                "Role": self._auth.active_role,
+            }
+            header.update(multi_tenant)
+        return header
 
-    def _get_auth_session(self):
+    def _get_auth_session(self, use_multi_tenant=True):
         sess = requests.Session()
 
-        sess.headers.update(self._get_auth_headers())
+        sess.headers.update(self._get_auth_headers(use_multi_tenant))
 
         return sess
 
+    def get_user_tenant_metadata(self, index=True):
+        """
+        Fetches the tenant metadata for the authenticated user.
+
+        Returns
+        -------
+        response : dict
+            A dictionary containing the tenant metadata for the authenticated user.
+        """
+        with self._get_auth_session() as s:
+            response = s.get(f"{self._auth.url}api/v1/usertenants")
+
+            if response.status_code != 200:
+                raise ValueError(
+                    "Invalid request. Please check your parameters."
+                )
+
+            response = response.json()
+            if index:
+                return {x["institution"]: x for x in response}
+            else:
+                return response
+
+    def list_tenants(self, reverse=False):
+        """
+        Lists the institution names and the tenant ids for the authenticated user.
+
+        Parameters
+        ----------
+        reverse: bool
+            Boolean denoting whether the user wants the result dictionary indexed by tenant id (True) or institution name (False).
+
+        Returns
+        -------
+        tenants : dict
+            A dictionary containing the institution names and tenant ids for the authenticated user.
+        """
+        tenants = self.get_user_tenant_metadata()
+        if reverse:
+            return {x["tenantId"]: x["institution"] for x in tenants.values()}
+        else:
+            return {x["institution"]: x["tenantId"] for x in tenants.values()}
+
+    def switch_tenant(self, identifier: str):
+        """
+        Switches the tenant for the authenticated user.
+
+        Parameters
+        ----------
+        identifier: str
+            Tenant ID or organization name to switch to.
+
+        Returns
+        -------
+        tenant_id: str
+            Returns the value of the active tenant id after the operation.
+        """
+        map = self.get_user_tenant_metadata()
+        tenant_ids = [x["tenantId"] for x in map.values()]
+        institution_names = map.keys()
+
+        if identifier in tenant_ids:
+            tenant_id = identifier
+            row = [x for x in map.values() if x["tenantId"] == tenant_id]
+            if row:
+                row = row[0]
+            else:
+                raise ValueError(
+                    "Invalid tenant identifier. Tenant was not switched."
+                )
+        elif identifier in institution_names:
+            row = map[identifier]
+            tenant_id = row["tenantId"]
+        else:
+            raise ValueError(
+                "Invalid tenant identifier. Tenant was not switched."
+            )
+
+        with self._get_auth_session() as s:
+            response = s.put(
+                self._auth.url + "api/v1/users/tenant",
+                json={
+                    "currentTenantId": tenant_id,
+                    "username": self._auth.username,
+                },
+            )
+            if response.status_code != 200:
+                raise ServerError(
+                    "Could not update current tenant for user. Tenant was not switched."
+                )
+
+        self._auth.active_tenant_id = tenant_id
+        self._auth.active_role = row["role"]
+        print(f"You are now active in {row['institution']}")
+        return self._auth.active_tenant_id, self._auth.active_role
+
+    def get_active_tenant(self):
+        """
+        Fetches the active tenant for the authenticated user.
+
+        Returns
+        -------
+        tenant: dict
+            Tenant metadata for the authenticated user containing "institution" and "tenantId" keys.
+        """
+        tenants = self.get_user_tenant_metadata(index=False)
+        row = [
+            x for x in tenants if x["tenantId"] == self._auth.active_tenant_id
+        ]
+        return row[0] if row else None
+
+    def get_active_tenant_id(self):
+        """
+        Fetches the active tenant ID for the authenticated user.
+
+        Returns
+        -------
+        tenant_id: str
+            Tenant ID for the authenticated user.
+        """
+        tenant = self.get_active_tenant()
+        return tenant["tenantId"] if tenant else None
+
+    def get_active_tenant_name(self):
+        """
+        Fetches the active tenant name for the authenticated user.
+
+        Returns
+        -------
+        tenant: str
+            Tenant name for the authenticated user.
+        """
+        tenant = self.get_active_tenant()
+        return tenant["institution"] if tenant else None
+
     def get_spaces(self):
         """
         Fetches a list of spaces for the authenticated user.
@@ -230,14 +381,10 @@ class SeerSDK:
         ]
         return res if not df else dict_to_df(res)
 
-    def
+    def get_samples_metadata(
         self, plate_id: str = None, project_id: str = None, df: bool = False
     ):
         """
-        ****************
-        [UNEXPOSED METHOD CALL]
-        ****************
-
         Fetches a list of samples for the authenticated user, filtered by `plate_id`. Returns all samples for the plate with the given `plate_id`, provided it exists.
 
         If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated first.
@@ -261,14 +408,14 @@
         >>> from seer_pas_sdk import SeerSDK
         >>> seer_sdk = SeerSDK()
 
-        >>> seer_sdk.
+        >>> seer_sdk.get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
        >>> [
             { "id": ... },
             { "id": ... },
             ...
         ]
 
-        >>> seer_sdk.
+        >>> seer_sdk.get_samples_metadata(df=True)
         >>>     id ... control
         0   812139c0-15e0-11ee-bdf1-bbaa73585acf ...
         1   803e05b0-15e0-11ee-bdf1-bbaa73585acf ... MPE Control
@@ -296,25 +443,21 @@
                 try:
                     self.get_plate_metadata(plate_id)
                 except:
-                    raise ValueError(
-                        "Plate ID is invalid. Please check your parameters and see if the backend is running."
-                    )
+                    raise ValueError("Plate ID is invalid.")
                 sample_params["plateId"] = plate_id
 
             elif project_id:
                 try:
                     self.get_project_metadata(project_id)
                 except:
-                    raise ValueError(
-                        "Project ID is invalid. Please check your parameters and see if the backend is running."
-                    )
+                    raise ValueError("Project ID is invalid.")
 
                 sample_params["projectId"] = project_id
 
             samples = s.get(URL, params=sample_params)
             if samples.status_code != 200:
                 raise ValueError(
-                    "
+                    f"Failed to fetch sample data for plate ID: {plate_id}."
                 )
             res = samples.json()["data"]
 
@@ -334,8 +477,75 @@
                 ]
             ]
 
+        # API returns empty strings if not a control, replace with None for filtering purposes
+        res_df["control"] = res_df["control"].apply(lambda x: x if x else None)
+
         return res_df.to_dict(orient="records") if not df else res_df
 
+    def _filter_samples_metadata(
+        self,
+        project_id: str,
+        filter: str,
+        sample_ids: list = None,
+    ):
+        """
+        ****************
+        [UNEXPOSED METHOD CALL]
+        ****************
+        Get samples given a filter and project_id.
+
+        Parameters
+        ----------
+        project_id : str
+            The project id.
+        filter : str
+            The filter to be applied. Acceptable values are 'control' or 'sample'.
+        sample_ids : list, optional
+            List of user provided sample ids
+
+        Returns
+        -------
+        res : list
+            A list of sample ids
+
+        Examples
+        -------
+        >>> from core import SeerSDK
+        >>> seer_sdk = SeerSDK()
+        >>> seer_sdk._get_samples_filter("FILTER", "PROJECT_ID")
+        >>> {
+                "samples": [
+                    {
+                        "id": "SAMPLE_ID",
+                        "plate_id": "PLATE_ID",
+                        "sample_name": "SAMPLE_NAME",
+                        ...
+                        ...
+                    },
+                    ...
+                    ...
+                ]
+            }
+        """
+
+        if filter and filter not in ["control", "sample"]:
+            raise ValueError(
+                "Invalid filter. Please choose between 'control' or 'sample'."
+            )
+
+        df = self.get_samples_metadata(project_id=project_id, df=True)
+
+        if filter == "control":
+            df = df[~df["control"].isna()]
+        elif filter == "sample":
+            df = df[df["control"].isna()]
+
+        valid_samples = df["id"].tolist()
+        if sample_ids:
+            valid_samples = list(set(valid_samples) & set(sample_ids))
+
+        return valid_samples
+
     def get_sample_custom_fields(self):
         """
         Fetches a list of custom fields defined for the authenticated user.
@@ -405,10 +615,10 @@
 
             if msdatas.status_code != 200 or not msdatas.json()["data"]:
                 raise ValueError(
-                    "Failed to fetch MS data for
+                    f"Failed to fetch MS data for sample ID={sample_id}."
                 )
 
-            res
+            res += [x for x in msdatas.json()["data"]]
 
         for entry in res:
             if "tenant_id" in entry:
@@ -460,18 +670,24 @@
 
         [2 rows x 26 columns]
         """
-        plate_samples = self.
+        plate_samples = self.get_samples_metadata(plate_id=plate_id)
         sample_ids = [sample["id"] for sample in plate_samples]
         return self.get_msdata(sample_ids, df)
 
     def get_project(
-        self,
+        self,
+        project_id: str,
+        msdata: bool = False,
+        df: bool = False,
+        flat: bool = False,
     ):
         """
         Fetches samples (and MS data files) for a `project_id` (provided that the `project_id` is valid and has samples associated with it) for an authenticated user.
 
         The function returns a DataFrame object if the `df` flag is passed in as True, otherwise a nested dict object is returned instead. If the both the `df` and `msdata` flags are passed in as True, then a nested DataFrame object is returned instead.
 
+        If the `flat` flag is passed in as True, then the nested dict object is returned as an array of dict objects and the nested df object is returned as a single df object.
+
         Parameters
         ----------
         project_id : str
@@ -567,39 +783,53 @@
             return ValueError("No project ID specified.")
 
         sample_ids = []
-        project_samples = self.
+        project_samples = self.get_samples_metadata(
             project_id=project_id, df=False
         )
+        flat_result = []
 
         if msdata:
-
-
-
-
+
+            # construct map for quick index reference of sample in project_samples
+            sample_ids = {
+                sample["id"]: i for i, sample in enumerate(project_samples)
+            }  # will always contain unique values
+            ms_data_files = self.get_msdata(
+                sample_ids=list(sample_ids.keys()), df=False
+            )
 
             for ms_data_file in ms_data_files:
-
-
-
-
-
-
-
-
-
-
-
-
-
+                index = sample_ids.get(ms_data_file["sample_id"], None)
+                if not index:
+                    continue
+
+                if not flat:
+                    if "ms_data_file" not in project_samples[index]:
+                        project_samples[index]["ms_data_files"] = [
+                            ms_data_file
+                        ]
+                    else:
+                        project_samples[index]["ms_data_files"].append(
+                            ms_data_file
+                        )
+                else:
+                    flat_result.append(project_samples[index] | ms_data_file)
+
+            # return flat result if results were added to the flat object
+            if flat and flat_result:
+                project_samples = flat_result
 
         if df:
-
-
-
-
-
+            if flat:
+                return pd.DataFrame(project_samples)
+            else:
+                for sample_index in range(len(project_samples)):
+                    if "ms_data_files" in project_samples[sample_index]:
+                        project_samples[sample_index]["ms_data_files"] = (
+                            dict_to_df(
+                                project_samples[sample_index]["ms_data_files"]
+                            )
                         )
-            )
 
         project_samples = dict_to_df(project_samples)
 
@@ -694,11 +924,16 @@
         self,
         analysis_id: str = None,
         folder_id: str = None,
-        show_folders=True,
-        analysis_only=True,
+        show_folders: bool = True,
+        analysis_only: bool = True,
+        project_id: str = None,
+        plate_name: str = None,
+        **kwargs,
     ):
         """
         Returns a list of analyses objects for the authenticated user. If no id is provided, returns all analyses for the authenticated user.
+        Search parameters may be passed in as keyword arguments to filter the results. Acceptable values are 'analysis_name', 'folder_name', 'description', 'notes', or 'number_msdatafile'.
+        Only search on a single field is supported.
 
         Parameters
         ----------
@@ -709,13 +944,22 @@
             ID of the folder to be fetched, defaulted to None.
 
         show_folders : bool, optional
-            Mark True if folder contents are to be returned in the response, defaulted to True.
+            Mark True if folder contents are to be returned in the response, i.e. recursive search, defaulted to True.
             Will be disabled if an analysis id is provided.
 
         analysis_only : bool, optional
             Mark True if only analyses objects are to be returned in the response, defaulted to True.
             If marked false, folder objects will also be included in the response.
 
+        project_id : str, optional
+            ID of the project to be fetched, defaulted to None.
+
+        plate_name : str, optional
+            Name of the plate to be fetched, defaulted to None.
+
+        **kwargs : dict, optional
+            Search keyword parameters to be passed in. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
+
         Returns
         -------
         analyses: dict
@@ -732,19 +976,67 @@
                 {id: "YOUR_ANALYSIS_ID_HERE", ...}
             ]
 
-        >>> seer_sdk.
+        >>> seer_sdk.get_analysis("YOUR_ANALYSIS_ID_HERE")
+        >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
+
+        >>> seer_sdk.get_analysis(folder_name="YOUR_FOLDER_NAME_HERE")
+        >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
+
+        >>> seer_sdk.get_analysis(analysis_name="YOUR_ANALYSIS")
+        >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
+
+        >>> seer_sdk.get_analysis(description="YOUR_DESCRIPTION")
         >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
         """
 
         URL = f"{self._auth.url}api/v1/analyses"
         res = []
 
+        search_field = None
+        search_item = None
+        if kwargs:
+            if len(kwargs.keys()) > 1:
+                raise ValueError("Please include only one search parameter.")
+            search_field = list(kwargs.keys())[0]
+            search_item = kwargs[search_field]
+
+            if not search_item:
+                raise ValueError(
+                    f"Please provide a non null value for {search_field}"
+                )
+
+        if search_field and search_field not in [
+            "analysis_name",
+            "folder_name",
+            "analysis_protocol_name",
+            "description",
+            "notes",
+            "number_msdatafile",
+        ]:
+            raise ValueError(
+                "Invalid search field. Please choose between 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'."
+            )
+
         with self._get_auth_session() as s:
 
             params = {"all": "true"}
             if folder_id:
                 params["folder"] = folder_id
 
+            if search_field:
+                params["searchFields"] = search_field
+                params["searchItem"] = search_item
+                del params["all"]
+
+                if search_field == "folder_name":
+                    params["searchFields"] = "analysis_name"
+
+            if project_id:
+                params["projectId"] = project_id
+
+            if plate_name:
+                params["plateName"] = plate_name
+
             analyses = s.get(
                 f"{URL}/{analysis_id}" if analysis_id else URL, params=params
             )
@@ -790,150 +1082,523 @@
             ]
         return res
 
-    def
+    def get_analysis_result_protein_data(
+        self, analysis_id: str, link: bool = False, pg: str = None
+    ):
         """
-        Given an
+        Given an analysis id, this function returns the protein data for the analysis.
 
         Parameters
         ----------
+
         analysis_id : str
             ID of the analysis for which the data is to be fetched.
+        link : bool
+            Boolean flag denoting whether the user wants the default protein data. Defaults to False.
+        pg : str
+            Protein group ID to filter dataframe results. Defaults to None.
 
-
-
-
-
-
-        Contains dataframe objects for the `analysis_id`, given that the analysis has been complete.
+        """
+        with self._get_auth_session() as s:
+            URL = f"{self._auth.url}api/v1/data"
+            response = s.get(
+                f"{URL}/protein?analysisId={analysis_id}&retry=false"
+            )
 
-
-
-
-
+            if response.status_code != 200:
+                raise ValueError(
+                    "Could not fetch protein data. Please verify that your analysis completed."
+                )
+            response = response.json()
+
+            protein_data = {}
+            for row in response:
+                if row.get("name") == "npLink":
+                    protein_data["npLink"] = {
+                        "url": row.get("link", {}).get("url", "")
+                    }
+                if row.get("name") == "panelLink":
+                    protein_data["panelLink"] = {
+                        "url": row.get("link", {}).get("url", "")
+                    }
+            if not protein_data:
+                raise ValueError("No protein result files found.")
+            if not "panelLink" in protein_data.keys():
+                protein_data["panelLink"] = {"url": ""}
+
+            if link:
+                return protein_data
+            else:
+                if not pg:
+                    return {
+                        "protein_np": url_to_df(protein_data["npLink"]["url"]),
+                        "protein_panel": url_to_df(
+                            protein_data["panelLink"]["url"]
+                        ),
+                    }
+                else:
+                    protein_np = url_to_df(
+                        protein_data["npLink"]["url"]
+                    ).query(f"`Protein Group` == '{pg}'")
+                    protein_panel = url_to_df(
+                        protein_data["panelLink"]["url"]
+                    ).query(f"`Protein Group` == '{pg}'")
+
+                    if protein_np.empty and protein_panel.empty:
+                        raise ValueError(
+                            f"Protein group {pg} not found in analysis {analysis_id}."
+                        )
 
-
-
-
-
-            "protein_np": <protein_np dataframe object>,
-            "protein_panel": <protein_panel dataframe object>
-        }
+                    return {
+                        "protein_np": protein_np,
+                        "protein_panel": protein_panel,
+                    }
 
-
-
+    def get_analysis_result_peptide_data(
+        self, analysis_id: str, link: bool = False, peptide: str = None
+    ):
         """
+        Given an analysis id, this function returns the peptide data for the analysis.
 
-
-
-
-        if download_path and not os.path.exists(download_path):
-            raise ValueError("The download path you entered is invalid.")
-
-        if self.get_analysis(analysis_id)[0]["status"] in ["FAILED", None]:
-            raise ValueError(
-                "Cannot generate links for failed or null analyses."
-            )
+        Parameters
+        ----------
 
-
+        analysis_id : str
+            ID of the analysis for which the data is to be fetched.
 
-
+        link : bool
+            Boolean flag denoting whether the user wants the default peptide data. Defaults to False.
 
-
-
-            )
+        peptide : str
+            Peptide sequence to filter dataframe results. Defaults to None.
 
-
-            raise ValueError(
-                "Invalid request. Could not fetch protein data. Please check your parameters."
-            )
-        protein_data = protein_data.json()
+        """
 
-
+        with self._get_auth_session() as s:
+            URL = f"{self._auth.url}api/v1/data"
+            response = s.get(
                 f"{URL}/peptide?analysisId={analysis_id}&retry=false"
             )
 
-            if
+            if response.status_code != 200:
                 raise ValueError(
-                    "
+                    "Could not fetch peptide data. Please verify that your analysis completed."
                 )
 
-
+            response = response.json()
+
+            peptide_data = {}
+            for row in response:
+                if row.get("name") == "npLink":
+                    peptide_data["npLink"] = {
+                        "url": row.get("link", {}).get("url", "")
+                    }
+                if row.get("name") == "panelLink":
+                    peptide_data["panelLink"] = {
+                        "url": row.get("link", {}).get("url", "")
+                    }
+            if not peptide_data:
+                raise ValueError("No peptide result files found.")
+            if not "panelLink" in peptide_data.keys():
+                peptide_data["panelLink"] = {"url": ""}
+            if link:
+                return peptide_data
+            else:
+                if not peptide:
+                    return {
+                        "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
+                        "peptide_panel": url_to_df(
+                            peptide_data["panelLink"]["url"]
+                        ),
+                    }
+                else:
+                    peptide_np = url_to_df(
+                        peptide_data["npLink"]["url"]
+                    ).query(f"Peptide == '{peptide}'")
+                    peptide_panel = url_to_df(
+                        peptide_data["panelLink"]["url"]
+                    ).query(f"Peptide == '{peptide}'")
+
+                    if peptide_np.empty and peptide_panel.empty:
+                        raise ValueError(
+                            f"Peptide {peptide} not found in analysis {analysis_id}."
+                        )
 
-
-
-
-
-            "protein_panel": url_to_df(protein_data["panelLink"]["url"]),
-        }
+                    return {
+                        "peptide_np": peptide_np,
+                        "peptide_panel": peptide_panel,
+                    }
 
-
-
-
-            os.makedirs(name)
+    def list_analysis_result_files(self, analysis_id: str):
+        """
+        Given an analysis id, this function returns a list of files associated with the analysis.
 
-
-
-
-
-        links["protein_np"].to_csv(f"{name}/protein_np.csv", sep="\t")
-        links["protein_panel"].to_csv(
-            f"{name}/protein_panel.csv", sep="\t"
-        )
+        Parameters
+        ----------
+        analysis_id : str
+            ID of the analysis for which the data is to be fetched.
 
-
+        Returns
+        -------
+        files: list
+            List of files associated with the analysis.
+        """
+        try:
+            analysis_metadata = self.get_analysis(analysis_id)[0]
+        except (IndexError, ServerError):
+            raise ValueError("Invalid analysis ID.")
+        except:
+            raise ValueError("Could not fetch analysis metadata.")
 
-
+        if analysis_metadata.get("status") in ["Failed", None]:
+            raise ValueError("Cannot find files for a failed analysis.")
+        with self._get_auth_session() as s:
+            response = s.get(
+                f"{self._auth.url}api/v2/analysisResultFiles/{analysis_id}"
+            )
+            if response.status_code != 200:
+                raise ServerError(
+                    "Could not fetch analysis result files. Please verify that your analysis completed."
+                )
+            response = response.json()
+            files = []
+            for row in response["data"]:
+                files.append(row["filename"])
+            return files
 
-    def
+    def get_analysis_result_file_url(self, analysis_id: str, filename: str):
         """
-
+        Given an analysis id and a analysis result filename, this function returns the signed URL for the file.
 
         Parameters
         ----------
         analysis_id : str
-
+            ID of the analysis for which the data is to be fetched.
 
-
-
-        res : dict
-            A dictionary containing the status of the analysis.
+        filename : str
+            Name of the file to be fetched.
 
-
+        Returns
         -------
-
-
-        >>> seer_sdk.analysis_complete("YOUR_ANALYSIS_ID_HERE")
-        >>> {
-                "status": "SUCCEEDED"
-            }
+        file_url: dict
+            Response object containing the url for the file.
         """
 
-
-
-
-
-
-
-
+        # Allow user to pass in filenames without an extension.
+        analysis_result_files = self.list_analysis_result_files(analysis_id)
+        analysis_result_files_prefix_mapper = {
+            ".".join(x.split(".")[:-1]): x for x in analysis_result_files
+        }
+        if filename in analysis_result_files_prefix_mapper:
+            filename = analysis_result_files_prefix_mapper[filename]
 
-
+        analysis_metadata = self.get_analysis(analysis_id)[0]
+        if analysis_metadata.get("status") in ["Failed", None]:
+            raise ValueError("Cannot generate links for failed analyses.")
+        with self._get_auth_session() as s:
+            file_url = s.post(
+                f"{self._auth.url}api/v1/analysisResultFiles/getUrl",
+                json={
+                    "analysisId": analysis_id,
+                    "projectId": analysis_metadata["project_id"],
+                    "filename": filename,
+                },
+            )
+            response = file_url.json()
+            if not response.get("url"):
+                raise ValueError(f"File {filename} not found.")
+            return response
 
-    def
+    def get_analysis_result_files(
+        self,
+        analysis_id: str,
+        filenames: _List[str],
+        download_path: str = "",
+        protein_all: bool = False,
+        peptide_all: bool = False,
+    ):
         """
-
+        Given an analysis id and a list of file names, this function returns the file in form of downloadable content, if applicable.
 
         Parameters
         ----------
-
-
-
-
+        analysis_id : str
+            ID of the analysis for which the data is to be fetched.
+
+        filenames : list
+            List of filenames to be fetched. Only csv and tsv files are supported.
+
+        download_path : str
+            String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid. Defaults to an empty string.
+
+        protein_all : bool
+            Boolean flag denoting whether the user wants the default protein data. Defaults to False.
+
+        peptide_all : bool
+            Boolean flag denoting whether the user wants the default peptide data. Defaults to False.
 
         Returns
         -------
-
-            Contains the
+        links: dict
+            Contains dataframe objects for the requested files. If a filename is not found, it is skipped.
+
+
+        Examples
+        -------
+        >>> from seer_pas_sdk import SeerSDK
+        >>> seer_sdk = SeerSDK()
+        >>> analysis_id = "YOUR_ANALYSIS_ID_HERE"
+        >>> filenames = ["protein_np.tsv", "peptide_np.tsv"]
+        >>> seer_sdk.get_analysis_result_files(analysis_id, filenames)
+        {
+            "protein_np.tsv": <protein_np dataframe object>,
+            "peptide_np.tsv": <peptide_np dataframe object>
+        }
+        >>> seer_sdk.get_analysis_result_files(analysis_id, [], protein_all=True, peptide_all=True)
+        {
+            "protein_np.tsv": <protein_np dataframe object>,
+            "protein_panel.tsv": <protein_panel dataframe object>,
+            "peptide_np.tsv": <peptide_np dataframe object>,
+            "peptide_panel.tsv": <peptide_panel dataframe object>
+        }
+        >>> seer_sdk.get_analysis_result_files(analysis_id, ["report.tsv"], download_path="/Users/Downloads")
+        { "report.tsv": <report.tsv dataframe object> }
+        """
+
+        if not analysis_id:
+            raise ValueError("Analysis ID cannot be empty.")
+
+        if download_path and not os.path.exists(download_path):
+            raise ValueError(
+                "Please specify a valid folder path as download path."
+            )
+
+        links = {}
+        if protein_all:
+            protein_data = self.get_analysis_result_protein_data(
+                analysis_id, link=True
+            )
+            links["protein_np.tsv"] = protein_data["npLink"]["url"]
+            links["protein_panel.tsv"] = protein_data["panelLink"]["url"]
+        if peptide_all:
+            peptide_data = self.get_analysis_result_peptide_data(
+                analysis_id, link=True
+            )
+            links["peptide_np.tsv"] = peptide_data["npLink"]["url"]
+            links["peptide_panel.tsv"] = peptide_data["panelLink"]["url"]
+
+        filenames = set(filenames)
+        # Allow user to pass in filenames without an extension.
+        analysis_result_files = self.list_analysis_result_files(analysis_id)
+        analysis_result_files_prefix_mapper = {
+            ".".join(x.split(".")[:-1]): x for x in analysis_result_files
+        }
+        for filename in filenames:
+            if filename in analysis_result_files_prefix_mapper:
+                filename = analysis_result_files_prefix_mapper[filename]
+            if filename == "protein_np.tsv":
+                if protein_all:
+                    continue
+                protein_data = self.get_analysis_result_protein_data(
+                    analysis_id, link=True
+                )
+                links["protein_np.tsv"] = protein_data["npLink"]["url"]
+            elif filename == "protein_panel.tsv":
+                if protein_all:
+                    continue
+                protein_data = self.get_analysis_result_protein_data(
+                    analysis_id, link=True
+                )
+                links["protein_panel.tsv"] = protein_data["panelLink"]["url"]
+            elif filename == "peptide_np.tsv":
+                if peptide_all:
+                    continue
+                peptide_data = self.get_analysis_result_peptide_data(
+                    analysis_id, link=True
+                )
+                links["peptide_np.tsv"] = peptide_data["npLink"]["url"]
+            elif filename == "peptide_panel.tsv":
+                if peptide_all:
+                    continue
+                peptide_data = self.get_analysis_result_peptide_data(
+                    analysis_id, link=True
+                )
+                links["peptide_panel.tsv"] = peptide_data["panelLink"]["url"]
+            else:
+                try:
+                    links[filename] = self.get_analysis_result_file_url(
+                        analysis_id, filename
+                    )["url"]
+                except Exception as e:
+                    print(e)
+                    continue
+
+        links = {
+            k: url_to_df(v, is_tsv=k.endswith(".tsv"))
+            for k, v in links.items()
+        }
+        if download_path:
+            name = f"{download_path}/downloads/{analysis_id}"
+            print(f"Start download to path {name}")
+            if not os.path.exists(name):
+                os.makedirs(name)
+            for filename, content in links.items():
+                separator = ","
+                if filename.endswith(".tsv"):
+                    separator = "\t"
+                content.to_csv(f"{name}/{filename}", sep=separator)
+            print("Download complete.")
+
+        return links
+
+    def get_analysis_result(
+        self,
+        analysis_id: str,
+        download_path: str = "",
+        diann_report: bool = False,
+    ):
+        """
+        Given an `analysis_id`, this function returns all relevant analysis data files in form of downloadable content, if applicable.
+
+        Parameters
+        ----------
+        analysis_id : str
+            ID of the analysis for which the data is to be fetched.
+
+        download_path : str
+            String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid. Defaults to an empty string.
+
+        diann_report : bool
+            Boolean flag denoting whether the user wants the DIANN report to be included in the response. Defaults to False.
+
+        Returns
+        -------
+        links: dict
+            Contains dataframe objects for the `analysis_id`, given that the analysis has been complete.
+
+        Examples
+        -------
+        >>> from seer_pas_sdk import SeerSDK
+        >>> seer_sdk = SeerSDK()
+
+        >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
+        >>> {
+            "peptide_np": <peptide_np dataframe object>,
+            "peptide_panel": <peptide_panel dataframe object>,
+            "protein_np": <protein_np dataframe object>,
+            "protein_panel": <protein_panel dataframe object>
+        }
+
+        >>> seer_sdk.get_analysis_result("YOUR_DIANN_ANALYSIS_ID_HERE")
+        >>> {
+            "peptide_np": <peptide_np dataframe object>,
+            "peptide_panel": <peptide_panel dataframe object>,
+            "protein_np": <protein_np dataframe object>,
+            "protein_panel": <protein_panel dataframe object>,
+            "diann_report": <report.tsv dataframe object>
+        }
+
+        >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
+        >>> { "status": "Download complete." }
+        """
+
+        if not analysis_id:
+            raise ValueError("Analysis ID cannot be empty.")
+
+        if download_path and not os.path.exists(download_path):
+            raise ValueError("The download path you entered is invalid.")
+
+        protein_data = self.get_analysis_result_protein_data(
+            analysis_id, link=True
+        )
+        peptide_data = self.get_analysis_result_peptide_data(
+            analysis_id, link=True
+        )
+        links = {
+            "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
+            "peptide_panel": url_to_df(peptide_data["panelLink"]["url"]),
+            "protein_np": url_to_df(protein_data["npLink"]["url"]),
+            "protein_panel": url_to_df(protein_data["panelLink"]["url"]),
+        }
+
+        if diann_report:
+            diann_report_url = self.get_analysis_result_file_url(
+                analysis_id, "report.tsv"
+            )
+            links["diann_report"] = url_to_df(diann_report_url["url"])
+
+        if download_path:
+            name = f"{download_path}/downloads/{analysis_id}"
+            if not os.path.exists(name):
+                os.makedirs(name)
+
+            links["peptide_np"].to_csv(f"{name}/peptide_np.csv", sep="\t")
+            links["peptide_panel"].to_csv(
+                f"{name}/peptide_panel.csv", sep="\t"
+            )
+            links["protein_np"].to_csv(f"{name}/protein_np.csv", sep="\t")
+            links["protein_panel"].to_csv(
+                f"{name}/protein_panel.csv", sep="\t"
+            )
+
+            if "diann_report" in links:
+                links["diann_report"].to_csv(
+                    f"{name}/diann_report.csv", sep="\t"
+                )
+
+            return {"status": "Download complete."}
+
+        return links
+
+    def analysis_complete(self, analysis_id: str):
+        """
+        Returns the status of the analysis with the given id.
+
+        Parameters
+        ----------
+        analysis_id : str
+            The analysis id.
+
+        Returns
+        -------
+        res : dict
+            A dictionary containing the status of the analysis.
+
+        Examples
+        -------
+        >>> from seer_pas_sdk import SeerSDK
+        >>> seer_sdk = SeerSDK()
+        >>> seer_sdk.analysis_complete("YOUR_ANALYSIS_ID_HERE")
+        >>> {
+            "status": "SUCCEEDED"
+        }
+        """
+
+        if not analysis_id:
+            raise ValueError("Analysis id cannot be empty.")
+
+        try:
+            res = self.get_analysis(analysis_id)
+        except ValueError:
+            return ValueError("Analysis not found. Your ID could be incorrect")
+
+        return {"status": res[0]["status"]}
+
+    def list_ms_data_files(self, folder="", space=None):
+        """
+        Lists all the MS data files in the given folder as long as the folder path passed in the params is valid.
+
+        Parameters
+        ----------
+        folder : str, optional
+            Folder path to list the files from. Defaults to an empty string and displays all files for the user.
+        space : str, optional
+            ID of the user group to which the files belong, defaulted to None.
+
+        Returns
+        -------
+        list
+            Contains the list of files in the folder.
 
         Examples
         -------
@@ -1013,9 +1678,7 @@
         print(f'Downloading files to "{name}"\n')
 
         URL = f"{self._auth.url}api/v1/msdataindex/download/getUrl"
-        tenant_id =
-            "custom:tenantId"
-        ]
+        tenant_id = self._auth.active_tenant_id
 
         for path in paths:
             with self._get_auth_session() as s:
@@ -1033,7 +1696,6 @@
                         "Could not download file. Please check if the backend is running."
                     )
                 urls.append(download_url.text)
-
         for i in range(len(urls)):
             filename = paths[i].split("/")[-1]
             url = urls[i]
@@ -1077,17 +1739,70 @@
 
         return {"message": f"Files downloaded successfully to '{name}'"}
 
-    def
+    def get_group_analysis(
+        self, analysis_id, group_analysis_id=None, **kwargs
+    ):
+        """
+        Returns the list of group analysis objects for the given analysis id, provided they exist.
+
+        Parameters
+        ----------
+        analysis_id : str
+            The analysis id.
+
+        group_analysis_id : str, optional
+            The group analysis id, defaulted to None. If provided, the function will return the group analysis object for the given group analysis id.
+
+        **kwargs : dict, optional
+            Search keyword parameters to be passed in. Acceptable values are 'name' or 'description'.
+
+        """
+        params = {"analysisid": analysis_id}
+        if kwargs and not group_analysis_id:
+            if len(kwargs.keys()) > 1:
+                raise ValueError("Please include only one search parameter.")
+            search_field = list(kwargs.keys())[0]
+            if search_field not in ["name", "description"]:
+                raise ValueError(
+                    "Invalid search field. Please choose between 'name' or 'description'."
+                )
+            search_item = kwargs[search_field]
+
+            if not search_item:
+                raise ValueError(
+                    f"Please provide a non null value for {search_field}"
+                )
+            params["searchFields"] = search_field
+            params["searchItem"] = search_item
+
+        URL = f"{self._auth.url}api/v1/groupanalysis/groupanalyses"
+
+        if group_analysis_id:
+            URL = f"{URL}/{group_analysis_id}"
+            params["id"] = group_analysis_id
+
+        with self._get_auth_session() as s:
+            response = s.get(URL, params=params)
+            if response.status_code != 200:
+                raise ServerError(
+                    "Request failed. Please check your parameters."
+                )
+            response = response.json()
+            return response
+
+    def group_analysis_results(self, analysis_id: str, group_analysis_id=None):
         """
         Returns the group analysis data for the given analysis id, provided it exists.
 
+        If no group analysis id is provided, the function will return the most recent group analysis data for the given analysis id.
+
         Parameters
         ----------
         analysis_id : str
             The analysis id.
 
-
-            The
+        group_analysis_id : str, optional
+            The group analysis id, defaulted to None.
 
         Returns
         -------
@@ -1142,7 +1857,6 @@
                     "peptide_processed_long_form_file_url": "",
                 },
             },
-            "box_plot": [],
         }
 
         # Pre-GA data call
@@ -1153,7 +1867,7 @@
                 json={"analysisId": analysis_id, "grouping": "condition"},
             )
             if protein_pre_data.status_code != 200:
-                raise
+                raise ServerError(
                     "Invalid request. Could not fetch group analysis protein pre data. Please check your parameters."
                 )
 
@@ -1161,15 +1875,15 @@
 
         res["pre"]["protein"] = protein_pre_data
 
-        with
-            s.headers.update(HEADERS)
+        with self._get_auth_session() as s:
 
             peptide_pre_data = s.post(
                 url=f"{URL}api/v2/groupanalysis/peptide",
                 json={"analysisId": analysis_id, "grouping": "condition"},
             )
+
             if peptide_pre_data.status_code != 200:
-                raise
+                raise ServerError(
                     "Invalid request. Could not fetch group analysis peptide pre data. Please check your parameters."
                 )
 
@@ -1177,18 +1891,21 @@
         res["pre"]["peptide"] = peptide_pre_data
 
         # Post-GA data call
-        with
-
-
-
-
-
-
-
-
-                    "Invalid request. Could not fetch group analysis post data. Please check your parameters."
+        with self._get_auth_session() as s:
+            if group_analysis_id:
+                get_saved_result = self.get_group_analysis(
+                    analysis_id=analysis_id,
+                    group_analysis_id=group_analysis_id,
+                )
+            else:
+                get_saved_result = s.get(
+                    f"{URL}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
                 )
-
+            if get_saved_result.status_code != 200:
+                raise ServerError(
+                    "Could not fetch saved results. Please check your analysis id."
+                )
+            get_saved_result = get_saved_result.json()
 
         # Protein data
         if "pgResult" in get_saved_result:
@@ -1198,6 +1915,13 @@
         if "peptideResult" in get_saved_result:
             res["post"]["peptide"] = get_saved_result["peptideResult"]
 
+        # require that either protein or peptide data exists
+        # Error handling is necessary for volcano plot calculations downstream
+        if not (res["post"].get("protein") or res["post"].get("peptide")):
+            raise ValueError(
+                "No group analysis data returned from server."
+            )
+
         # Protein URLs
         if "pgProcessedFileUrl" in get_saved_result:
             res["post"]["protein_url"]["protein_processed_file_url"] = (
@@ -1219,32 +1943,690 @@
                 "peptide_processed_long_form_file_url"
             ] = get_saved_result["peptideProcessedLongFormFileUrl"]
 
-
-
-
-
+        return res
+
+    def get_box_plot_data(
+        self,
+        analysis_id: str,
+        group_analysis_id: str = None,
+        feature_ids: _List[str] = [],
+        show_significant_only: bool = False,
+        as_df=False,
+        volcano_plot=False,
+        cached=False,
+    ):
+        """Get box plot data for given analyses and samples formatted in a DataFrame or a dictionary.
+
+        Args:
+            analysis_id (str): ID of the analysis.
+            feature_ids (list[str], optional): Filter result object to a set of ids. Defaults to [].
+            show_significant_only (bool, optional): Mark true if only significant results are to be returned. Defaults to False.
+            as_df (bool, optional): Mark true if return object should be a pandas DataFrame. Defaults to False.
+            volcano_plot (bool, optional): Mark true to include the volcano plot data in the return object. Defaults to False.
+            cached (bool, optional): Mark true to return volcano plot data as a VolcanoPlotBuilder object. No effect if volcano_plot flag is marked false. Defaults to False.
+
+        Raises:
+            ValueError: Invalid feature type. Must be either 'protein' or 'peptide'.
+            ServerError: Could not fetch box plot data.
+
+        Returns:
+            list[dict] | pd.DataFrame : A list of dictionaries or a dataframe with each row containing the following keys/columns:
+            'proteinId', 'intensity', 'sampleName', 'sampleId', 'condition','gene'
+        """
+
+        with self._get_auth_session() as s:
+
+            # API call 1 - get volcano plot data for filtered results and gene mapping
+            builder = self.get_volcano_plot_data(
+                analysis_id, cached=True, group_analysis_id=group_analysis_id
+            )
+
+            protein_peptide_gene_map = builder.protein_gene_map
+
+            # API call 2 - get analysis samples metadata to get condition
+            samples_metadata = self.get_analysis_samples(analysis_id)
+
+            json = {"analysisId": analysis_id}
+            if feature_ids:
+                json["featureIds"] = ",".join(feature_ids)
+            filters = ""
+            # API call 3 - get group analysis data. This gives us the filters for the group analysis
+            if group_analysis_id:
+                ga = self.get_group_analysis(
+                    analysis_id, group_analysis_id=group_analysis_id
+                )
+                filters = ga["parameters"]["filters"]
+            if filters:
+                json["filters"] = filters
+
+            json["featureType"] = (
+                builder.type if builder.type == "peptide" else "proteingroup"
+            )
 
-
-            s.headers.update(HEADERS)
-        box_plot["feature_type"] = box_plot["feature_type"].lower()
+            # API call 4 - get intensities
             box_plot_data = s.post(
-                url=f"{
-                json={
-                    "analysisId": analysis_id,
-                    "featureIds": (
-                        ",".join(box_plot["feature_ids"])
-                        if len(box_plot["feature_ids"]) > 1
-                        else box_plot["feature_ids"][0]
-                    ),
-                    "featureType": f"{box_plot['feature_type']}group",
-                },
+                url=f"{self._auth.url}api/v1/groupanalysis/rawdata", json=json
             )
+
             if box_plot_data.status_code != 200:
+                raise ServerError("Could not fetch box plot data.")
+
+            box_plot_data = box_plot_data.json()
+            feature_type_index = (
+                "peptide" if builder.type == "peptide" else "proteinId"
+            )
+            box_plot_data = [
+                x
+                for x in box_plot_data
+                if x[feature_type_index] in protein_peptide_gene_map
+            ]
+            sample_id_condition = {
+                x["id"]: x["condition"] for x in samples_metadata[0]["samples"]
+            }
+
+            if show_significant_only:
+                significant_rows = set(builder.get_significant_rows())
+                box_plot_data = [
+                    x
+                    for x in box_plot_data
+                    if x[feature_type_index] in significant_rows
+                ]
+
+            for row in box_plot_data:
+                row["condition"] = sample_id_condition.get(
+                    row["sampleId"], None
+                )
+                row["gene"] = builder.protein_gene_map[row[feature_type_index]]
+
+            if as_df:
+                box_plot_data = pd.DataFrame(box_plot_data)
+
+            if volcano_plot:
+                vplot = None
+                if cached:
+                    vplot = builder
+                elif as_df:
+                    vplot = pd.DataFrame(builder.volcano_plot)
+                else:
+                    vplot = builder.volcano_plot
+
+                return {"box_plot": box_plot_data, "volcano_plot": vplot}
+            return box_plot_data
+
+    def get_all_volcano_plot_data(self, analysis_id: str, box_plot=False):
+        """
+        Get all volcano plot data for a given analysis.
+
+        Args:
+            analysis_id (str): ID of the analysis.
+            box_plot (bool, optional): Mark true to include box plot data in the return object. Defaults to False.
+
+        Returns:
+            dict: A dictionary containing the volcano plot and optionally box plot data for each group analysis.
+        """
+        group_analysis_ids = [
+            x["id"]
+            for x in self.get_group_analysis(analysis_id).get("data", [])
+            if x.get("id")
+        ]
+        if not group_analysis_ids:
+            return {}
+        results = dict()
+
+        if box_plot:
+            results = {
+                ga_id: {
+                    k: v
+                    for k, v in self.get_box_plot_data(
+                        analysis_id, ga_id, as_df=True, volcano_plot=True
+                    ).items()
+                }
+                for ga_id in group_analysis_ids
+            }
+        else:
+            results = {
+                ga_id: {
+                    "volcano_plot": self.get_volcano_plot_data(
+                        analysis_id, group_analysis_id=ga_id, as_df=True
+                    )
+                }
+                for ga_id in group_analysis_ids
+            }
+
+        return results
+
+    def _get_analysis_pca(
+        self,
+        analysis_ids: _List[str],
+        sample_ids: _List[str],
+        type: str,
+        hide_control: bool = False,
+    ):
+        """
+        ****************
+        [UNEXPOSED METHOD CALL]
+        ****************
+        Get PCA data for given analyses and samples.
+        Args:
+            analysis_ids (list[str]): IDs of the analyses of interest.
+            sample_ids (list[str]): IDs of the samples of interest.
+            type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
+            hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
+        Raises:
+            ValueError: No analysis IDs provided.
+            ValueError: No sample IDs provided.
+            ValueError: Invalid type provided.
+            ServerError: Could not fetch PCA data.
+        Returns:
+            dict
+            Pure response from the API.
+        """
+        if not analysis_ids:
+            raise ValueError("Analysis IDs cannot be empty.")
+        if type not in ["protein", "peptide"]:
+            raise ValueError("Type must be either 'protein' or 'peptide'.")
|
2123
|
+
"""
|
|
2124
|
+
if not analysis_ids:
|
|
2125
|
+
raise ValueError("Analysis IDs cannot be empty.")
|
|
2126
|
+
if type not in ["protein", "peptide"]:
|
|
2127
|
+
raise ValueError("Type must be either 'protein' or 'peptide'.")
|
|
2128
|
+
|
|
2129
|
+
URL = f"{self._auth.url}api/v1/analysisqcpca"
|
|
2130
|
+
|
|
2131
|
+
with self._get_auth_session() as s:
|
|
2132
|
+
json = {
|
|
2133
|
+
"analysisIds": ",".join(analysis_ids),
|
|
2134
|
+
"type": type,
|
|
2135
|
+
}
|
|
2136
|
+
if sample_ids:
|
|
2137
|
+
json["sampleIds"] = ",".join(sample_ids)
|
|
2138
|
+
|
|
2139
|
+
# specify hideControl as a string - unexpected behavior occurs if a boolean is passed
|
|
2140
|
+
if hide_control:
|
|
2141
|
+
json["hideControl"] = "true"
|
|
2142
|
+
else:
|
|
2143
|
+
json["hideControl"] = "false"
|
|
2144
|
+
|
|
2145
|
+
pca_data = s.post(URL, json=json)
|
|
2146
|
+
|
|
2147
|
+
if pca_data.status_code != 200:
|
|
2148
|
+
raise ServerError("Could not fetch PCA data.")
|
|
2149
|
+
|
|
2150
|
+
return pca_data.json()
|
|
2151
|
+
|
|
2152
|
+
def get_analysis_pca_data(
|
|
2153
|
+
self,
|
|
2154
|
+
analysis_ids: _List[str],
|
|
2155
|
+
type: str,
|
|
2156
|
+
sample_ids: _List[str] = [],
|
|
2157
|
+
hide_control: bool = False,
|
|
2158
|
+
as_df=False,
|
|
2159
|
+
):
|
|
2160
|
+
"""
|
|
2161
|
+
Get PCA data for given analyses and samples formatted in a DataFrame or a dictionary.
|
|
2162
|
+
Args:
|
|
2163
|
+
analysis_ids (list[str]): IDs of the analyses of interest.
|
|
2164
|
+
type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
|
|
2165
|
+
sample_ids (list[str], optional): IDs of the samples of interest.
|
|
2166
|
+
hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
|
|
2167
|
+
as_df (bool, optional): Mark true if the data should be returned as a pandas DataFrame. Defaults to False.
|
|
2168
|
+
Raises:
|
|
2169
|
+
ValueError: No analysis IDs provided.
|
|
2170
|
+
ValueError: No sample IDs provided.
|
|
2171
|
+
ValueError: Invalid type parameter provided.
|
|
2172
|
+
ServerError: Could not fetch PCA data.
|
|
2173
|
+
Returns:
|
|
2174
|
+
A dictionary with the following keys:
|
|
2175
|
+
- x_contribution_ratio (float): Proportion of variance explained by the x-axis.
|
|
2176
|
+
- y_contribution_ratio (float): Proportion of variance explained by the y-axis.
|
|
2177
|
+
- data (list[dict] | pd.DataFrame): A list of dictionaries or a dataframe with each row containing the following keys/columns:
|
|
2178
|
+
- sample_name (str): Name of the sample.
|
|
2179
|
+
- plate_name (str): Name of the plate.
|
|
2180
|
+
- sample_id (int): ID of the sample.
|
|
2181
|
+
- condition (str): Condition.
|
|
2182
|
+
- PC1 (float): X-value of the PCA point.
|
|
2183
|
+
- PC2 (float): Y-value of the PCA point.
|
|
2184
|
+
- custom_* (str): Custom fields. Included if meaningful, i.e., not null, in the data.
|
|
2185
|
+
Examples
|
|
2186
|
+
--------
|
|
2187
|
+
>>> from seer_pas_sdk import *
|
|
2188
|
+
>>> sdk = SeerSDK()
|
|
2189
|
+
>>> sdk.get_analysis_pca_data(
|
|
2190
|
+
analysis_ids=["analysis_id"],
|
|
2191
|
+
sample_ids=["sample_id"],
|
|
2192
|
+
type="protein",
|
|
2193
|
+
hide_control=False
|
|
2194
|
+
)
|
|
2195
|
+
"""
|
|
2196
|
+
pca_data = self._get_analysis_pca(
|
|
2197
|
+
analysis_ids, sample_ids, type, hide_control
|
|
2198
|
+
)
|
|
2199
|
+
|
|
2200
|
+
# common columns returned by the API
|
|
2201
|
+
generic_columns = [
|
|
2202
|
+
"sample_name",
|
|
2203
|
+
"plate_name",
|
|
2204
|
+
"sample_id",
|
|
2205
|
+
"condition",
|
|
2206
|
+
"PC1",
|
|
2207
|
+
"PC2",
|
|
2208
|
+
]
|
|
2209
|
+
|
|
2210
|
+
# edge case where yContributionRatio is NaN when zero points are returned.
|
|
2211
|
+
if not "yContributionRatio" in pca_data:
|
|
2212
|
+
y_contribution_ratio = None
|
|
2213
|
+
else:
|
|
2214
|
+
y_contribution_ratio = pca_data["yContributionRatio"]
|
|
2215
|
+
|
|
2216
|
+
x_contribution_ratio = pca_data["xContributionRatio"]
|
|
2217
|
+
samples = pca_data["samples"]
|
|
2218
|
+
points = pca_data["points"]
|
|
2219
|
+
|
|
2220
|
+
df = pd.DataFrame(
|
|
2221
|
+
[
|
|
2222
|
+
sample | {"PC1": point[0], "PC2": point[1]}
|
|
2223
|
+
for sample, point in zip(samples, points)
|
|
2224
|
+
]
|
|
2225
|
+
)
|
|
2226
|
+
|
|
2227
|
+
# Slice the df such that only custom columns are dropped in the absence of data
|
|
2228
|
+
df = pd.concat(
|
|
2229
|
+
[
|
|
2230
|
+
df.drop(columns=generic_columns).dropna(how="all", axis=1),
|
|
2231
|
+
df[generic_columns],
|
|
2232
|
+
],
|
|
2233
|
+
axis=1,
|
|
2234
|
+
)
|
|
2235
|
+
|
|
2236
|
+
# Filter down to a minimal set of columns
|
|
2237
|
+
permitted_columns = [
|
|
2238
|
+
x
|
|
2239
|
+
for x in df.columns
|
|
2240
|
+
if x in generic_columns or x.startswith("custom_")
|
|
2241
|
+
]
|
|
2242
|
+
|
|
2243
|
+
df = df.loc(axis=1)[permitted_columns]
|
|
2244
|
+
|
|
2245
|
+
# Return the data as a DataFrame if as_df is True
|
|
2246
|
+
if not as_df:
|
|
2247
|
+
df = df.to_dict(orient="records")
|
|
2248
|
+
result = dict(
|
|
2249
|
+
x_contribution_ratio=x_contribution_ratio,
|
|
2250
|
+
y_contribution_ratio=y_contribution_ratio,
|
|
2251
|
+
data=df,
|
|
2252
|
+
)
|
|
2253
|
+
return result
|
|
2254
|
+
|
|
2255
|
+
def get_analysis_hierarchical_clustering(
|
|
2256
|
+
self,
|
|
2257
|
+
analysis_ids: _List[str],
|
|
2258
|
+
sample_ids: _List[str] = [],
|
|
2259
|
+
hide_control: bool = False,
|
|
2260
|
+
):
|
|
2261
|
+
"""
|
|
2262
|
+
Get hierarchical clustering data for given analyses and samples.
|
|
2263
|
+
Args:
|
|
2264
|
+
analysis_ids (list[str]): IDs of the analyses.
|
|
2265
|
+
sample_ids (list[str], optional): IDs of the samples.
|
|
2266
|
+
hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
|
|
2267
|
+
raw_data (bool, optional): Mark true if raw data should be returned. Defaults to True.
|
|
2268
|
+
Raises:
|
|
2269
|
+
ValueError: No analysis IDs provided.
|
|
2270
|
+
ValueError: No sample IDs provided.
|
|
2271
|
+
ValueError: Response status code is not 200.
|
|
2272
|
+
Returns:
|
|
2273
|
+
dict
|
|
2274
|
+
Hierarchical clustering data returned by the API.
|
|
2275
|
+
"""
|
|
2276
|
+
if not analysis_ids:
|
|
2277
|
+
raise ValueError("Analysis IDs cannot be empty.")
|
|
2278
|
+
|
|
2279
|
+
URL = f"{self._auth.url}api/v1/analysishcluster"
|
|
2280
|
+
|
|
2281
|
+
with self._get_auth_session() as s:
|
|
2282
|
+
json = {
|
|
2283
|
+
"analysisIds": ",".join(analysis_ids),
|
|
2284
|
+
}
|
|
2285
|
+
if sample_ids:
|
|
2286
|
+
json["sampleIds"] = ",".join(sample_ids)
|
|
2287
|
+
|
|
2288
|
+
if sample_ids:
|
|
2289
|
+
json["sampleIds"] = ",".join(sample_ids)
|
|
2290
|
+
|
|
2291
|
+
# specify hideControl as a string
|
|
2292
|
+
# Python bool values are not recognized by the API
|
|
2293
|
+
if hide_control:
|
|
2294
|
+
json["hideControl"] = "true"
|
|
2295
|
+
else:
|
|
2296
|
+
json["hideControl"] = "false"
|
|
2297
|
+
|
|
2298
|
+
hc_data = s.post(URL, json=json)
|
|
2299
|
+
|
|
2300
|
+
if hc_data.status_code != 200:
|
|
1243
2301
|
raise ValueError(
|
|
1244
|
-
"Invalid request
|
|
2302
|
+
"Invalid request. Please check your parameters."
|
|
1245
2303
|
)
|
|
1246
2304
|
|
|
1247
|
-
|
|
1248
|
-
res["box_plot"] = box_plot_data
|
|
2305
|
+
data = hc_data.json()
|
|
1249
2306
|
|
|
1250
|
-
|
|
2307
|
+
# Filter out custom fields that are not part of the tenant's custom fields
|
|
2308
|
+
if not "samples" in data:
|
|
2309
|
+
raise ValueError("No sample data returned from server.")
|
|
2310
|
+
|
|
2311
|
+
data["samples"] = [
|
|
2312
|
+
{k: v for k, v in sample.items()} for sample in data["samples"]
|
|
2313
|
+
]
|
|
2314
|
+
|
|
2315
|
+
return data
|
|
2316
|
+
|
|
2317
|
+
def get_ppi_network_data(
|
|
2318
|
+
self, significant_pgs: _List[str], species: str = None
|
|
2319
|
+
):
|
|
2320
|
+
"""
|
|
2321
|
+
Get PPI network data for given significant protein groups.
|
|
2322
|
+
Args:
|
|
2323
|
+
significant_pgs (_List[str]): Significant protein groups.
|
|
2324
|
+
species (str, optional): Species of interest. Defaults to None.
|
|
2325
|
+
Raises:
|
|
2326
|
+
ValueError: No significant protein groups provided.
|
|
2327
|
+
ValueError: Response status code is not 200.
|
|
2328
|
+
Returns:
|
|
2329
|
+
dict
|
|
2330
|
+
Response returned by the API.
|
|
2331
|
+
"""
|
|
2332
|
+
if not significant_pgs:
|
|
2333
|
+
raise ValueError("Significant protein groups cannot be empty.")
|
|
2334
|
+
|
|
2335
|
+
URL = f"{self._auth.url}api/v1/groupanalysis/stringdb"
|
|
2336
|
+
|
|
2337
|
+
with self._get_auth_session() as s:
|
|
2338
|
+
json = {
|
|
2339
|
+
"significantPGs": ",".join(significant_pgs),
|
|
2340
|
+
}
|
|
2341
|
+
if species:
|
|
2342
|
+
json["species"] = species
|
|
2343
|
+
|
|
2344
|
+
ppi_data = s.post(URL, json=json)
|
|
2345
|
+
|
|
2346
|
+
if ppi_data.status_code != 200:
|
|
2347
|
+
raise ValueError("Server error - bad response")
|
|
2348
|
+
|
|
2349
|
+
return ppi_data.json()
|
|
2350
|
+
|
|
2351
|
+
# groups are user defined by the sample description file
|
|
2352
|
+
def get_cluster_heatmap_data(
|
|
2353
|
+
self,
|
|
2354
|
+
analysis_id: str,
|
|
2355
|
+
grouping: str,
|
|
2356
|
+
groups: _List[str],
|
|
2357
|
+
contrasts: _List[_Tuple[int, ...]],
|
|
2358
|
+
stat_test: str,
|
|
2359
|
+
feature_type: str,
|
|
2360
|
+
significant_pgs: _List[str] = [],
|
|
2361
|
+
):
|
|
2362
|
+
"""Get cluster heatmap data for the given analysis.
|
|
2363
|
+
|
|
2364
|
+
Args:
|
|
2365
|
+
analysis_id (str): ID of the analysis
|
|
2366
|
+
grouping (str): Category of sample groups
|
|
2367
|
+
groups (_List[str]): sample groups
|
|
2368
|
+
contrasts (_List[_Tuple[int, ...]]): Indicate which groups are compared against each other. e.g. [(0, 1, -1, 0), (1, 0, 0, -1)]
|
|
2369
|
+
stat_test (str): Statistical test to be used
|
|
2370
|
+
feature_type (str): Type of feature to be used, either proteingroup or peptide
|
|
2371
|
+
significant_pgs (_List[str], optional): significant protein group IDs. Defaults to [].
|
|
2372
|
+
|
|
2373
|
+
Raises:
|
|
2374
|
+
ValueError: "Feature type must be either 'proteingroup' or 'peptide'."
|
|
2375
|
+
ValueError: "Stat test must be either 'ttest' or 'wilcoxon'."
|
|
2376
|
+
ValueError: Invalid contrast value.
|
|
2377
|
+
ValueError: Server error
|
|
2378
|
+
|
|
2379
|
+
Returns:
|
|
2380
|
+
dict: the response object
|
|
2381
|
+
clusterProtein: List of protein clusters
|
|
2382
|
+
clusters:
|
|
2383
|
+
indexes: list[int], List of indexes
|
|
2384
|
+
height: int, Height of the cluster
|
|
2385
|
+
children: list[dict] | None, Children of the cluster
|
|
2386
|
+
clusterSample: List of sample clusters
|
|
2387
|
+
clusters:
|
|
2388
|
+
indexes: list[int], List of indexes
|
|
2389
|
+
height: int, Height of the cluster
|
|
2390
|
+
children: list[dict] | None, Children of the cluster
|
|
2391
|
+
data: List of data
|
|
2392
|
+
|
|
2393
|
+
"""
|
|
2394
|
+
if feature_type not in ["proteingroup", "peptide"]:
|
|
2395
|
+
raise ValueError(
|
|
2396
|
+
"Feature type must be either 'proteingroup' or 'peptide'."
|
|
2397
|
+
)
|
|
2398
|
+
|
|
2399
|
+
if stat_test not in ["ttest", "wilcoxon"]:
|
|
2400
|
+
raise ValueError("Stat test must be either 'ttest' or 'wilcoxon'.")
|
|
2401
|
+
|
|
2402
|
+
[validate_contrast(contrast, len(groups)) for contrast in contrasts]
|
|
2403
|
+
|
|
2404
|
+
formatted_contrasts = ";".join(
|
|
2405
|
+
[",".join(map(str, x)) for x in contrasts]
|
|
2406
|
+
)
|
|
2407
|
+
|
|
2408
|
+
payload = dict(
|
|
2409
|
+
analysisId=analysis_id,
|
|
2410
|
+
grouping=grouping,
|
|
2411
|
+
groups=",".join(groups),
|
|
2412
|
+
contrasts=formatted_contrasts,
|
|
2413
|
+
statTest=stat_test,
|
|
2414
|
+
featureType=feature_type,
|
|
2415
|
+
significantPGs=",".join(significant_pgs),
|
|
2416
|
+
)
|
|
2417
|
+
|
|
2418
|
+
with self._get_auth_session() as s:
|
|
2419
|
+
URL = f"{self._auth.url}api/v2/clusterheatmap"
|
|
2420
|
+
response = s.post(URL, json=payload)
|
|
2421
|
+
if response.status_code != 200:
|
|
2422
|
+
raise ValueError("Server error. Bad response.")
|
|
2423
|
+
return response.json()
|
|
2424
|
+
|
|
2425
|
+
def get_enrichment_plot(
|
|
2426
|
+
self,
|
|
2427
|
+
analysis_id: str,
|
|
2428
|
+
significant_pgs: _List[str],
|
|
2429
|
+
summarize_output: bool = False,
|
|
2430
|
+
exclude_singleton: bool = False,
|
|
2431
|
+
cutoff: float = None,
|
|
2432
|
+
species: str = None,
|
|
2433
|
+
):
|
|
2434
|
+
"""
|
|
2435
|
+
Get enrichment plot data for a given analysis ID.
|
|
2436
|
+
|
|
2437
|
+
Args:
|
|
2438
|
+
analysis_id (str): ID of the analysis.
|
|
2439
|
+
significant_pgs (_List[str]): List of significant protein/peptide groups.
|
|
2440
|
+
summarize_output (bool, optional): Summarize the output. Defaults to False.
|
|
2441
|
+
exclude_singleton (bool, optional): Exclude singleton values. Defaults to False.
|
|
2442
|
+
cutoff (float, optional): Cutoff value for the p-value to determine significance. Defaults to None.
|
|
2443
|
+
species (str, optional): Species to filter the data by. Defaults to None.
|
|
2444
|
+
|
|
2445
|
+
Raises:
|
|
2446
|
+
ServerError - could not fetch enrichment plot data.
|
|
2447
|
+
|
|
2448
|
+
Returns:
|
|
2449
|
+
dict: A dictionary containing the enrichment plot data.
|
|
2450
|
+
"""
|
|
2451
|
+
|
|
2452
|
+
URL = f"{self._auth.url}api/v1/groupanalysis/enrichmentgo"
|
|
2453
|
+
|
|
2454
|
+
if not significant_pgs:
|
|
2455
|
+
raise ValueError("Significant pgs cannot be empty.")
|
|
2456
|
+
|
|
2457
|
+
with self._get_auth_session() as s:
|
|
2458
|
+
json = {
|
|
2459
|
+
"analysisId": analysis_id,
|
|
2460
|
+
"significantPGs": significant_pgs,
|
|
2461
|
+
"summarizeOutput": summarize_output,
|
|
2462
|
+
"excludeSingleton": exclude_singleton,
|
|
2463
|
+
}
|
|
2464
|
+
if cutoff:
|
|
2465
|
+
json["cutoff"] = cutoff
|
|
2466
|
+
if species:
|
|
2467
|
+
json["species"] = species
|
|
2468
|
+
|
|
2469
|
+
enrichment_data = s.post(URL, json=json)
|
|
2470
|
+
|
|
2471
|
+
if enrichment_data.status_code != 200:
|
|
2472
|
+
raise ValueError("Could not fetch enrichment plot data.")
|
|
2473
|
+
|
|
2474
|
+
return enrichment_data.json()
|
|
2475
|
+
|
|
2476
|
+
def get_volcano_plot_data(
|
|
2477
|
+
self,
|
|
2478
|
+
analysis_id,
|
|
2479
|
+
group_analysis_id=None,
|
|
2480
|
+
significance_threshold=0.05,
|
|
2481
|
+
fold_change_threshold=1,
|
|
2482
|
+
label_by="fold_change",
|
|
2483
|
+
cached=False,
|
|
2484
|
+
as_df=False,
|
|
2485
|
+
):
|
|
2486
|
+
"""Get volcano plot data for a given analysis ID.
|
|
2487
|
+
|
|
2488
|
+
Args:
|
|
2489
|
+
analysis_id (str): ID of the analysis.
|
|
2490
|
+
significance_threshold (float, optional): Cutoff value for the p-value to determine significance. Defaults to 0.05.
|
|
2491
|
+
fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance. Defaults to 1.
|
|
2492
|
+
label_by (str, optional): Metric to sort result data. Defaults to "fold_change".
|
|
2493
|
+
cached (bool, optional): Return a VolcanoPlotBuilder object for calculation reuse. Defaults to False.
|
|
2494
|
+
as_df (bool, optional): Return data as a pandas DataFrame. Defaults to False.
|
|
2495
|
+
|
|
2496
|
+
Raises:
|
|
2497
|
+
ServerError - could not fetch group analysis results.
|
|
2498
|
+
Returns:
|
|
2499
|
+
list[dict] | pd.DataFrame | VolcanoPlotBuilder: A list of dictionaries, a DataFrame, or a VolcanoPlotBuilder object containing the volcano plot data.
|
|
2500
|
+
Object contains the following columns: 'logFD', 'negativeLog10P', 'dataIndex', 'rowID', 'gene', 'protein',
|
|
2501
|
+
'group', 'significant', 'euclideanDistance'
|
|
2502
|
+
"""
|
|
2503
|
+
try:
|
|
2504
|
+
response = self.group_analysis_results(
|
|
2505
|
+
analysis_id, group_analysis_id=group_analysis_id
|
|
2506
|
+
)
|
|
2507
|
+
except:
|
|
2508
|
+
raise ServerError(
|
|
2509
|
+
f"Could not fetch group analysis results. Please check that group analysis has completed for analysis {analysis_id}."
|
|
2510
|
+
)
|
|
2511
|
+
|
|
2512
|
+
obj = VolcanoPlotBuilder(
|
|
2513
|
+
response, significance_threshold, fold_change_threshold, label_by
|
|
2514
|
+
)
|
|
2515
|
+
|
|
2516
|
+
if cached:
|
|
2517
|
+
return obj
|
|
2518
|
+
else:
|
|
2519
|
+
if as_df:
|
|
2520
|
+
return pd.DataFrame(obj.volcano_plot)
|
|
2521
|
+
else:
|
|
2522
|
+
return obj.volcano_plot
|
|
2523
|
+
|
|
2524
|
+
def get_analysis_samples(self, analysis_id: str):
|
|
2525
|
+
"""
|
|
2526
|
+
Get the samples associated with a given analysis ID.
|
|
2527
|
+
|
|
2528
|
+
Args:
|
|
2529
|
+
analysis_id (str): The analysis ID.
|
|
2530
|
+
|
|
2531
|
+
Raises:
|
|
2532
|
+
ServerError - could not retrieve samples for analysis.
|
|
2533
|
+
Returns:
|
|
2534
|
+
dict: A dictionary containing the samples associated with the analysis.
|
|
2535
|
+
"""
|
|
2536
|
+
if not analysis_id:
|
|
2537
|
+
raise ValueError("Analysis ID cannot be empty.")
|
|
2538
|
+
|
|
2539
|
+
URL = f"{self._auth.url}api/v1/analyses/samples/{analysis_id}"
|
|
2540
|
+
with self._get_auth_session() as s:
|
|
2541
|
+
samples = s.get(URL)
|
|
2542
|
+
|
|
2543
|
+
if samples.status_code != 200:
|
|
2544
|
+
raise ServerError("Could not retrieve samples for analysis.")
|
|
2545
|
+
|
|
2546
|
+
return samples.json()
|
|
2547
|
+
|
|
2548
|
+
def get_analysis_protocol_fasta(self, analysis_id, download_path=None):
|
|
2549
|
+
if not analysis_id:
|
|
2550
|
+
raise ValueError("Analysis ID cannot be empty.")
|
|
2551
|
+
|
|
2552
|
+
if not download_path:
|
|
2553
|
+
download_path = os.getcwd()
|
|
2554
|
+
|
|
2555
|
+
try:
|
|
2556
|
+
analysis_protocol_id = self.get_analysis(analysis_id)[0][
|
|
2557
|
+
"analysis_protocol_id"
|
|
2558
|
+
]
|
|
2559
|
+
except (IndexError, KeyError):
|
|
2560
|
+
raise ValueError(f"Could not parse server response.")
|
|
2561
|
+
|
|
2562
|
+
try:
|
|
2563
|
+
analysis_protocol_engine = self.get_analysis_protocols(
|
|
2564
|
+
analysis_protocol_id=analysis_protocol_id
|
|
2565
|
+
)[0]["analysis_engine"]
|
|
2566
|
+
except (IndexError, KeyError):
|
|
2567
|
+
raise ValueError(f"Could not parse server response.")
|
|
2568
|
+
|
|
2569
|
+
analysis_protocol_engine = analysis_protocol_engine.lower()
|
|
2570
|
+
if analysis_protocol_engine == "diann":
|
|
2571
|
+
URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/diann/{analysis_protocol_id}"
|
|
2572
|
+
elif analysis_protocol_engine == "encyclopedia":
|
|
2573
|
+
URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/dia/{analysis_protocol_id}"
|
|
2574
|
+
elif analysis_protocol_engine == "msfragger":
|
|
2575
|
+
URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/msfragger/{analysis_protocol_id}"
|
|
2576
|
+
elif analysis_protocol_engine == "proteogenomics":
|
|
2577
|
+
URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/proteogenomics/{analysis_protocol_id}"
|
|
2578
|
+
else:
|
|
2579
|
+
# Change needed on the backend to get s3 file path for MaxQuant
|
|
2580
|
+
# URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/{analysis_protocol_id}"
|
|
2581
|
+
raise ValueError(
|
|
2582
|
+
f"Analysis protocol engine {analysis_protocol_engine} not supported for fasta download."
|
|
2583
|
+
)
|
|
2584
|
+
|
|
2585
|
+
with self._get_auth_session() as s:
|
|
2586
|
+
response = s.get(URL)
|
|
2587
|
+
if response.status_code != 200:
|
|
2588
|
+
raise ServerError("Request failed.")
|
|
2589
|
+
response = response.json()
|
|
2590
|
+
if type(response) == dict:
|
|
2591
|
+
response = response["editableParameters"]
|
|
2592
|
+
fasta_filenames = [
|
|
2593
|
+
x["Value"]
|
|
2594
|
+
for x in response
|
|
2595
|
+
if x["Key"] in ["fasta", "fastaFilePath", "referencegenome"]
|
|
2596
|
+
]
|
|
2597
|
+
if not fasta_filenames:
|
|
2598
|
+
raise ServerError("No fasta file name returned from server.")
|
|
2599
|
+
|
|
2600
|
+
URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
|
|
2601
|
+
for file in fasta_filenames:
|
|
2602
|
+
with self._get_auth_session() as s:
|
|
2603
|
+
response = s.post(URL, json={"filepath": file})
|
|
2604
|
+
if response.status_code != 200:
|
|
2605
|
+
raise ServerError("Request failed.")
|
|
2606
|
+
url = response.json()["url"]
|
|
2607
|
+
filename = os.path.basename(file)
|
|
2608
|
+
print(f"Downloading {filename}")
|
|
2609
|
+
for _ in range(2):
|
|
2610
|
+
try:
|
|
2611
|
+
with tqdm(
|
|
2612
|
+
unit="B",
|
|
2613
|
+
unit_scale=True,
|
|
2614
|
+
unit_divisor=1024,
|
|
2615
|
+
miniters=1,
|
|
2616
|
+
desc=f"Progress",
|
|
2617
|
+
) as t:
|
|
2618
|
+
ssl._create_default_https_context = (
|
|
2619
|
+
ssl._create_unverified_context
|
|
2620
|
+
)
|
|
2621
|
+
urllib.request.urlretrieve(
|
|
2622
|
+
url,
|
|
2623
|
+
f"{download_path}/{filename}",
|
|
2624
|
+
reporthook=download_hook(t),
|
|
2625
|
+
data=None,
|
|
2626
|
+
)
|
|
2627
|
+
break
|
|
2628
|
+
except:
|
|
2629
|
+
if not os.path.isdir(f"{download_path}"):
|
|
2630
|
+
os.makedirs(f"{download_path}")
|
|
2631
|
+
|
|
2632
|
+
print(f"Downloaded file to {download_path}/{file}")
|