seer-pas-sdk 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seer_pas_sdk/core/sdk.py +481 -344
- seer_pas_sdk/core/unsupported.py +3 -3
- {seer_pas_sdk-0.2.1.dist-info → seer_pas_sdk-0.3.1.dist-info}/METADATA +2 -1
- {seer_pas_sdk-0.2.1.dist-info → seer_pas_sdk-0.3.1.dist-info}/RECORD +7 -7
- {seer_pas_sdk-0.2.1.dist-info → seer_pas_sdk-0.3.1.dist-info}/WHEEL +0 -0
- {seer_pas_sdk-0.2.1.dist-info → seer_pas_sdk-0.3.1.dist-info}/licenses/LICENSE.txt +0 -0
- {seer_pas_sdk-0.2.1.dist-info → seer_pas_sdk-0.3.1.dist-info}/top_level.txt +0 -0
seer_pas_sdk/core/sdk.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from tqdm import tqdm
|
|
2
2
|
|
|
3
|
+
import deprecation
|
|
3
4
|
import os
|
|
4
5
|
import jwt
|
|
5
6
|
import requests
|
|
@@ -70,14 +71,14 @@ class SeerSDK:
|
|
|
70
71
|
|
|
71
72
|
return sess
|
|
72
73
|
|
|
73
|
-
def
|
|
74
|
+
def get_user_tenant(self, index=True):
|
|
74
75
|
"""
|
|
75
76
|
Fetches the tenant metadata for the authenticated user.
|
|
76
77
|
|
|
77
78
|
Returns
|
|
78
79
|
-------
|
|
79
|
-
response : dict
|
|
80
|
-
A
|
|
80
|
+
response : list[dict]
|
|
81
|
+
A list of tenant objects pertaining to the user.
|
|
81
82
|
"""
|
|
82
83
|
with self._get_auth_session() as s:
|
|
83
84
|
response = s.get(f"{self._auth.url}api/v1/usertenants")
|
|
@@ -89,7 +90,13 @@ class SeerSDK:
|
|
|
89
90
|
|
|
90
91
|
response = response.json()
|
|
91
92
|
if index:
|
|
92
|
-
|
|
93
|
+
mapper = dict()
|
|
94
|
+
for x in response:
|
|
95
|
+
if x["institution"] not in mapper:
|
|
96
|
+
mapper[x["institution"]] = [x]
|
|
97
|
+
else:
|
|
98
|
+
mapper[x["institution"]].append(x)
|
|
99
|
+
return mapper
|
|
93
100
|
else:
|
|
94
101
|
return response
|
|
95
102
|
|
|
@@ -104,14 +111,14 @@ class SeerSDK:
|
|
|
104
111
|
|
|
105
112
|
Returns
|
|
106
113
|
-------
|
|
107
|
-
tenants : dict
|
|
114
|
+
tenants : dict[str, str]
|
|
108
115
|
A dictionary containing the institution names and tenant ids for the authenticated user.
|
|
109
116
|
"""
|
|
110
|
-
tenants = self.
|
|
117
|
+
tenants = self.get_user_tenant(index=False)
|
|
111
118
|
if reverse:
|
|
112
|
-
return {x["tenantId"]: x["institution"] for x in tenants
|
|
119
|
+
return {x["tenantId"]: x["institution"] for x in tenants}
|
|
113
120
|
else:
|
|
114
|
-
return {x["institution"]: x["tenantId"] for x in tenants
|
|
121
|
+
return {x["institution"]: x["tenantId"] for x in tenants}
|
|
115
122
|
|
|
116
123
|
def switch_tenant(self, identifier: str):
|
|
117
124
|
"""
|
|
@@ -127,13 +134,15 @@ class SeerSDK:
|
|
|
127
134
|
tenant_id: str
|
|
128
135
|
Returns the value of the active tenant id after the operation.
|
|
129
136
|
"""
|
|
130
|
-
map = self.
|
|
131
|
-
|
|
137
|
+
map = self.get_user_tenant()
|
|
138
|
+
tenant_id_match = [
|
|
139
|
+
y for x in map.values() for y in x if y["tenantId"] == identifier
|
|
140
|
+
]
|
|
132
141
|
institution_names = map.keys()
|
|
133
142
|
|
|
134
|
-
if
|
|
143
|
+
if tenant_id_match:
|
|
135
144
|
tenant_id = identifier
|
|
136
|
-
row =
|
|
145
|
+
row = tenant_id_match
|
|
137
146
|
if row:
|
|
138
147
|
row = row[0]
|
|
139
148
|
else:
|
|
@@ -141,7 +150,12 @@ class SeerSDK:
|
|
|
141
150
|
"Invalid tenant identifier. Tenant was not switched."
|
|
142
151
|
)
|
|
143
152
|
elif identifier in institution_names:
|
|
144
|
-
|
|
153
|
+
results = map[identifier]
|
|
154
|
+
if len(results) > 1:
|
|
155
|
+
raise ValueError(
|
|
156
|
+
"Multiple tenants found for the given institution name. Please specify a tenant ID."
|
|
157
|
+
)
|
|
158
|
+
row = results[0]
|
|
145
159
|
tenant_id = row["tenantId"]
|
|
146
160
|
else:
|
|
147
161
|
raise ValueError(
|
|
@@ -172,10 +186,10 @@ class SeerSDK:
|
|
|
172
186
|
|
|
173
187
|
Returns
|
|
174
188
|
-------
|
|
175
|
-
tenant: dict
|
|
189
|
+
tenant: dict[str, str]
|
|
176
190
|
Tenant metadata for the authenticated user containing "institution" and "tenantId" keys.
|
|
177
191
|
"""
|
|
178
|
-
tenants = self.
|
|
192
|
+
tenants = self.get_user_tenant(index=False)
|
|
179
193
|
row = [
|
|
180
194
|
x for x in tenants if x["tenantId"] == self._auth.active_tenant_id
|
|
181
195
|
]
|
|
@@ -211,7 +225,7 @@ class SeerSDK:
|
|
|
211
225
|
|
|
212
226
|
Returns
|
|
213
227
|
-------
|
|
214
|
-
spaces: list
|
|
228
|
+
spaces: list[dict]
|
|
215
229
|
List of space objects for the authenticated user.
|
|
216
230
|
|
|
217
231
|
Examples
|
|
@@ -237,7 +251,9 @@ class SeerSDK:
|
|
|
237
251
|
)
|
|
238
252
|
return spaces.json()
|
|
239
253
|
|
|
240
|
-
def
|
|
254
|
+
def get_plates(
|
|
255
|
+
self, plate_id: str = None, plate_name: str = None, as_df: bool = False
|
|
256
|
+
):
|
|
241
257
|
"""
|
|
242
258
|
Fetches a list of plates for the authenticated user. If no `plate_id` is provided, returns all plates for the authenticated user. If `plate_id` is provided, returns the plate with the given `plate_id`, provided it exists.
|
|
243
259
|
|
|
@@ -245,25 +261,25 @@ class SeerSDK:
|
|
|
245
261
|
----------
|
|
246
262
|
plate_id : str, optional
|
|
247
263
|
ID of the plate to be fetched, defaulted to None.
|
|
248
|
-
|
|
249
|
-
|
|
264
|
+
as_df: bool
|
|
265
|
+
whether the result should be converted to a DataFrame, defaulted to None.
|
|
250
266
|
|
|
251
267
|
Returns
|
|
252
268
|
-------
|
|
253
|
-
plates: list or DataFrame
|
|
269
|
+
plates: list[dict] or DataFrame
|
|
254
270
|
List/DataFrame of plate objects for the authenticated user.
|
|
255
271
|
|
|
256
272
|
Examples
|
|
257
273
|
-------
|
|
258
274
|
>>> from seer_pas_sdk import SeerSDK
|
|
259
275
|
>>> seer_sdk = SeerSDK()
|
|
260
|
-
>>> seer_sdk.
|
|
276
|
+
>>> seer_sdk.get_plates()
|
|
261
277
|
>>> [
|
|
262
278
|
{ "id": ... },
|
|
263
279
|
{ "id": ... },
|
|
264
280
|
...
|
|
265
281
|
]
|
|
266
|
-
>>> seer_sdk.
|
|
282
|
+
>>> seer_sdk.get_plates(as_df=True)
|
|
267
283
|
>>> id ... user_group
|
|
268
284
|
0 a7c12190-15da-11ee-bdf1-bbaa73585acf ... None
|
|
269
285
|
1 8c3b1480-15da-11ee-bdf1-bbaa73585acf ... None
|
|
@@ -284,11 +300,18 @@ class SeerSDK:
|
|
|
284
300
|
URL = f"{self._auth.url}api/v1/plates"
|
|
285
301
|
res = []
|
|
286
302
|
|
|
303
|
+
if not plate_id and not plate_name:
|
|
304
|
+
params = {"all": "true"}
|
|
305
|
+
elif plate_name:
|
|
306
|
+
params = {"searchFields": "plate_name", "searchItem": plate_name}
|
|
307
|
+
else:
|
|
308
|
+
params = dict()
|
|
309
|
+
|
|
287
310
|
with self._get_auth_session() as s:
|
|
288
311
|
|
|
289
312
|
plates = s.get(
|
|
290
313
|
f"{URL}/{plate_id}" if plate_id else URL,
|
|
291
|
-
params=
|
|
314
|
+
params=params,
|
|
292
315
|
)
|
|
293
316
|
if plates.status_code != 200:
|
|
294
317
|
raise ValueError(
|
|
@@ -302,9 +325,14 @@ class SeerSDK:
|
|
|
302
325
|
for entry in res:
|
|
303
326
|
del entry["tenant_id"]
|
|
304
327
|
|
|
305
|
-
return res if not
|
|
328
|
+
return res if not as_df else dict_to_df(res)
|
|
306
329
|
|
|
307
|
-
def
|
|
330
|
+
def get_projects(
|
|
331
|
+
self,
|
|
332
|
+
project_id: str = None,
|
|
333
|
+
project_name: str = None,
|
|
334
|
+
as_df: bool = False,
|
|
335
|
+
):
|
|
308
336
|
"""
|
|
309
337
|
Fetches a list of projects for the authenticated user. If no `project_id` is provided, returns all projects for the authenticated user. If `project_id` is provided, returns the project with the given `project_id`, provided it exists.
|
|
310
338
|
|
|
@@ -312,26 +340,26 @@ class SeerSDK:
|
|
|
312
340
|
----------
|
|
313
341
|
project_id: str, optional
|
|
314
342
|
Project ID of the project to be fetched, defaulted to None.
|
|
315
|
-
|
|
316
|
-
|
|
343
|
+
as_df: bool
|
|
344
|
+
whether the result should be converted to a DataFrame, defaulted to False.
|
|
317
345
|
|
|
318
346
|
Returns
|
|
319
347
|
-------
|
|
320
|
-
projects: list or DataFrame
|
|
348
|
+
projects: list[dict] or DataFrame
|
|
321
349
|
DataFrame or list of project objects for the authenticated user.
|
|
322
350
|
|
|
323
351
|
Examples
|
|
324
352
|
-------
|
|
325
353
|
>>> from seer_pas_sdk import SeerSDK
|
|
326
354
|
>>> seer_sdk = SeerSDK()
|
|
327
|
-
>>> seer_sdk.
|
|
355
|
+
>>> seer_sdk.get_projects()
|
|
328
356
|
>>> [
|
|
329
357
|
{ "project_name": ... },
|
|
330
358
|
{ "project_name": ... },
|
|
331
359
|
...
|
|
332
360
|
]
|
|
333
361
|
|
|
334
|
-
>>> seer_sdk.
|
|
362
|
+
>>> seer_sdk.get_projects(as_df=True)
|
|
335
363
|
>>> id ... user_group
|
|
336
364
|
0 a7c12190-15da-11ee-bdf1-bbaa73585acf ... None
|
|
337
365
|
1 8c3b1480-15da-11ee-bdf1-bbaa73585acf ... None
|
|
@@ -345,7 +373,7 @@ class SeerSDK:
|
|
|
345
373
|
938 5b05d440-6610-11ea-96e3-d5a4dab4ebf6 ... None
|
|
346
374
|
939 9872e3f0-544e-11ea-ad9e-1991e0725494 ... None
|
|
347
375
|
|
|
348
|
-
>>> seer_sdk.
|
|
376
|
+
>>> seer_sdk.get_projects(id="YOUR_PROJECT_ID_HERE")
|
|
349
377
|
>>> [{ "project_name": ... }]
|
|
350
378
|
"""
|
|
351
379
|
|
|
@@ -355,10 +383,19 @@ class SeerSDK:
|
|
|
355
383
|
else f"{self._auth.url}api/v1/projects/{project_id}"
|
|
356
384
|
)
|
|
357
385
|
res = []
|
|
386
|
+
if not project_id and not project_name:
|
|
387
|
+
params = {"all": "true"}
|
|
388
|
+
elif project_name:
|
|
389
|
+
params = {
|
|
390
|
+
"searchFields": "project_name",
|
|
391
|
+
"searchItem": project_name,
|
|
392
|
+
}
|
|
393
|
+
else:
|
|
394
|
+
params = dict()
|
|
358
395
|
|
|
359
396
|
with self._get_auth_session() as s:
|
|
360
397
|
|
|
361
|
-
projects = s.get(URL, params=
|
|
398
|
+
projects = s.get(URL, params=params)
|
|
362
399
|
if projects.status_code != 200:
|
|
363
400
|
raise ValueError(
|
|
364
401
|
"Invalid request. Please check your parameters."
|
|
@@ -379,15 +416,18 @@ class SeerSDK:
|
|
|
379
416
|
entry["raw_file_path"] = entry["raw_file_path"][
|
|
380
417
|
location(entry["raw_file_path"]) :
|
|
381
418
|
]
|
|
382
|
-
return res if not
|
|
419
|
+
return res if not as_df else dict_to_df(res)
|
|
383
420
|
|
|
384
|
-
def
|
|
385
|
-
self,
|
|
421
|
+
def get_samples(
|
|
422
|
+
self,
|
|
423
|
+
plate_id: str = None,
|
|
424
|
+
project_id: str = None,
|
|
425
|
+
analysis_id: str = None,
|
|
426
|
+
analysis_name: str = None,
|
|
427
|
+
as_df: bool = False,
|
|
386
428
|
):
|
|
387
429
|
"""
|
|
388
|
-
Fetches a list of samples for the authenticated user,
|
|
389
|
-
|
|
390
|
-
If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated first.
|
|
430
|
+
Fetches a list of samples for the authenticated user with relation to a specified plate, project, or analysis. If no parameters are provided, returns all samples for the authenticated user. If `plate_id` or `project_id` is provided, returns samples associated with that plate or project. If `analysis_id` or `analysis_name` is provided, returns samples associated with that analysis.
|
|
391
431
|
|
|
392
432
|
Parameters
|
|
393
433
|
----------
|
|
@@ -395,12 +435,16 @@ class SeerSDK:
|
|
|
395
435
|
ID of the plate for which samples are to be fetched, defaulted to None.
|
|
396
436
|
project_id : str, optional
|
|
397
437
|
ID of the project for which samples are to be fetched, defaulted to None.
|
|
398
|
-
|
|
399
|
-
|
|
438
|
+
analysis_id : str, optional
|
|
439
|
+
ID of the analysis for which samples are to be fetched, defaulted to None.
|
|
440
|
+
analysis_name : str, optional
|
|
441
|
+
Name of the analysis for which samples are to be fetched, defaulted to None.
|
|
442
|
+
as_df: bool
|
|
443
|
+
whether the result should be converted to a DataFrame, defaulted to False.
|
|
400
444
|
|
|
401
445
|
Returns
|
|
402
446
|
-------
|
|
403
|
-
samples: list or DataFrame
|
|
447
|
+
samples: list[dict] or DataFrame
|
|
404
448
|
List/DataFrame of samples for the authenticated user.
|
|
405
449
|
|
|
406
450
|
Examples
|
|
@@ -408,14 +452,14 @@ class SeerSDK:
|
|
|
408
452
|
>>> from seer_pas_sdk import SeerSDK
|
|
409
453
|
>>> seer_sdk = SeerSDK()
|
|
410
454
|
|
|
411
|
-
>>> seer_sdk.
|
|
455
|
+
>>> seer_sdk.get_samples(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
|
|
412
456
|
>>> [
|
|
413
457
|
{ "id": ... },
|
|
414
458
|
{ "id": ... },
|
|
415
459
|
...
|
|
416
460
|
]
|
|
417
461
|
|
|
418
|
-
>>> seer_sdk.
|
|
462
|
+
>>> seer_sdk.get_samples(as_df=True)
|
|
419
463
|
>>> id ... control
|
|
420
464
|
0 812139c0-15e0-11ee-bdf1-bbaa73585acf ...
|
|
421
465
|
1 803e05b0-15e0-11ee-bdf1-bbaa73585acf ... MPE Control
|
|
@@ -430,29 +474,40 @@ class SeerSDK:
|
|
|
430
474
|
3628 dd607ef0-654c-11ea-8eb2-25a1cfd1163c ... C132
|
|
431
475
|
"""
|
|
432
476
|
|
|
433
|
-
if
|
|
434
|
-
|
|
477
|
+
# Raise an error if none or more than one of the primary key parameters are passed in.
|
|
478
|
+
if (
|
|
479
|
+
sum(
|
|
480
|
+
[
|
|
481
|
+
True if x else False
|
|
482
|
+
for x in [plate_id, project_id, analysis_id, analysis_name]
|
|
483
|
+
]
|
|
484
|
+
)
|
|
485
|
+
!= 1
|
|
486
|
+
):
|
|
487
|
+
raise ValueError(
|
|
488
|
+
"You must pass in exactly one of plate_id, project_id, analysis_id, analysis_name."
|
|
489
|
+
)
|
|
435
490
|
|
|
436
491
|
res = []
|
|
437
492
|
URL = f"{self._auth.url}api/v1/samples"
|
|
438
493
|
sample_params = {"all": "true"}
|
|
439
494
|
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
495
|
+
if project_id or plate_id:
|
|
496
|
+
with self._get_auth_session() as s:
|
|
497
|
+
if plate_id:
|
|
498
|
+
try:
|
|
499
|
+
self.get_plates(plate_id)
|
|
500
|
+
except:
|
|
501
|
+
raise ValueError("Plate ID is invalid.")
|
|
502
|
+
sample_params["plateId"] = plate_id
|
|
448
503
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
504
|
+
else:
|
|
505
|
+
try:
|
|
506
|
+
self.get_projects(project_id)
|
|
507
|
+
except:
|
|
508
|
+
raise ValueError("Project ID is invalid.")
|
|
454
509
|
|
|
455
|
-
|
|
510
|
+
sample_params["projectId"] = project_id
|
|
456
511
|
|
|
457
512
|
samples = s.get(URL, params=sample_params)
|
|
458
513
|
if samples.status_code != 200:
|
|
@@ -460,14 +515,27 @@ class SeerSDK:
|
|
|
460
515
|
f"Failed to fetch sample data for plate ID: {plate_id}."
|
|
461
516
|
)
|
|
462
517
|
res = samples.json()["data"]
|
|
518
|
+
res_df = dict_to_df(res)
|
|
463
519
|
|
|
464
|
-
for
|
|
465
|
-
|
|
520
|
+
# API returns empty strings if not a control, replace with None for filtering purposes
|
|
521
|
+
res_df["control"] = res_df["control"].apply(
|
|
522
|
+
lambda x: x if x else None
|
|
523
|
+
)
|
|
524
|
+
else:
|
|
525
|
+
if analysis_id:
|
|
526
|
+
res_df = self._get_analysis_samples(
|
|
527
|
+
analysis_id=analysis_id, as_df=True
|
|
528
|
+
)
|
|
529
|
+
else:
|
|
530
|
+
res_df = self._get_analysis_samples(
|
|
531
|
+
analysis_name=analysis_name, as_df=True, is_name=True
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
# apply post processing
|
|
535
|
+
res_df.drop(["tenant_id"], axis=1, inplace=True)
|
|
466
536
|
|
|
467
|
-
# Exclude custom fields that don't belong to the tenant
|
|
468
|
-
res_df = dict_to_df(res)
|
|
469
537
|
custom_columns = [
|
|
470
|
-
x["field_name"] for x in self.
|
|
538
|
+
x["field_name"] for x in self._get_sample_custom_fields()
|
|
471
539
|
]
|
|
472
540
|
res_df = res_df[
|
|
473
541
|
[
|
|
@@ -477,10 +545,7 @@ class SeerSDK:
|
|
|
477
545
|
]
|
|
478
546
|
]
|
|
479
547
|
|
|
480
|
-
|
|
481
|
-
res_df["control"] = res_df["control"].apply(lambda x: x if x else None)
|
|
482
|
-
|
|
483
|
-
return res_df.to_dict(orient="records") if not df else res_df
|
|
548
|
+
return res_df.to_dict(orient="records") if not as_df else res_df
|
|
484
549
|
|
|
485
550
|
def _filter_samples_metadata(
|
|
486
551
|
self,
|
|
@@ -505,7 +570,7 @@ class SeerSDK:
|
|
|
505
570
|
|
|
506
571
|
Returns
|
|
507
572
|
-------
|
|
508
|
-
res : list
|
|
573
|
+
res : list[str]
|
|
509
574
|
A list of sample ids
|
|
510
575
|
|
|
511
576
|
Examples
|
|
@@ -533,7 +598,7 @@ class SeerSDK:
|
|
|
533
598
|
"Invalid filter. Please choose between 'control' or 'sample'."
|
|
534
599
|
)
|
|
535
600
|
|
|
536
|
-
df = self.
|
|
601
|
+
df = self.get_samples(project_id=project_id, as_df=True)
|
|
537
602
|
|
|
538
603
|
if filter == "control":
|
|
539
604
|
df = df[~df["control"].isna()]
|
|
@@ -546,7 +611,7 @@ class SeerSDK:
|
|
|
546
611
|
|
|
547
612
|
return valid_samples
|
|
548
613
|
|
|
549
|
-
def
|
|
614
|
+
def _get_sample_custom_fields(self):
|
|
550
615
|
"""
|
|
551
616
|
Fetches a list of custom fields defined for the authenticated user.
|
|
552
617
|
"""
|
|
@@ -566,7 +631,7 @@ class SeerSDK:
|
|
|
566
631
|
del entry["tenant_id"]
|
|
567
632
|
return res
|
|
568
633
|
|
|
569
|
-
def
|
|
634
|
+
def get_msruns(self, sample_ids: list, as_df: bool = False):
|
|
570
635
|
"""
|
|
571
636
|
Fetches MS data files for passed in `sample_ids` (provided they are valid and contain relevant files) for an authenticated user.
|
|
572
637
|
|
|
@@ -576,12 +641,12 @@ class SeerSDK:
|
|
|
576
641
|
----------
|
|
577
642
|
sample_ids : list
|
|
578
643
|
List of unique sample IDs.
|
|
579
|
-
|
|
580
|
-
|
|
644
|
+
as_df: bool
|
|
645
|
+
whether the result should be converted to a DataFrame, defaulted to False.
|
|
581
646
|
|
|
582
647
|
Returns
|
|
583
648
|
-------
|
|
584
|
-
res: list or DataFrame
|
|
649
|
+
res: list[dict] or DataFrame
|
|
585
650
|
List/DataFrame of plate objects for the authenticated user.
|
|
586
651
|
|
|
587
652
|
Examples
|
|
@@ -590,13 +655,13 @@ class SeerSDK:
|
|
|
590
655
|
>>> seer_sdk = SeerSDK()
|
|
591
656
|
>>> sample_ids = ["812139c0-15e0-11ee-bdf1-bbaa73585acf", "803e05b0-15e0-11ee-bdf1-bbaa73585acf"]
|
|
592
657
|
|
|
593
|
-
>>> seer_sdk.
|
|
658
|
+
>>> seer_sdk.get_msruns(sample_ids)
|
|
594
659
|
>>> [
|
|
595
660
|
{"id": "SAMPLE_ID_1_HERE" ... },
|
|
596
661
|
{"id": "SAMPLE_ID_2_HERE" ... }
|
|
597
662
|
]
|
|
598
663
|
|
|
599
|
-
>>> seer_sdk.
|
|
664
|
+
>>> seer_sdk.get_msruns(sample_ids, as_df=True)
|
|
600
665
|
>>> id ... gradient
|
|
601
666
|
0 81c6a180-15e0-11ee-bdf1-bbaa73585acf ... None
|
|
602
667
|
1 816a9ed0-15e0-11ee-bdf1-bbaa73585acf ... None
|
|
@@ -631,214 +696,13 @@ class SeerSDK:
|
|
|
631
696
|
entry["raw_file_path"] = entry["raw_file_path"][
|
|
632
697
|
location(entry["raw_file_path"]) :
|
|
633
698
|
]
|
|
634
|
-
return res if not
|
|
635
|
-
|
|
636
|
-
def get_plate(self, plate_id: str, df: bool = False):
|
|
637
|
-
"""
|
|
638
|
-
Fetches MS data files for a `plate_id` (provided that the `plate_id` is valid and has samples associated with it) for an authenticated user.
|
|
639
|
-
|
|
640
|
-
The function returns a dict containing DataFrame objects if the `df` flag is passed in as True, otherwise a nested dict object is returned instead.
|
|
641
|
-
|
|
642
|
-
Parameters
|
|
643
|
-
----------
|
|
644
|
-
plate_id : str, optional
|
|
645
|
-
ID of the plate for which samples are to be fetched, defaulted to None.
|
|
646
|
-
df: bool
|
|
647
|
-
Boolean denoting whether the user wants the response back in JSON or a DataFrame object
|
|
648
|
-
|
|
649
|
-
Returns
|
|
650
|
-
-------
|
|
651
|
-
res: list or DataFrame
|
|
652
|
-
List/DataFrame of MS data file objects for the authenticated user.
|
|
653
|
-
|
|
654
|
-
Examples
|
|
655
|
-
-------
|
|
656
|
-
>>> from seer_pas_sdk import SeerSDK
|
|
657
|
-
>>> seer_sdk = SeerSDK()
|
|
658
|
-
>>> plate_id = "7ec8cad0-15e0-11ee-bdf1-bbaa73585acf"
|
|
659
|
-
|
|
660
|
-
>>> seer_sdk.get_plate(plate_id)
|
|
661
|
-
>>> [
|
|
662
|
-
{"id": "PLATE_ID_1_HERE" ... },
|
|
663
|
-
{"id": "PLATE_ID_2_HERE" ... }
|
|
664
|
-
]
|
|
665
|
-
|
|
666
|
-
>>> seer_sdk.get_plate(plate_id, df=True)
|
|
667
|
-
>>> id ... volume
|
|
668
|
-
0 PLATE_ID_1_HERE ... None
|
|
669
|
-
1 PLATE_ID_2_HERE ... None
|
|
670
|
-
|
|
671
|
-
[2 rows x 26 columns]
|
|
672
|
-
"""
|
|
673
|
-
plate_samples = self.get_samples_metadata(plate_id=plate_id)
|
|
674
|
-
sample_ids = [sample["id"] for sample in plate_samples]
|
|
675
|
-
return self.get_msdata(sample_ids, df)
|
|
676
|
-
|
|
677
|
-
def get_project(
|
|
678
|
-
self,
|
|
679
|
-
project_id: str,
|
|
680
|
-
msdata: bool = False,
|
|
681
|
-
df: bool = False,
|
|
682
|
-
flat: bool = False,
|
|
683
|
-
):
|
|
684
|
-
"""
|
|
685
|
-
Fetches samples (and MS data files) for a `project_id` (provided that the `project_id` is valid and has samples associated with it) for an authenticated user.
|
|
686
|
-
|
|
687
|
-
The function returns a DataFrame object if the `df` flag is passed in as True, otherwise a nested dict object is returned instead. If the both the `df` and `msdata` flags are passed in as True, then a nested DataFrame object is returned instead.
|
|
688
|
-
|
|
689
|
-
If the `flat` flag is passed in as True, then the nested dict object is returned as an array of dict objects and the nested df object is returned as a single df object.
|
|
690
|
-
|
|
691
|
-
Parameters
|
|
692
|
-
----------
|
|
693
|
-
project_id : str
|
|
694
|
-
ID of the project for which samples are to be fetched.
|
|
695
|
-
msdata: bool, optional
|
|
696
|
-
Boolean flag denoting whether the user wants relevant MS data files associated with the samples.
|
|
697
|
-
df: bool, optional
|
|
698
|
-
Boolean denoting whether the user wants the response back in JSON or a DataFrame object.
|
|
699
|
-
|
|
700
|
-
Returns
|
|
701
|
-
-------
|
|
702
|
-
res: list or DataFrame
|
|
703
|
-
List/DataFrame of plate objects for the authenticated user.
|
|
704
|
-
|
|
705
|
-
Examples
|
|
706
|
-
-------
|
|
707
|
-
>>> from seer_pas_sdk import SeerSDK
|
|
708
|
-
>>> seer_sdk = SeerSDK()
|
|
709
|
-
>>> project_id = "7e48e150-8a47-11ed-b382-bf440acece26"
|
|
710
|
-
|
|
711
|
-
>>> seer_sdk.get_project(project_id=project_id, msdata=False, df=False)
|
|
712
|
-
>>> {
|
|
713
|
-
"project_samples": [
|
|
714
|
-
{
|
|
715
|
-
"id": "SAMPLE_ID_1_HERE",
|
|
716
|
-
"sample_type": "Plasma",
|
|
717
|
-
...
|
|
718
|
-
...
|
|
719
|
-
},
|
|
720
|
-
{
|
|
721
|
-
"id": "SAMPLE_ID_2_HERE",
|
|
722
|
-
"sample_type": "Plasma",
|
|
723
|
-
...
|
|
724
|
-
...
|
|
725
|
-
}
|
|
726
|
-
]
|
|
727
|
-
}
|
|
728
|
-
|
|
729
|
-
>>> seer_sdk.get_project(project_id=project_id, msdata=True, df=False)
|
|
730
|
-
>>> [
|
|
731
|
-
{
|
|
732
|
-
"id": "SAMPLE_ID_1_HERE",
|
|
733
|
-
"sample_type": "Plasma",
|
|
734
|
-
...
|
|
735
|
-
...
|
|
736
|
-
"ms_data_files": [
|
|
737
|
-
{
|
|
738
|
-
"id": MS_DATA_FILE_ID_1_HERE,
|
|
739
|
-
"tenant_id": "TENANT_ID_HERE",
|
|
740
|
-
...
|
|
741
|
-
...
|
|
742
|
-
},
|
|
743
|
-
{
|
|
744
|
-
"id": MS_DATA_FILE_ID_1_HERE,
|
|
745
|
-
"tenant_id": "TENANT_ID_HERE",
|
|
746
|
-
...
|
|
747
|
-
...
|
|
748
|
-
}
|
|
749
|
-
]
|
|
750
|
-
},
|
|
751
|
-
{
|
|
752
|
-
"id": "SAMPLE_ID_2_HERE",
|
|
753
|
-
"sample_type": "Plasma",
|
|
754
|
-
...
|
|
755
|
-
...
|
|
756
|
-
"ms_data_files": [
|
|
757
|
-
{
|
|
758
|
-
"id": MS_DATA_FILE_ID_2_HERE,
|
|
759
|
-
"tenant_id": "TENANT_ID_HERE",
|
|
760
|
-
...
|
|
761
|
-
...
|
|
762
|
-
},
|
|
763
|
-
{
|
|
764
|
-
"id": MS_DATA_FILE_ID_2_HERE,
|
|
765
|
-
"tenant_id": "TENANT_ID_HERE",
|
|
766
|
-
...
|
|
767
|
-
...
|
|
768
|
-
}
|
|
769
|
-
]
|
|
770
|
-
}
|
|
771
|
-
]
|
|
772
|
-
|
|
773
|
-
>>> seer_sdk.get_project(project_id=project_id, msdata=True, df=True)
|
|
774
|
-
>>> id ... ms_data_files
|
|
775
|
-
0 829509f0-8a47-11ed-b382-bf440acece26 ... id ... g...
|
|
776
|
-
1 828d41c0-8a47-11ed-b382-bf440acece26 ... id ... g...
|
|
777
|
-
2 8294e2e0-8a47-11ed-b382-bf440acece26 ... id ... g...
|
|
778
|
-
3 8285eec0-8a47-11ed-b382-bf440acece26 ... id ... g...
|
|
779
|
-
|
|
780
|
-
[4 rows x 60 columns]
|
|
781
|
-
"""
|
|
782
|
-
if not project_id:
|
|
783
|
-
return ValueError("No project ID specified.")
|
|
784
|
-
|
|
785
|
-
sample_ids = []
|
|
786
|
-
project_samples = self.get_samples_metadata(
|
|
787
|
-
project_id=project_id, df=False
|
|
788
|
-
)
|
|
789
|
-
flat_result = []
|
|
790
|
-
|
|
791
|
-
if msdata:
|
|
792
|
-
|
|
793
|
-
# construct map for quick index reference of sample in project_samples
|
|
794
|
-
sample_ids = {
|
|
795
|
-
sample["id"]: i for i, sample in enumerate(project_samples)
|
|
796
|
-
} # will always contain unique values
|
|
797
|
-
ms_data_files = self.get_msdata(
|
|
798
|
-
sample_ids=list(sample_ids.keys()), df=False
|
|
799
|
-
)
|
|
800
|
-
|
|
801
|
-
for ms_data_file in ms_data_files:
|
|
802
|
-
index = sample_ids.get(ms_data_file["sample_id"], None)
|
|
803
|
-
if not index:
|
|
804
|
-
continue
|
|
805
|
-
|
|
806
|
-
if not flat:
|
|
807
|
-
if "ms_data_file" not in project_samples[index]:
|
|
808
|
-
project_samples[index]["ms_data_files"] = [
|
|
809
|
-
ms_data_file
|
|
810
|
-
]
|
|
811
|
-
else:
|
|
812
|
-
project_samples[index]["ms_data_files"].append(
|
|
813
|
-
ms_data_file
|
|
814
|
-
)
|
|
815
|
-
else:
|
|
816
|
-
flat_result.append(project_samples[index] | ms_data_file)
|
|
817
|
-
|
|
818
|
-
# return flat result if results were added to the flat object
|
|
819
|
-
if flat and flat_result:
|
|
820
|
-
project_samples = flat_result
|
|
821
|
-
|
|
822
|
-
if df:
|
|
823
|
-
if flat:
|
|
824
|
-
return pd.DataFrame(project_samples)
|
|
825
|
-
else:
|
|
826
|
-
for sample_index in range(len(project_samples)):
|
|
827
|
-
if "ms_data_files" in project_samples[sample_index]:
|
|
828
|
-
project_samples[sample_index]["ms_data_files"] = (
|
|
829
|
-
dict_to_df(
|
|
830
|
-
project_samples[sample_index]["ms_data_files"]
|
|
831
|
-
)
|
|
832
|
-
)
|
|
833
|
-
|
|
834
|
-
project_samples = dict_to_df(project_samples)
|
|
835
|
-
|
|
836
|
-
return project_samples
|
|
699
|
+
return res if not as_df else dict_to_df(res)
|
|
837
700
|
|
|
838
701
|
def get_analysis_protocols(
|
|
839
702
|
self,
|
|
840
703
|
analysis_protocol_name: str = None,
|
|
841
704
|
analysis_protocol_id: str = None,
|
|
705
|
+
as_df: bool = False,
|
|
842
706
|
):
|
|
843
707
|
"""
|
|
844
708
|
Fetches a list of analysis protocols for the authenticated user. If no `analysis_protocol_id` is provided, returns all analysis protocols for the authenticated user. If `analysis_protocol_name` (and no `analysis_protocol_id`) is provided, returns the analysis protocol with the given name, provided it exists.
|
|
@@ -851,9 +715,11 @@ class SeerSDK:
|
|
|
851
715
|
analysis_protocol_name : str, optional
|
|
852
716
|
Name of the analysis protocol to be fetched, defaulted to None.
|
|
853
717
|
|
|
718
|
+
as_df : bool, optional
|
|
719
|
+
whether the result should be converted to a DataFrame, defaulted to False.
|
|
854
720
|
Returns
|
|
855
721
|
-------
|
|
856
|
-
protocols: list
|
|
722
|
+
protocols: list[dict]
|
|
857
723
|
List of analysis protocol objects for the authenticated user.
|
|
858
724
|
|
|
859
725
|
Examples
|
|
@@ -884,32 +750,41 @@ class SeerSDK:
|
|
|
884
750
|
else f"{self._auth.url}api/v1/analysisProtocols/{analysis_protocol_id}"
|
|
885
751
|
)
|
|
886
752
|
res = []
|
|
753
|
+
params = {"all": "true"}
|
|
754
|
+
|
|
755
|
+
if analysis_protocol_name:
|
|
756
|
+
params.update(
|
|
757
|
+
{
|
|
758
|
+
"searchFields": "analysis_protocol_name,offering_name",
|
|
759
|
+
"searchItem": analysis_protocol_name,
|
|
760
|
+
}
|
|
761
|
+
)
|
|
887
762
|
|
|
888
763
|
with self._get_auth_session() as s:
|
|
889
764
|
|
|
890
|
-
protocols = s.get(URL, params=
|
|
765
|
+
protocols = s.get(URL, params=params)
|
|
891
766
|
if protocols.status_code != 200:
|
|
892
767
|
raise ValueError(
|
|
893
768
|
"Invalid request. Please check your parameters."
|
|
894
769
|
)
|
|
895
|
-
if
|
|
896
|
-
res = protocols.json()["data"]
|
|
897
|
-
|
|
898
|
-
if analysis_protocol_id and not analysis_protocol_name:
|
|
770
|
+
if analysis_protocol_id:
|
|
899
771
|
res = [protocols.json()]
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
res = [
|
|
903
|
-
protocol
|
|
904
|
-
for protocol in protocols.json()["data"]
|
|
905
|
-
if protocol["analysis_protocol_name"]
|
|
906
|
-
== analysis_protocol_name
|
|
907
|
-
]
|
|
772
|
+
else:
|
|
773
|
+
res = protocols.json()["data"]
|
|
908
774
|
|
|
909
775
|
for entry in range(len(res)):
|
|
910
776
|
if "tenant_id" in res[entry]:
|
|
911
777
|
del res[entry]["tenant_id"]
|
|
912
778
|
|
|
779
|
+
if "can_edit" in res[entry]:
|
|
780
|
+
del res[entry]["can_edit"]
|
|
781
|
+
|
|
782
|
+
if "can_delete" in res[entry]:
|
|
783
|
+
del res[entry]["can_delete"]
|
|
784
|
+
|
|
785
|
+
if "scope" in res[entry]:
|
|
786
|
+
del res[entry]["scope"]
|
|
787
|
+
|
|
913
788
|
if "parameter_file_path" in res[entry]:
|
|
914
789
|
# Simple lambda function to find the third occurrence of '/' in the raw file path
|
|
915
790
|
location = lambda s: len(s) - len(s.split("/", 3)[-1])
|
|
@@ -918,9 +793,9 @@ class SeerSDK:
|
|
|
918
793
|
"parameter_file_path"
|
|
919
794
|
][location(res[entry]["parameter_file_path"]) :]
|
|
920
795
|
|
|
921
|
-
return res
|
|
796
|
+
return res if not as_df else dict_to_df(res)
|
|
922
797
|
|
|
923
|
-
def
|
|
798
|
+
def get_analyses(
|
|
924
799
|
self,
|
|
925
800
|
analysis_id: str = None,
|
|
926
801
|
folder_id: str = None,
|
|
@@ -962,30 +837,30 @@ class SeerSDK:
|
|
|
962
837
|
|
|
963
838
|
Returns
|
|
964
839
|
-------
|
|
965
|
-
analyses: dict
|
|
840
|
+
analyses: list[dict]
|
|
966
841
|
Contains a list of analyses objects for the authenticated user.
|
|
967
842
|
|
|
968
843
|
Examples
|
|
969
844
|
-------
|
|
970
845
|
>>> from seer_pas_sdk import SeerSDK
|
|
971
846
|
>>> seer_sdk = SeerSDK()
|
|
972
|
-
>>> seer_sdk.
|
|
847
|
+
>>> seer_sdk.get_analyses()
|
|
973
848
|
>>> [
|
|
974
849
|
{id: "YOUR_ANALYSIS_ID_HERE", ...},
|
|
975
850
|
{id: "YOUR_ANALYSIS_ID_HERE", ...},
|
|
976
851
|
{id: "YOUR_ANALYSIS_ID_HERE", ...}
|
|
977
852
|
]
|
|
978
853
|
|
|
979
|
-
>>> seer_sdk.
|
|
854
|
+
>>> seer_sdk.get_analyses("YOUR_ANALYSIS_ID_HERE")
|
|
980
855
|
>>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
|
|
981
856
|
|
|
982
|
-
>>> seer_sdk.
|
|
857
|
+
>>> seer_sdk.get_analyses(folder_name="YOUR_FOLDER_NAME_HERE")
|
|
983
858
|
>>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
|
|
984
859
|
|
|
985
|
-
>>> seer_sdk.
|
|
860
|
+
>>> seer_sdk.get_analyses(analysis_name="YOUR_ANALYSIS")
|
|
986
861
|
>>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
|
|
987
862
|
|
|
988
|
-
>>> seer_sdk.
|
|
863
|
+
>>> seer_sdk.get_analyses(description="YOUR_DESCRIPTION")
|
|
989
864
|
>>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
|
|
990
865
|
"""
|
|
991
866
|
|
|
@@ -1074,7 +949,7 @@ class SeerSDK:
|
|
|
1074
949
|
|
|
1075
950
|
# recursive solution to get analyses in folders
|
|
1076
951
|
for folder in folders:
|
|
1077
|
-
res += self.
|
|
952
|
+
res += self.get_analyses(folder_id=folder)
|
|
1078
953
|
|
|
1079
954
|
if analysis_only:
|
|
1080
955
|
res = [
|
|
@@ -1082,6 +957,7 @@ class SeerSDK:
|
|
|
1082
957
|
]
|
|
1083
958
|
return res
|
|
1084
959
|
|
|
960
|
+
@deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
|
|
1085
961
|
def get_analysis_result_protein_data(
|
|
1086
962
|
self, analysis_id: str, link: bool = False, pg: str = None
|
|
1087
963
|
):
|
|
@@ -1154,6 +1030,7 @@ class SeerSDK:
|
|
|
1154
1030
|
"protein_panel": protein_panel,
|
|
1155
1031
|
}
|
|
1156
1032
|
|
|
1033
|
+
@deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
|
|
1157
1034
|
def get_analysis_result_peptide_data(
|
|
1158
1035
|
self, analysis_id: str, link: bool = False, peptide: str = None
|
|
1159
1036
|
):
|
|
@@ -1229,7 +1106,92 @@ class SeerSDK:
|
|
|
1229
1106
|
"peptide_panel": peptide_panel,
|
|
1230
1107
|
}
|
|
1231
1108
|
|
|
1232
|
-
def
|
|
1109
|
+
def _get_search_result_protein_data(self, analysis_id: str):
    """
    Given an analysis id, this function returns the protein data for the analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    Returns
    -------
    protein_data : dict[str, dict[str, str]]
        Maps "npLink" and/or "panelLink" to a ``{"url": ...}`` dictionary.
        "panelLink" is always present; its url is "" when the response
        carries no panel row.

    Raises
    ------
    ValueError
        If the request does not return HTTP 200, or if the response
        contains neither an "npLink" nor a "panelLink" row.
    """
    with self._get_auth_session() as s:
        URL = f"{self._auth.url}api/v1/data"
        response = s.get(
            f"{URL}/protein?analysisId={analysis_id}&retry=false"
        )

        if response.status_code != 200:
            raise ValueError(
                "Could not fetch protein data. Please verify that your analysis completed."
            )
        rows = response.json()

    protein_data = {}
    for row in rows:
        name = row.get("name")
        if name in ("npLink", "panelLink"):
            # "link" can be missing or explicitly null in the payload;
            # both cases are treated as "no url" instead of crashing.
            link = row.get("link") or {}
            protein_data[name] = {"url": link.get("url", "")}

    if not protein_data:
        raise ValueError("No protein result files found.")

    # Callers index "panelLink" unconditionally, so always provide it.
    protein_data.setdefault("panelLink", {"url": ""})

    return protein_data
|
|
1146
|
+
|
|
1147
|
+
def _get_search_result_peptide_data(self, analysis_id: str):
    """
    Given an analysis id, this function returns the peptide data for the analysis.

    Parameters
    ----------

    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    Returns
    -------
    peptide_data : dict[str, dict[str, str]]
        Maps "npLink" and/or "panelLink" to a ``{"url": ...}`` dictionary.
        "panelLink" is always present; its url is "" when the response
        carries no panel row.

    Raises
    ------
    ValueError
        If the request does not return HTTP 200, or if the response
        contains neither an "npLink" nor a "panelLink" row.
    """

    with self._get_auth_session() as s:
        URL = f"{self._auth.url}api/v1/data"
        response = s.get(
            f"{URL}/peptide?analysisId={analysis_id}&retry=false"
        )

        if response.status_code != 200:
            raise ValueError(
                "Could not fetch peptide data. Please verify that your analysis completed."
            )

        rows = response.json()

    peptide_data = {}
    for row in rows:
        name = row.get("name")
        if name in ("npLink", "panelLink"):
            # "link" can be missing or explicitly null in the payload;
            # both cases are treated as "no url" instead of crashing.
            link = row.get("link") or {}
            peptide_data[name] = {"url": link.get("url", "")}

    if not peptide_data:
        raise ValueError("No peptide result files found.")

    # Callers index "panelLink" unconditionally, so always provide it.
    peptide_data.setdefault("panelLink", {"url": ""})

    return peptide_data
|
|
1193
|
+
|
|
1194
|
+
def list_search_result_files(self, analysis_id: str):
|
|
1233
1195
|
"""
|
|
1234
1196
|
Given an analysis id, this function returns a list of files associated with the analysis.
|
|
1235
1197
|
|
|
@@ -1240,11 +1202,11 @@ class SeerSDK:
|
|
|
1240
1202
|
|
|
1241
1203
|
Returns
|
|
1242
1204
|
-------
|
|
1243
|
-
files: list
|
|
1205
|
+
files: list[str]
|
|
1244
1206
|
List of files associated with the analysis.
|
|
1245
1207
|
"""
|
|
1246
1208
|
try:
|
|
1247
|
-
analysis_metadata = self.
|
|
1209
|
+
analysis_metadata = self.get_analyses(analysis_id)[0]
|
|
1248
1210
|
except (IndexError, ServerError):
|
|
1249
1211
|
raise ValueError("Invalid analysis ID.")
|
|
1250
1212
|
except:
|
|
@@ -1266,7 +1228,141 @@ class SeerSDK:
|
|
|
1266
1228
|
files.append(row["filename"])
|
|
1267
1229
|
return files
|
|
1268
1230
|
|
|
1269
|
-
def
|
|
1231
|
+
def get_search_result(
    self, analysis_id: str, analyte_type: str, rollup: str
):
    """
    Load one of the files available via the "Download result files" button on the PAS UI.

    Args:
        analysis_id (str): id of the analysis
        analyte_type (str): type of the data. Acceptable options are one of ['protein', 'peptide', 'precursor'].
        rollup (str): the desired file. Acceptable options are one of ['np', 'panel'].

    Raises:
        ValueError: empty analysis id, unknown analyte type or rollup, or the
            unsupported combination of 'precursor' with 'panel'.

    Returns:
        pd.DataFrame: the requested file as a pandas DataFrame

    """
    # Argument validation, in the same order as the checks are documented.
    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if analyte_type not in ["protein", "peptide", "precursor"]:
        raise ValueError(
            "Invalid data type. Please choose between 'protein', 'peptide', or 'precursor'."
        )

    if rollup not in ["np", "panel"]:
        raise ValueError(
            "Invalid file. Please choose between 'np', 'panel'."
        )

    if analyte_type == "precursor" and rollup == "panel":
        raise ValueError(
            "Precursor data is not available for panel rollup, please select np rollup."
        )

    # Precursor-level data is read from the report.tsv result file.
    if analyte_type == "precursor":
        report = self.get_search_result_file_url(
            analysis_id, filename="report.tsv"
        )
        return url_to_df(report["url"])

    # Protein and peptide data expose one link per rollup.
    if analyte_type == "protein":
        result_links = self._get_search_result_protein_data(analysis_id)
    else:
        result_links = self._get_search_result_peptide_data(analysis_id)

    link_key = "npLink" if rollup == "np" else "panelLink"
    return url_to_df(result_links[link_key]["url"])
|
|
1295
|
+
|
|
1296
|
+
def download_search_output_file(
    self, analysis_id: str, filename: str, download_path: str = ""
):
    """
    Given an analysis id and a analysis result filename, this function downloads the file to the specified path.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    filename : str
        Name of the file to be fetched. Files can be case insensitive and without file extensions.

    download_path : str
        String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid.
        Defaults to the current working directory when empty.

    Returns
    -------
    None
        Downloads the file to the specified path.

    Raises
    ------
    ValueError
        If the analysis id is empty or the download path does not exist.
    OSError
        If the download still fails after the second attempt (network
        errors raised by ``urllib`` are ``OSError`` subclasses).
    """

    if not download_path:
        download_path = os.getcwd()

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if not os.path.exists(download_path):
        raise ValueError(
            "Please specify a valid folder path as download path."
        )

    file = self.get_search_result_file_url(analysis_id, filename)
    file_url = file["url"]
    filename = file["filename"]

    # Result filenames may contain sub-folders ("a/b/report.tsv"); create
    # them up front so the download does not fail on a missing directory.
    destination = os.path.join(download_path, filename)
    parent_dir = os.path.dirname(destination)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    print("Downloading file:", filename)
    max_attempts = 2
    for attempt in range(1, max_attempts + 1):
        try:
            with tqdm(
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                miniters=1,
                desc="Progress",
            ) as t:
                # NOTE(review): this disables TLS certificate verification
                # for every urllib HTTPS request in the process. Kept for
                # backward compatibility; confirm whether it is still needed.
                ssl._create_default_https_context = (
                    ssl._create_unverified_context
                )
                urllib.request.urlretrieve(
                    file_url,
                    destination,
                    reporthook=download_hook(t),
                    data=None,
                )
            break
        except OSError:
            # Retry once on I/O or network failure, then surface the
            # real error instead of reporting a false success.
            if attempt == max_attempts:
                raise

    print(f"File {filename} downloaded successfully to {download_path}.")
    return
|
|
1364
|
+
|
|
1365
|
+
def get_search_result_file_url(self, analysis_id: str, filename: str):
|
|
1270
1366
|
"""
|
|
1271
1367
|
Given an analysis id and a analysis result filename, this function returns the signed URL for the file.
|
|
1272
1368
|
|
|
@@ -1280,21 +1376,29 @@ class SeerSDK:
|
|
|
1280
1376
|
|
|
1281
1377
|
Returns
|
|
1282
1378
|
-------
|
|
1283
|
-
file_url: dict
|
|
1284
|
-
|
|
1379
|
+
file_url: dict[str, str]
|
|
1380
|
+
Dictionary containing the 'url' and 'filename' of the file.
|
|
1285
1381
|
"""
|
|
1382
|
+
if "." in filename:
|
|
1383
|
+
filename = ".".join(filename.split(".")[:-1])
|
|
1384
|
+
filename = filename.casefold()
|
|
1286
1385
|
|
|
1287
1386
|
# Allow user to pass in filenames without an extension.
|
|
1288
|
-
analysis_result_files = self.
|
|
1387
|
+
analysis_result_files = self.list_search_result_files(analysis_id)
|
|
1289
1388
|
analysis_result_files_prefix_mapper = {
|
|
1290
|
-
".".join(x.split(".")[:-1]): x
|
|
1389
|
+
(".".join(x.split(".")[:-1])).casefold(): x
|
|
1390
|
+
for x in analysis_result_files
|
|
1291
1391
|
}
|
|
1292
1392
|
if filename in analysis_result_files_prefix_mapper:
|
|
1293
1393
|
filename = analysis_result_files_prefix_mapper[filename]
|
|
1394
|
+
else:
|
|
1395
|
+
raise ValueError(
|
|
1396
|
+
f"Filename {filename} not among the available analysis result files. Please use SeerSDK.list_search_result_files('{analysis_id}') to see available files for this analysis."
|
|
1397
|
+
)
|
|
1294
1398
|
|
|
1295
|
-
analysis_metadata = self.
|
|
1399
|
+
analysis_metadata = self.get_analyses(analysis_id)[0]
|
|
1296
1400
|
if analysis_metadata.get("status") in ["Failed", None]:
|
|
1297
|
-
raise ValueError("Cannot generate links for failed
|
|
1401
|
+
raise ValueError("Cannot generate links for failed searches.")
|
|
1298
1402
|
with self._get_auth_session() as s:
|
|
1299
1403
|
file_url = s.post(
|
|
1300
1404
|
f"{self._auth.url}api/v1/analysisResultFiles/getUrl",
|
|
@@ -1307,8 +1411,11 @@ class SeerSDK:
|
|
|
1307
1411
|
response = file_url.json()
|
|
1308
1412
|
if not response.get("url"):
|
|
1309
1413
|
raise ValueError(f"File {filename} not found.")
|
|
1414
|
+
|
|
1415
|
+
response["filename"] = filename
|
|
1310
1416
|
return response
|
|
1311
1417
|
|
|
1418
|
+
@deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
|
|
1312
1419
|
def get_analysis_result_files(
|
|
1313
1420
|
self,
|
|
1314
1421
|
analysis_id: str,
|
|
@@ -1339,7 +1446,7 @@ class SeerSDK:
|
|
|
1339
1446
|
|
|
1340
1447
|
Returns
|
|
1341
1448
|
-------
|
|
1342
|
-
links: dict
|
|
1449
|
+
links: dict[str, pd.DataFrame]
|
|
1343
1450
|
Contains dataframe objects for the requested files. If a filename is not found, it is skipped.
|
|
1344
1451
|
|
|
1345
1452
|
|
|
@@ -1389,7 +1496,7 @@ class SeerSDK:
|
|
|
1389
1496
|
|
|
1390
1497
|
filenames = set(filenames)
|
|
1391
1498
|
# Allow user to pass in filenames without an extension.
|
|
1392
|
-
analysis_result_files = self.
|
|
1499
|
+
analysis_result_files = self.list_search_result_files(analysis_id)
|
|
1393
1500
|
analysis_result_files_prefix_mapper = {
|
|
1394
1501
|
".".join(x.split(".")[:-1]): x for x in analysis_result_files
|
|
1395
1502
|
}
|
|
@@ -1426,7 +1533,7 @@ class SeerSDK:
|
|
|
1426
1533
|
links["peptide_panel.tsv"] = peptide_data["panelLink"]["url"]
|
|
1427
1534
|
else:
|
|
1428
1535
|
try:
|
|
1429
|
-
links[filename] = self.
|
|
1536
|
+
links[filename] = self._get_search_result_file_url(
|
|
1430
1537
|
analysis_id, filename
|
|
1431
1538
|
)["url"]
|
|
1432
1539
|
except Exception as e:
|
|
@@ -1451,6 +1558,7 @@ class SeerSDK:
|
|
|
1451
1558
|
|
|
1452
1559
|
return links
|
|
1453
1560
|
|
|
1561
|
+
@deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
|
|
1454
1562
|
def get_analysis_result(
|
|
1455
1563
|
self,
|
|
1456
1564
|
analysis_id: str,
|
|
@@ -1522,7 +1630,7 @@ class SeerSDK:
|
|
|
1522
1630
|
}
|
|
1523
1631
|
|
|
1524
1632
|
if diann_report:
|
|
1525
|
-
diann_report_url = self.
|
|
1633
|
+
diann_report_url = self._get_search_result_file_url(
|
|
1526
1634
|
analysis_id, "report.tsv"
|
|
1527
1635
|
)
|
|
1528
1636
|
links["diann_report"] = url_to_df(diann_report_url["url"])
|
|
@@ -1578,7 +1686,7 @@ class SeerSDK:
|
|
|
1578
1686
|
raise ValueError("Analysis id cannot be empty.")
|
|
1579
1687
|
|
|
1580
1688
|
try:
|
|
1581
|
-
res = self.
|
|
1689
|
+
res = self.get_analyses(analysis_id)
|
|
1582
1690
|
except ValueError:
|
|
1583
1691
|
return ValueError("Analysis not found. Your ID could be incorrect")
|
|
1584
1692
|
|
|
@@ -1597,7 +1705,7 @@ class SeerSDK:
|
|
|
1597
1705
|
|
|
1598
1706
|
Returns
|
|
1599
1707
|
-------
|
|
1600
|
-
list
|
|
1708
|
+
list[str]
|
|
1601
1709
|
Contains the list of files in the folder.
|
|
1602
1710
|
|
|
1603
1711
|
Examples
|
|
@@ -1652,8 +1760,8 @@ class SeerSDK:
|
|
|
1652
1760
|
|
|
1653
1761
|
Returns
|
|
1654
1762
|
-------
|
|
1655
|
-
message: dict
|
|
1656
|
-
Contains the message whether the files were downloaded or not.
|
|
1763
|
+
message: dict[str, str]
|
|
1764
|
+
Contains the 'message' whether the files were downloaded or not.
|
|
1657
1765
|
"""
|
|
1658
1766
|
|
|
1659
1767
|
urls = []
|
|
@@ -1756,6 +1864,11 @@ class SeerSDK:
|
|
|
1756
1864
|
**kwargs : dict, optional
|
|
1757
1865
|
Search keyword parameters to be passed in. Acceptable values are 'name' or 'description'.
|
|
1758
1866
|
|
|
1867
|
+
Returns
|
|
1868
|
+
-------
|
|
1869
|
+
res : list[dict]
|
|
1870
|
+
A list of dictionaries containing the group analysis objects.
|
|
1871
|
+
|
|
1759
1872
|
"""
|
|
1760
1873
|
params = {"analysisid": analysis_id}
|
|
1761
1874
|
if kwargs and not group_analysis_id:
|
|
@@ -1807,7 +1920,7 @@ class SeerSDK:
|
|
|
1807
1920
|
Returns
|
|
1808
1921
|
-------
|
|
1809
1922
|
res : dict
|
|
1810
|
-
A dictionary containing the group analysis
|
|
1923
|
+
A dictionary containing the group analysis object.
|
|
1811
1924
|
|
|
1812
1925
|
Examples
|
|
1813
1926
|
-------
|
|
@@ -1961,7 +2074,7 @@ class SeerSDK:
|
|
|
1961
2074
|
analysis_id (str): ID of the analysis.
|
|
1962
2075
|
feature_ids (list[str], optional): Filter result object to a set of ids. Defaults to [].
|
|
1963
2076
|
show_significant_only (bool, optional): Mark true if only significant results are to be returned. Defaults to False.
|
|
1964
|
-
as_df (bool, optional):
|
|
2077
|
+
as_df (bool, optional): whether the result should be converted to a DataFrame. Defaults to False.
|
|
1965
2078
|
volcano_plot (bool, optional): Mark true to include the volcano plot data in the return object. Defaults to False.
|
|
1966
2079
|
cached (bool, optional): Mark true to return volcano plot data as a VolcanoPlotBuilder object. No effect if volcano_plot flag is marked false. Defaults to False.
|
|
1967
2080
|
|
|
@@ -1983,8 +2096,10 @@ class SeerSDK:
|
|
|
1983
2096
|
|
|
1984
2097
|
protein_peptide_gene_map = builder.protein_gene_map
|
|
1985
2098
|
|
|
1986
|
-
# API call 2 - get analysis samples
|
|
1987
|
-
samples_metadata = self.
|
|
2099
|
+
# API call 2 - get analysis samples to get condition
|
|
2100
|
+
samples_metadata = self._get_analysis_samples(
|
|
2101
|
+
analysis_id=analysis_id
|
|
2102
|
+
)
|
|
1988
2103
|
|
|
1989
2104
|
json = {"analysisId": analysis_id}
|
|
1990
2105
|
if feature_ids:
|
|
@@ -2021,7 +2136,7 @@ class SeerSDK:
|
|
|
2021
2136
|
if x[feature_type_index] in protein_peptide_gene_map
|
|
2022
2137
|
]
|
|
2023
2138
|
sample_id_condition = {
|
|
2024
|
-
x["id"]: x["condition"] for x in samples_metadata
|
|
2139
|
+
x["id"]: x["condition"] for x in samples_metadata
|
|
2025
2140
|
}
|
|
2026
2141
|
|
|
2027
2142
|
if show_significant_only:
|
|
@@ -2062,7 +2177,7 @@ class SeerSDK:
|
|
|
2062
2177
|
box_plot (bool, optional): Mark true to include box plot data in the return object. Defaults to False.
|
|
2063
2178
|
|
|
2064
2179
|
Returns:
|
|
2065
|
-
dict: A dictionary containing the volcano plot and optionally box plot data for each group analysis.
|
|
2180
|
+
dict[str, pd.DataFrame]: A dictionary containing the volcano plot and optionally box plot data for each group analysis.
|
|
2066
2181
|
"""
|
|
2067
2182
|
group_analysis_ids = [
|
|
2068
2183
|
x["id"]
|
|
@@ -2118,8 +2233,8 @@ class SeerSDK:
|
|
|
2118
2233
|
ValueError: Invalid type provided.
|
|
2119
2234
|
ServerError: Could not fetch PCA data.
|
|
2120
2235
|
Returns:
|
|
2121
|
-
dict
|
|
2122
|
-
|
|
2236
|
+
dict[str, list|float]
|
|
2237
|
+
Returns response object containing 'xContributionRatio' (float), 'yContributionRatio' (float), 'samples' (list[dict]), and 'points' (list[float]).
|
|
2123
2238
|
"""
|
|
2124
2239
|
if not analysis_ids:
|
|
2125
2240
|
raise ValueError("Analysis IDs cannot be empty.")
|
|
@@ -2164,7 +2279,7 @@ class SeerSDK:
|
|
|
2164
2279
|
type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
|
|
2165
2280
|
sample_ids (list[str], optional): IDs of the samples of interest.
|
|
2166
2281
|
hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
|
|
2167
|
-
as_df (bool, optional):
|
|
2282
|
+
as_df (bool, optional): whether the result should be converted to a DataFrame. Defaults to False.
|
|
2168
2283
|
Raises:
|
|
2169
2284
|
ValueError: No analysis IDs provided.
|
|
2170
2285
|
ValueError: No sample IDs provided.
|
|
@@ -2491,7 +2606,7 @@ class SeerSDK:
|
|
|
2491
2606
|
fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance. Defaults to 1.
|
|
2492
2607
|
label_by (str, optional): Metric to sort result data. Defaults to "fold_change".
|
|
2493
2608
|
cached (bool, optional): Return a VolcanoPlotBuilder object for calculation reuse. Defaults to False.
|
|
2494
|
-
as_df (bool, optional):
|
|
2609
|
+
as_df (bool, optional): whether the result should be converted to a DataFrame. Defaults to False.
|
|
2495
2610
|
|
|
2496
2611
|
Raises:
|
|
2497
2612
|
ServerError - could not fetch group analysis results.
|
|
@@ -2521,29 +2636,51 @@ class SeerSDK:
|
|
|
2521
2636
|
else:
|
|
2522
2637
|
return obj.volcano_plot
|
|
2523
2638
|
|
|
2524
|
-
def
|
|
2639
|
+
def _get_analysis_samples(
    self, analysis_id: str = None, analysis_name: str = None, as_df=False
):
    """
    Get the samples associated with a given analysis.

    Args:
        analysis_id (str): UUID identifier of the analysis. Defaults to None.
        analysis_name (str): Name of the analysis. Defaults to None.
            Only used when `analysis_id` is not given; every analysis
            returned by `get_analyses(analysis_name=...)` is queried.
        as_df (bool) : whether the result should be converted to a DataFrame. Defaults to False.

    Raises:
        ValueError - neither analysis_id nor analysis_name was provided.
        ServerError - could not retrieve samples for analysis.
    Returns:
        list[dict] : a list of samples associated with the analysis,
            de-duplicated on the sample "id" (a pd.DataFrame when as_df is True).
    """

    if not analysis_id and not analysis_name:
        raise ValueError("Analysis cannot be empty.")

    if analysis_id:
        rows = [{"id": analysis_id}]
    else:
        rows = self.get_analyses(analysis_name=analysis_name)

    resp = []
    for row in rows:
        URL = f"{self._auth.url}api/v1/analyses/samples/{row['id']}"
        with self._get_auth_session() as s:
            samples = s.get(URL)
            try:
                samples.raise_for_status()
                obj = samples.json()[0]
                resp += obj["samples"]
            except Exception:
                # Best effort: skip analyses whose samples cannot be
                # fetched or parsed, but let KeyboardInterrupt and
                # SystemExit propagate.
                continue

    if not resp:
        raise ServerError(
            f"Could not retrieve samples for analysis {analysis_id or analysis_name}."
        )

    resp = pd.DataFrame(resp)
    # The same sample can be returned by several analyses; keep the first.
    resp.drop_duplicates(subset=["id"], inplace=True)
    return resp if as_df else resp.to_dict(orient="records")
|
|
2547
2684
|
|
|
2548
2685
|
def get_analysis_protocol_fasta(self, analysis_id, download_path=None):
|
|
2549
2686
|
if not analysis_id:
|
|
@@ -2553,7 +2690,7 @@ class SeerSDK:
|
|
|
2553
2690
|
download_path = os.getcwd()
|
|
2554
2691
|
|
|
2555
2692
|
try:
|
|
2556
|
-
analysis_protocol_id = self.
|
|
2693
|
+
analysis_protocol_id = self.get_analyses(analysis_id)[0][
|
|
2557
2694
|
"analysis_protocol_id"
|
|
2558
2695
|
]
|
|
2559
2696
|
except (IndexError, KeyError):
|