seer-pas-sdk 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2632 @@
1
+ from tqdm import tqdm
2
+
3
+ import os
4
+ import jwt
5
+ import requests
6
+ import urllib.request
7
+ import ssl
8
+
9
+ from typing import List as _List, Tuple as _Tuple
10
+
11
+ from ..common import *
12
+ from ..auth import Auth
13
+ from ..objects.volcanoplot import VolcanoPlotBuilder
14
+
15
+
16
+ class SeerSDK:
17
+ """
18
+ Object exposing SDK methods. Requires a username and password; the optional `instance` param denotes the instance of PAS (defaults to "US").
19
+
20
+ Examples
21
+ -------
22
+ >>> from seer_pas_sdk import SeerSDK
23
+ >>> USERNAME = "test"
24
+ >>> PASSWORD = "test-password"
25
+ >>> INSTANCE = "EU"
26
+ >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
27
+ """
28
+
29
def __init__(self, username, password, instance="US", tenant=None):
    """
    Authenticate the user and activate a tenant.

    Parameters
    ----------
    username : str
        Username passed to `Auth`.
    password : str
        Password passed to `Auth`.
    instance : str, optional
        Instance of PAS passed to `Auth`, defaulted to "US".
    tenant : str, optional
        Tenant ID or institution name to switch to. When omitted, the `active_tenant_id` reported by the auth object is used.

    Raises
    ------
    ValueError
        If any step of the login fails. The underlying error is attached as the exception cause.
    """
    try:
        self._auth = Auth(username, password, instance)

        self._auth.get_token()
        print(f"User '{username}' logged in.\n")

        if not tenant:
            tenant = self._auth.active_tenant_id
        try:
            self.switch_tenant(tenant)
        except Exception as e:
            # Best effort: a failed switch is reported but does not abort the login.
            print(
                f"Encountered an error directing you to tenant {tenant}: {e}."
            )
            print("Logging into home tenant...")
        print(f"You are now active in {self.get_active_tenant_name()}")
    except Exception as e:
        # Chain the original error so the root cause stays visible in tracebacks.
        raise ValueError(
            f"Could not log in.\nPlease check your credentials and/or instance: {e}."
        ) from e
51
+
52
def _get_auth_headers(self, use_multi_tenant=True):
    """
    Build the HTTP headers that authenticate a request.

    Parameters
    ----------
    use_multi_tenant : bool, optional
        When True (default), the active tenant id and role are included in the headers.

    Returns
    -------
    header : dict
        Header names mapped to their values.
    """
    id_token, access_token = self._auth.get_token()
    if not use_multi_tenant:
        return {"Authorization": id_token, "Access-Token": access_token}
    return {
        "Authorization": id_token,
        "Access-Token": access_token,
        "Tenant-Id": self._auth.active_tenant_id,
        "Role": self._auth.active_role,
    }
65
+
66
def _get_auth_session(self, use_multi_tenant=True):
    """
    Create a `requests.Session` preloaded with the authentication headers.

    Parameters
    ----------
    use_multi_tenant : bool, optional
        Forwarded to `_get_auth_headers`, defaulted to True.

    Returns
    -------
    sess : requests.Session
        A new session whose default headers include the authentication headers.
    """
    session = requests.Session()
    auth_headers = self._get_auth_headers(use_multi_tenant)
    session.headers.update(auth_headers)
    return session
72
+
73
def get_user_tenant_metadata(self, index=True):
    """
    Retrieve the tenant metadata of the authenticated user.

    Parameters
    ----------
    index : bool, optional
        When True (default), the entries are keyed by their "institution" value. When False, the decoded response is returned unchanged.

    Returns
    -------
    response : dict or list
        Tenant entries keyed by institution, or the decoded response as received.

    Raises
    ------
    ValueError
        If the server does not answer with status code 200.
    """
    with self._get_auth_session() as session:
        reply = session.get(f"{self._auth.url}api/v1/usertenants")

        if reply.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )

        tenants = reply.json()
        if not index:
            return tenants
        return {tenant["institution"]: tenant for tenant in tenants}
95
+
96
def list_tenants(self, reverse=False):
    """
    Map institution names to tenant ids (or the reverse) for the authenticated user.

    Parameters
    ----------
    reverse : bool
        When True the result is keyed by tenant id; when False (default) it is keyed by institution name.

    Returns
    -------
    tenants : dict
        Institution name -> tenant id, or tenant id -> institution name when `reverse` is True.
    """
    rows = self.get_user_tenant_metadata().values()
    if reverse:
        key_field, value_field = "tenantId", "institution"
    else:
        key_field, value_field = "institution", "tenantId"
    return {row[key_field]: row[value_field] for row in rows}
115
+
116
def switch_tenant(self, identifier: str):
    """
    Switches the active tenant for the authenticated user.

    Parameters
    ----------
    identifier : str
        Tenant ID or institution name to switch to. A matching tenant ID takes precedence over a matching institution name.

    Returns
    -------
    tenant_id : str
        The active tenant id after the operation.
    role : str
        The role recorded for the user in that tenant.

    Raises
    ------
    ValueError
        If `identifier` matches neither a tenant ID nor an institution name.
    ServerError
        If the server rejects the tenant update.
    """
    tenants = self.get_user_tenant_metadata()

    # Look for a tenant id match first, then fall back to the institution name.
    row = next(
        (x for x in tenants.values() if x["tenantId"] == identifier),
        None,
    )
    if row is None:
        row = tenants.get(identifier)
    if row is None:
        raise ValueError(
            "Invalid tenant identifier. Tenant was not switched."
        )
    tenant_id = row["tenantId"]

    with self._get_auth_session() as s:
        response = s.put(
            self._auth.url + "api/v1/users/tenant",
            json={
                "currentTenantId": tenant_id,
                "username": self._auth.username,
            },
        )
        if response.status_code != 200:
            raise ServerError(
                "Could not update current tenant for user. Tenant was not switched."
            )

    # Local state is only updated once the server has accepted the switch.
    self._auth.active_tenant_id = tenant_id
    self._auth.active_role = row["role"]
    print(f"You are now active in {row['institution']}")
    return self._auth.active_tenant_id, self._auth.active_role
168
+
169
def get_active_tenant(self):
    """
    Look up the metadata entry of the currently active tenant.

    Returns
    -------
    tenant : dict or None
        The first tenant entry whose "tenantId" equals the active tenant id, or None when no entry matches.
    """
    candidates = self.get_user_tenant_metadata(index=False)
    matches = list(
        filter(
            lambda entry: entry["tenantId"] == self._auth.active_tenant_id,
            candidates,
        )
    )
    if not matches:
        return None
    return matches[0]
183
+
184
def get_active_tenant_id(self):
    """
    Return the ID of the active tenant.

    Returns
    -------
    tenant_id : str or None
        The "tenantId" of the entry returned by `get_active_tenant`, or None when there is no such entry.
    """
    active = self.get_active_tenant()
    if not active:
        return None
    return active["tenantId"]
195
+
196
def get_active_tenant_name(self):
    """
    Return the institution name of the active tenant.

    Returns
    -------
    tenant : str or None
        The "institution" of the entry returned by `get_active_tenant`, or None when there is no such entry.
    """
    active = self.get_active_tenant()
    if not active:
        return None
    return active["institution"]
207
+
208
def get_spaces(self):
    """
    Retrieve the spaces (user groups) visible to the authenticated user.

    Returns
    -------
    spaces : list
        Decoded JSON body of the `api/v1/usergroups` endpoint.

    Raises
    ------
    ValueError
        If the server does not answer with status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_spaces()
    >>> [
            { "usergroup_name": ... },
            { "usergroup_name": ... },
            ...
        ]
    """
    endpoint = f"{self._auth.url}api/v1/usergroups"

    with self._get_auth_session() as session:
        reply = session.get(endpoint)

        if reply.status_code == 200:
            return reply.json()
        raise ValueError(
            "Invalid request. Please check your parameters."
        )
239
+
240
def get_plate_metadata(self, plate_id: str = None, df: bool = False):
    """
    Fetches a list of plates for the authenticated user. If no `plate_id` is provided, returns all plates for the authenticated user. If `plate_id` is provided, returns the plate with the given `plate_id`, provided it exists.

    Parameters
    ----------
    plate_id : str, optional
        ID of the plate to be fetched, defaulted to None.
    df : bool
        Boolean denoting whether the user wants the response back in JSON or a DataFrame object.

    Returns
    -------
    plates : list or DataFrame
        List/DataFrame of plate objects for the authenticated user. A single plate is returned as a one-element list.

    Raises
    ------
    ValueError
        If the server does not answer with status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_plate_metadata()
    >>> [
            { "id": ... },
            { "id": ... },
            ...
        ]

    >>> seer_sdk.get_plate_metadata(df=True)
    >>>                                        id  ... user_group
        0    a7c12190-15da-11ee-bdf1-bbaa73585acf  ...       None
        1    8c3b1480-15da-11ee-bdf1-bbaa73585acf  ...       None
        ..                                    ...  ...        ...

    >>> seer_sdk.get_plate_metadata(plate_id="YOUR_PLATE_ID_HERE")
    >>> [{ "id": ... }]
    """

    URL = f"{self._auth.url}api/v1/plates"

    with self._get_auth_session() as s:

        plates = s.get(
            f"{URL}/{plate_id}" if plate_id else URL,
            params={"all": "true"},
        )
        if plates.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )
        # The collection endpoint wraps its rows in "data"; a single plate is returned bare.
        res = [plates.json()] if plate_id else plates.json()["data"]

    for entry in res:
        # Tolerate entries that carry no tenant id instead of raising KeyError.
        entry.pop("tenant_id", None)

    return res if not df else dict_to_df(res)
306
+
307
def get_project_metadata(self, project_id: str = None, df: bool = False):
    """
    Fetches a list of projects for the authenticated user. If no `project_id` is provided, returns all projects for the authenticated user. If `project_id` is provided, returns the project with the given `project_id`, provided it exists.

    Parameters
    ----------
    project_id : str, optional
        Project ID of the project to be fetched, defaulted to None.
    df : bool
        Boolean denoting whether the user wants the response back in JSON or a DataFrame object.

    Returns
    -------
    projects : list or DataFrame
        DataFrame or list of project objects for the authenticated user. A single project is returned as a one-element list.

    Raises
    ------
    ValueError
        If the server does not answer with status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_project_metadata()
    >>> [
            { "project_name": ... },
            { "project_name": ... },
            ...
        ]

    >>> seer_sdk.get_project_metadata(df=True)
    >>>                                        id  ... user_group
        0    a7c12190-15da-11ee-bdf1-bbaa73585acf  ...       None
        1    8c3b1480-15da-11ee-bdf1-bbaa73585acf  ...       None
        ..                                    ...  ...        ...

    >>> seer_sdk.get_project_metadata(project_id="YOUR_PROJECT_ID_HERE")
    >>> [{ "project_name": ... }]
    """

    URL = (
        f"{self._auth.url}api/v1/projects"
        if not project_id
        else f"{self._auth.url}api/v1/projects/{project_id}"
    )

    with self._get_auth_session() as s:

        projects = s.get(URL, params={"all": "true"})
        if projects.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )
        # The collection endpoint wraps its rows in "data"; a single project is returned bare.
        res = [projects.json()] if project_id else projects.json()["data"]

    for entry in res:
        entry.pop("tenant_id", None)

        path = entry.get("raw_file_path")
        if isinstance(path, str):
            # Keep only what follows the third '/' in the path.
            entry["raw_file_path"] = path.split("/", 3)[-1]

    return res if not df else dict_to_df(res)
383
+
384
def get_samples_metadata(
    self, plate_id: str = None, project_id: str = None, df: bool = False
):
    """
    Fetches a list of samples for the authenticated user, filtered by `plate_id` or `project_id`.

    If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated and used for filtering.

    Parameters
    ----------
    plate_id : str, optional
        ID of the plate for which samples are to be fetched, defaulted to None.
    project_id : str, optional
        ID of the project for which samples are to be fetched, defaulted to None.
    df : bool
        Boolean denoting whether the user wants the response back in JSON or a DataFrame object.

    Returns
    -------
    samples : list or DataFrame
        List/DataFrame of samples for the authenticated user.

    Raises
    ------
    ValueError
        If neither ID is given, if the given ID cannot be looked up, or if the samples request does not answer with status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)

    >>> seer_sdk.get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
    >>> [
            { "id": ... },
            { "id": ... },
            ...
        ]

    >>> seer_sdk.get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf", df=True)
    >>>                                     id  ...      control
    0     812139c0-15e0-11ee-bdf1-bbaa73585acf  ...
    1     803e05b0-15e0-11ee-bdf1-bbaa73585acf  ...  MPE Control
    ...                                    ...  ...          ...
    """

    if not plate_id and not project_id:
        raise ValueError("You must pass in plate ID or project ID.")

    URL = f"{self._auth.url}api/v1/samples"
    sample_params = {"all": "true"}

    # Validate the ID by looking it up; any lookup failure is reported as an invalid ID.
    if plate_id:
        try:
            self.get_plate_metadata(plate_id)
        except Exception as e:
            raise ValueError("Plate ID is invalid.") from e
        sample_params["plateId"] = plate_id
    else:
        try:
            self.get_project_metadata(project_id)
        except Exception as e:
            raise ValueError("Project ID is invalid.") from e
        sample_params["projectId"] = project_id

    with self._get_auth_session() as s:
        samples = s.get(URL, params=sample_params)
        if samples.status_code != 200:
            target = (
                f"plate ID: {plate_id}"
                if plate_id
                else f"project ID: {project_id}"
            )
            raise ValueError(f"Failed to fetch sample data for {target}.")
        res = samples.json()["data"]

    for entry in res:
        entry.pop("tenant_id", None)

    # Exclude custom fields that don't belong to the tenant
    res_df = dict_to_df(res)
    custom_columns = {
        x["field_name"] for x in self.get_sample_custom_fields()
    }
    res_df = res_df[
        [
            x
            for x in res_df.columns
            if not x.startswith("custom_") or x in custom_columns
        ]
    ]

    # API returns empty strings if not a control, replace with None for filtering purposes.
    # The column is absent when no rows came back, so guard the lookup.
    if "control" in res_df.columns:
        res_df["control"] = res_df["control"].apply(
            lambda x: x if x else None
        )

    return res_df.to_dict(orient="records") if not df else res_df
484
+
485
def _filter_samples_metadata(
    self,
    project_id: str,
    filter: str,
    sample_ids: list = None,
):
    """
    ****************
    [UNEXPOSED METHOD CALL]
    ****************
    Get sample ids for a project, optionally restricted to controls or non-controls.

    Parameters
    ----------
    project_id : str
        The project id.
    filter : str
        The filter to be applied. Acceptable values are 'control' or 'sample'. A falsy value applies no filter.
    sample_ids : list, optional
        List of user provided sample ids. When given, only ids that also appear in this list are returned.

    Returns
    -------
    res : list
        A list of sample ids, in the order they appear in the project's sample metadata, without duplicates when `sample_ids` is given.

    Raises
    ------
    ValueError
        If `filter` is set to anything other than 'control' or 'sample'.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk._filter_samples_metadata("PROJECT_ID", "sample")
    >>> ["SAMPLE_ID_1", "SAMPLE_ID_2", ...]
    """

    if filter and filter not in ["control", "sample"]:
        raise ValueError(
            "Invalid filter. Please choose between 'control' or 'sample'."
        )

    df = self.get_samples_metadata(project_id=project_id, df=True)

    # Rows with a non-null "control" value are controls; the rest are regular samples.
    if filter == "control":
        df = df[~df["control"].isna()]
    elif filter == "sample":
        df = df[df["control"].isna()]

    valid_samples = df["id"].tolist()
    if sample_ids:
        requested = set(sample_ids)
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        valid_samples = list(
            dict.fromkeys(x for x in valid_samples if x in requested)
        )

    return valid_samples
548
+
549
def get_sample_custom_fields(self):
    """
    Fetches a list of custom fields defined for the authenticated user.

    Returns
    -------
    res : list
        Decoded entries of the `api/v1/samplefields` endpoint with any "tenant_id" key removed.

    Raises
    ------
    ValueError
        If the server does not answer with status code 200.
    """
    URL = f"{self._auth.url}api/v1/samplefields"

    with self._get_auth_session() as s:

        fields = s.get(URL)

        if fields.status_code != 200:
            raise ValueError(
                "Failed to fetch custom columns. Please check your connection."
            )

        res = fields.json()

    for entry in res:
        # Tolerate entries that carry no tenant id instead of raising KeyError.
        entry.pop("tenant_id", None)
    return res
568
+
569
def get_msdata(self, sample_ids: list, df: bool = False):
    """
    Fetches MS data files for passed in `sample_ids` (provided they are valid and contain relevant files) for an authenticated user.

    Parameters
    ----------
    sample_ids : list
        List of unique sample IDs.
    df : bool
        Boolean denoting whether the user wants the response back in JSON or a DataFrame object.

    Returns
    -------
    res : list or DataFrame
        List/DataFrame of MS data file objects for the given samples.

    Raises
    ------
    ValueError
        If the request for any sample does not answer with status code 200 or returns no data.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> sample_ids = ["812139c0-15e0-11ee-bdf1-bbaa73585acf", "803e05b0-15e0-11ee-bdf1-bbaa73585acf"]

    >>> seer_sdk.get_msdata(sample_ids)
    >>> [
        {"id": "SAMPLE_ID_1_HERE" ... },
        {"id": "SAMPLE_ID_2_HERE" ... }
    ]

    >>> seer_sdk.get_msdata(sample_ids, df=True)
    >>>                                      id  ... gradient
        0  81c6a180-15e0-11ee-bdf1-bbaa73585acf  ...     None
        1  816a9ed0-15e0-11ee-bdf1-bbaa73585acf  ...     None

        [2 rows x 26 columns]
    """

    URL = f"{self._auth.url}api/v1/msdatas/items"

    res = []
    for sample_id in sample_ids:

        # A fresh session per sample re-reads the auth headers on every request.
        with self._get_auth_session() as s:

            msdatas = s.post(URL, json={"sampleId": sample_id})

            # Decode the body once, and only for successful responses.
            data = (
                msdatas.json()["data"]
                if msdatas.status_code == 200
                else None
            )
            if not data:
                raise ValueError(
                    f"Failed to fetch MS data for sample ID={sample_id}."
                )

            res.extend(data)

    for entry in res:
        entry.pop("tenant_id", None)

        path = entry.get("raw_file_path")
        if isinstance(path, str):
            # Keep only what follows the third '/' in the path.
            entry["raw_file_path"] = path.split("/", 3)[-1]

    return res if not df else dict_to_df(res)
635
+
636
def get_plate(self, plate_id: str, df: bool = False):
    """
    Retrieve the MS data files of every sample on a plate.

    The samples of the plate are looked up with `get_samples_metadata`, and their ids are handed to `get_msdata`.

    Parameters
    ----------
    plate_id : str
        ID of the plate whose samples' MS data files are to be fetched.
    df : bool
        Boolean denoting whether the user wants the response back in JSON or a DataFrame object.

    Returns
    -------
    res : list or DataFrame
        Whatever `get_msdata` returns for the plate's sample ids.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> plate_id = "7ec8cad0-15e0-11ee-bdf1-bbaa73585acf"

    >>> seer_sdk.get_plate(plate_id)
    >>> [
        {"id": "PLATE_ID_1_HERE" ... },
        {"id": "PLATE_ID_2_HERE" ... }
    ]

    >>> seer_sdk.get_plate(plate_id, df=True)
    """
    ids = []
    for row in self.get_samples_metadata(plate_id=plate_id):
        ids.append(row["id"])
    return self.get_msdata(ids, df)
676
+
677
def get_project(
    self,
    project_id: str,
    msdata: bool = False,
    df: bool = False,
    flat: bool = False,
):
    """
    Fetches samples (and MS data files) for a `project_id` (provided that the `project_id` is valid and has samples associated with it) for an authenticated user.

    The function returns a DataFrame object if the `df` flag is passed in as True, otherwise a list of dict objects is returned instead. If both the `df` and `msdata` flags are passed in as True, each sample row carries a nested DataFrame of its MS data files.

    If the `flat` flag is passed in as True together with `msdata`, one record per MS data file is returned, each merged with the fields of its sample (MS data file fields win on key clashes).

    Parameters
    ----------
    project_id : str
        ID of the project for which samples are to be fetched.
    msdata : bool, optional
        Boolean flag denoting whether the user wants relevant MS data files associated with the samples.
    df : bool, optional
        Boolean denoting whether the user wants the response back in JSON or a DataFrame object.
    flat : bool, optional
        Boolean denoting whether MS data files are merged into flat records instead of being nested under "ms_data_files".

    Returns
    -------
    res : list or DataFrame
        List/DataFrame of sample objects for the project.

    Raises
    ------
    ValueError
        If no `project_id` is given.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> project_id = "7e48e150-8a47-11ed-b382-bf440acece26"

    >>> seer_sdk.get_project(project_id=project_id, msdata=False, df=False)
    >>> [
            {"id": "SAMPLE_ID_1_HERE", "sample_type": "Plasma", ...},
            {"id": "SAMPLE_ID_2_HERE", "sample_type": "Plasma", ...}
        ]

    >>> seer_sdk.get_project(project_id=project_id, msdata=True, df=False)
    >>> [
            {
                "id": "SAMPLE_ID_1_HERE",
                "sample_type": "Plasma",
                ...
                "ms_data_files": [
                    {"id": MS_DATA_FILE_ID_1_HERE, ...},
                    {"id": MS_DATA_FILE_ID_2_HERE, ...}
                ]
            },
            ...
        ]

    >>> seer_sdk.get_project(project_id=project_id, msdata=True, df=True)
    >>>                                     id  ...                                      ms_data_files
    0     829509f0-8a47-11ed-b382-bf440acece26  ...                                       id  ... g...
    1     828d41c0-8a47-11ed-b382-bf440acece26  ...                                       id  ... g...
    """
    if not project_id:
        raise ValueError("No project ID specified.")

    project_samples = self.get_samples_metadata(
        project_id=project_id, df=False
    )
    flat_result = []

    if msdata:

        # Map each sample id to its position in project_samples for quick lookup.
        sample_index = {
            sample["id"]: i for i, sample in enumerate(project_samples)
        }
        ms_data_files = self.get_msdata(
            sample_ids=list(sample_index), df=False
        )

        for ms_data_file in ms_data_files:
            index = sample_index.get(ms_data_file["sample_id"])
            # Position 0 is a valid index, so compare against None explicitly.
            if index is None:
                continue

            if flat:
                flat_result.append(project_samples[index] | ms_data_file)
            else:
                project_samples[index].setdefault(
                    "ms_data_files", []
                ).append(ms_data_file)

        # return flat result if results were added to the flat object
        if flat and flat_result:
            project_samples = flat_result

    if df:
        if flat:
            return pd.DataFrame(project_samples)

        for sample in project_samples:
            if "ms_data_files" in sample:
                sample["ms_data_files"] = dict_to_df(
                    sample["ms_data_files"]
                )

        project_samples = dict_to_df(project_samples)

    return project_samples
837
+
838
def get_analysis_protocols(
    self,
    analysis_protocol_name: str = None,
    analysis_protocol_id: str = None,
):
    """
    Fetches a list of analysis protocols for the authenticated user. If neither argument is provided, returns all analysis protocols for the authenticated user. If `analysis_protocol_id` is provided, returns that protocol. If only `analysis_protocol_name` is provided, returns the protocols with the given name.

    Parameters
    ----------
    analysis_protocol_name : str, optional
        Name of the analysis protocol to be fetched, defaulted to None.

    analysis_protocol_id : str, optional
        ID of the analysis protocol to be fetched, defaulted to None. Supersedes `analysis_protocol_name` when both are given.

    Returns
    -------
    protocols : list
        List of analysis protocol objects for the authenticated user.

    Raises
    ------
    ValueError
        If the server does not answer with status code 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK(USERNAME, PASSWORD)
    >>> seer_sdk.get_analysis_protocols()
    >>> [
            { "id": ..., "analysis_protocol_name": ... },
            { "id": ..., "analysis_protocol_name": ... },
            ...
        ]

    >>> seer_sdk.get_analysis_protocols(analysis_protocol_name="YOUR_ANALYSIS_PROTOCOL_NAME_HERE")
    >>> [{ "id": ..., "analysis_protocol_name": ... }]

    >>> seer_sdk.get_analysis_protocols(analysis_protocol_id="YOUR_ANALYSIS_PROTOCOL_ID_HERE")
    >>> [{ "id": ..., "analysis_protocol_name": ... }]

    >>> seer_sdk.get_analysis_protocols(analysis_protocol_id="YOUR_ANALYSIS_PROTOCOL_ID_HERE", analysis_protocol_name="YOUR_ANALYSIS_PROTOCOL_NAME_HERE")
    >>> [{ "id": ..., "analysis_protocol_name": ... }] # in this case the id would supersede the inputted name.
    """

    URL = (
        f"{self._auth.url}api/v1/analysisProtocols"
        if not analysis_protocol_id
        else f"{self._auth.url}api/v1/analysisProtocols/{analysis_protocol_id}"
    )

    with self._get_auth_session() as s:

        protocols = s.get(URL, params={"all": "true"})
        if protocols.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )
        payload = protocols.json()

    if analysis_protocol_id:
        # The id-specific endpoint returns a single protocol; the name is ignored.
        res = [payload]
    elif analysis_protocol_name:
        res = [
            protocol
            for protocol in payload["data"]
            if protocol["analysis_protocol_name"] == analysis_protocol_name
        ]
    else:
        res = payload["data"]

    for entry in res:
        entry.pop("tenant_id", None)

        path = entry.get("parameter_file_path")
        if isinstance(path, str):
            # Keep only what follows the third '/' in the path.
            entry["parameter_file_path"] = path.split("/", 3)[-1]

    return res
922
+
923
+ def get_analysis(
924
+ self,
925
+ analysis_id: str = None,
926
+ folder_id: str = None,
927
+ show_folders: bool = True,
928
+ analysis_only: bool = True,
929
+ project_id: str = None,
930
+ plate_name: str = None,
931
+ **kwargs,
932
+ ):
933
+ """
934
+ Returns a list of analyses objects for the authenticated user. If no id is provided, returns all analyses for the authenticated user.
935
+ Search parameters may be passed in as keyword arguments to filter the results. Acceptable values are 'analysis_name', 'folder_name', 'description', 'notes', or 'number_msdatafile'.
936
+ Only search on a single field is supported.
937
+
938
+ Parameters
939
+ ----------
940
+ analysis_id : str, optional
941
+ ID of the analysis to be fetched, defaulted to None.
942
+
943
+ folder_id : str, optional
944
+ ID of the folder to be fetched, defaulted to None.
945
+
946
+ show_folders : bool, optional
947
+ Mark True if folder contents are to be returned in the response, i.e. recursive search, defaulted to True.
948
+ Will be disabled if an analysis id is provided.
949
+
950
+ analysis_only : bool, optional
951
+ Mark True if only analyses objects are to be returned in the response, defaulted to True.
952
+ If marked false, folder objects will also be included in the response.
953
+
954
+ project_id : str, optional
955
+ ID of the project to be fetched, defaulted to None.
956
+
957
+ plate_name : str, optional
958
+ Name of the plate to be fetched, defaulted to None.
959
+
960
+ **kwargs : dict, optional
961
+ Search keyword parameters to be passed in. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
962
+
963
+ Returns
964
+ -------
965
+ analyses: dict
966
+ Contains a list of analyses objects for the authenticated user.
967
+
968
+ Examples
969
+ -------
970
+ >>> from seer_pas_sdk import SeerSDK
971
+ >>> seer_sdk = SeerSDK()
972
+ >>> seer_sdk.get_analysis()
973
+ >>> [
974
+ {id: "YOUR_ANALYSIS_ID_HERE", ...},
975
+ {id: "YOUR_ANALYSIS_ID_HERE", ...},
976
+ {id: "YOUR_ANALYSIS_ID_HERE", ...}
977
+ ]
978
+
979
+ >>> seer_sdk.get_analysis("YOUR_ANALYSIS_ID_HERE")
980
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
981
+
982
+ >>> seer_sdk.get_analysis(folder_name="YOUR_FOLDER_NAME_HERE")
983
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
984
+
985
+ >>> seer_sdk.get_analysis(analysis_name="YOUR_ANALYSIS")
986
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
987
+
988
+ >>> seer_sdk.get_analysis(description="YOUR_DESCRIPTION")
989
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
990
+ """
991
+
992
+ URL = f"{self._auth.url}api/v1/analyses"
993
+ res = []
994
+
995
+ search_field = None
996
+ search_item = None
997
+ if kwargs:
998
+ if len(kwargs.keys()) > 1:
999
+ raise ValueError("Please include only one search parameter.")
1000
+ search_field = list(kwargs.keys())[0]
1001
+ search_item = kwargs[search_field]
1002
+
1003
+ if not search_item:
1004
+ raise ValueError(
1005
+ f"Please provide a non null value for {search_field}"
1006
+ )
1007
+
1008
+ if search_field and search_field not in [
1009
+ "analysis_name",
1010
+ "folder_name",
1011
+ "analysis_protocol_name",
1012
+ "description",
1013
+ "notes",
1014
+ "number_msdatafile",
1015
+ ]:
1016
+ raise ValueError(
1017
+ "Invalid search field. Please choose between 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'."
1018
+ )
1019
+
1020
+ with self._get_auth_session() as s:
1021
+
1022
+ params = {"all": "true"}
1023
+ if folder_id:
1024
+ params["folder"] = folder_id
1025
+
1026
+ if search_field:
1027
+ params["searchFields"] = search_field
1028
+ params["searchItem"] = search_item
1029
+ del params["all"]
1030
+
1031
+ if search_field == "folder_name":
1032
+ params["searchFields"] = "analysis_name"
1033
+
1034
+ if project_id:
1035
+ params["projectId"] = project_id
1036
+
1037
+ if plate_name:
1038
+ params["plateName"] = plate_name
1039
+
1040
+ analyses = s.get(
1041
+ f"{URL}/{analysis_id}" if analysis_id else URL, params=params
1042
+ )
1043
+
1044
+ if analyses.status_code != 200:
1045
+ raise ValueError(
1046
+ "Invalid request. Please check your parameters."
1047
+ )
1048
+ if not analysis_id:
1049
+ res = analyses.json()["data"]
1050
+
1051
+ else:
1052
+ res = [analyses.json()["analysis"]]
1053
+
1054
+ folders = []
1055
+ for entry in range(len(res)):
1056
+ if "tenant_id" in res[entry]:
1057
+ del res[entry]["tenant_id"]
1058
+
1059
+ if "parameter_file_path" in res[entry]:
1060
+ # Simple lambda function to find the third occurrence of '/' in the raw file path
1061
+ location = lambda s: len(s) - len(s.split("/", 3)[-1])
1062
+
1063
+ # Slicing the string from the location
1064
+ res[entry]["parameter_file_path"] = res[entry][
1065
+ "parameter_file_path"
1066
+ ][location(res[entry]["parameter_file_path"]) :]
1067
+
1068
+ if (
1069
+ show_folders
1070
+ and not analysis_id
1071
+ and res[entry]["is_folder"]
1072
+ ):
1073
+ folders.append(res[entry]["id"])
1074
+
1075
+ # recursive solution to get analyses in folders
1076
+ for folder in folders:
1077
+ res += self.get_analysis(folder_id=folder)
1078
+
1079
+ if analysis_only:
1080
+ res = [
1081
+ analysis for analysis in res if not analysis["is_folder"]
1082
+ ]
1083
+ return res
1084
+
1085
def get_analysis_result_protein_data(
    self, analysis_id: str, link: bool = False, pg: str = None
):
    """
    Given an analysis id, this function returns the protein data for the analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.
    link : bool
        If True, return the result-file links reported by the server instead of loading them into dataframes. Defaults to False.
    pg : str
        Protein group ID to filter dataframe results (exact match on the `Protein Group` column). Only used when `link` is False. Defaults to None.

    Returns
    -------
    dict
        When `link` is True: the links found in the response, keyed by "npLink" / "panelLink", each of the form {"url": ...}. "panelLink" is always present and falls back to an empty url.
        Otherwise: {"protein_np": <dataframe>, "protein_panel": <dataframe>}, filtered to `pg` when it is given.

    Raises
    ------
    ValueError
        If the request does not return status 200, if the response contains no result links, or if `pg` matches no row in either dataframe.
    """
    with self._get_auth_session() as s:
        URL = f"{self._auth.url}api/v1/data"
        response = s.get(
            f"{URL}/protein?analysisId={analysis_id}&retry=false"
        )

        if response.status_code != 200:
            raise ValueError(
                "Could not fetch protein data. Please verify that your analysis completed."
            )
        rows = response.json()

    # Keep only the two link entries this SDK knows about.
    protein_data = {}
    for row in rows:
        name = row.get("name")
        if name in ("npLink", "panelLink"):
            protein_data[name] = {"url": row.get("link", {}).get("url", "")}

    if not protein_data:
        raise ValueError("No protein result files found.")
    protein_data.setdefault("panelLink", {"url": ""})

    if link:
        return protein_data

    protein_np = url_to_df(protein_data["npLink"]["url"])
    protein_panel = url_to_df(protein_data["panelLink"]["url"])
    if not pg:
        return {"protein_np": protein_np, "protein_panel": protein_panel}

    def _rows_for_group(frame):
        # A boolean mask is used instead of DataFrame.query so that the
        # caller-supplied value is compared as data and never parsed as part
        # of an expression (quotes or backticks in `pg` used to break it).
        # A frame without the column simply yields no matches.
        if "Protein Group" not in frame.columns:
            return frame.iloc[0:0]
        return frame[frame["Protein Group"] == pg]

    protein_np = _rows_for_group(protein_np)
    protein_panel = _rows_for_group(protein_panel)

    if protein_np.empty and protein_panel.empty:
        raise ValueError(
            f"Protein group {pg} not found in analysis {analysis_id}."
        )

    return {
        "protein_np": protein_np,
        "protein_panel": protein_panel,
    }
1156
+
1157
def get_analysis_result_peptide_data(
    self, analysis_id: str, link: bool = False, peptide: str = None
):
    """
    Given an analysis id, this function returns the peptide data for the analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    link : bool
        If True, return the result-file links reported by the server instead of loading them into dataframes. Defaults to False.

    peptide : str
        Peptide sequence to filter dataframe results (exact match on the `Peptide` column). Only used when `link` is False. Defaults to None.

    Returns
    -------
    dict
        When `link` is True: the links found in the response, keyed by "npLink" / "panelLink", each of the form {"url": ...}. "panelLink" is always present and falls back to an empty url.
        Otherwise: {"peptide_np": <dataframe>, "peptide_panel": <dataframe>}, filtered to `peptide` when it is given.

    Raises
    ------
    ValueError
        If the request does not return status 200, if the response contains no result links, or if `peptide` matches no row in either dataframe.
    """
    with self._get_auth_session() as s:
        URL = f"{self._auth.url}api/v1/data"
        response = s.get(
            f"{URL}/peptide?analysisId={analysis_id}&retry=false"
        )

        if response.status_code != 200:
            raise ValueError(
                "Could not fetch peptide data. Please verify that your analysis completed."
            )
        rows = response.json()

    # Keep only the two link entries this SDK knows about.
    peptide_data = {}
    for row in rows:
        name = row.get("name")
        if name in ("npLink", "panelLink"):
            peptide_data[name] = {"url": row.get("link", {}).get("url", "")}

    if not peptide_data:
        raise ValueError("No peptide result files found.")
    peptide_data.setdefault("panelLink", {"url": ""})

    if link:
        return peptide_data

    peptide_np = url_to_df(peptide_data["npLink"]["url"])
    peptide_panel = url_to_df(peptide_data["panelLink"]["url"])
    if not peptide:
        return {"peptide_np": peptide_np, "peptide_panel": peptide_panel}

    def _rows_for_peptide(frame):
        # A boolean mask is used instead of DataFrame.query so that the
        # caller-supplied value is compared as data and never parsed as part
        # of an expression (quotes in `peptide` used to break it).
        # A frame without the column simply yields no matches.
        if "Peptide" not in frame.columns:
            return frame.iloc[0:0]
        return frame[frame["Peptide"] == peptide]

    peptide_np = _rows_for_peptide(peptide_np)
    peptide_panel = _rows_for_peptide(peptide_panel)

    if peptide_np.empty and peptide_panel.empty:
        raise ValueError(
            f"Peptide {peptide} not found in analysis {analysis_id}."
        )

    return {
        "peptide_np": peptide_np,
        "peptide_panel": peptide_panel,
    }
1231
+
1232
def list_analysis_result_files(self, analysis_id: str):
    """
    Given an analysis id, this function returns a list of files associated with the analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    Returns
    -------
    files: list
        List of filenames associated with the analysis.

    Raises
    ------
    ValueError
        If the analysis metadata cannot be fetched, or the analysis status is "Failed" or missing.
    ServerError
        If the result-file listing request does not return status 200.
    """
    try:
        analysis_metadata = self.get_analysis(analysis_id)[0]
    except (IndexError, ServerError) as err:
        raise ValueError("Invalid analysis ID.") from err
    except Exception as err:
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit are not turned into a ValueError.
        raise ValueError("Could not fetch analysis metadata.") from err

    if analysis_metadata.get("status") in ["Failed", None]:
        raise ValueError("Cannot find files for a failed analysis.")

    with self._get_auth_session() as s:
        response = s.get(
            f"{self._auth.url}api/v2/analysisResultFiles/{analysis_id}"
        )
        if response.status_code != 200:
            raise ServerError(
                "Could not fetch analysis result files. Please verify that your analysis completed."
            )
        return [row["filename"] for row in response.json()["data"]]
1268
+
1269
def get_analysis_result_file_url(self, analysis_id: str, filename: str):
    """
    Resolve the signed URL of one result file of an analysis.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis that owns the file.

    filename : str
        Name of the file to be fetched. The extension may be omitted; a name that equals a listed result file minus its last extension is expanded to the full filename.

    Returns
    -------
    file_url: dict
        Decoded JSON response of the getUrl endpoint; it contains a non-empty "url" entry.

    Raises
    ------
    ValueError
        If the analysis status is "Failed" or missing, or if the response carries no url.
    """

    # Build a lookup from "name without its last extension" to the full name.
    stem_to_name = {}
    for full_name in self.list_analysis_result_files(analysis_id):
        stem = ".".join(full_name.split(".")[:-1])
        stem_to_name[stem] = full_name
    filename = stem_to_name.get(filename, filename)

    metadata = self.get_analysis(analysis_id)[0]
    if metadata.get("status") in ["Failed", None]:
        raise ValueError("Cannot generate links for failed analyses.")

    with self._get_auth_session() as session:
        payload = {
            "analysisId": analysis_id,
            "projectId": metadata["project_id"],
            "filename": filename,
        }
        reply = session.post(
            f"{self._auth.url}api/v1/analysisResultFiles/getUrl",
            json=payload,
        )
        body = reply.json()
        if not body.get("url"):
            raise ValueError(f"File {filename} not found.")
        return body
1311
+
1312
def get_analysis_result_files(
    self,
    analysis_id: str,
    filenames: _List[str],
    download_path: str = "",
    protein_all: bool = False,
    peptide_all: bool = False,
):
    """
    Load selected result files of an analysis into dataframes and optionally write them to disk.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    filenames : list
        Filenames to be fetched (duplicates are collapsed). A name may be given without its extension. Only csv and tsv files are supported.

    download_path : str
        Existing folder under which `downloads/<analysis_id>/` is created and the files are written. Defaults to an empty string (nothing is written).

    protein_all : bool
        If True, include both default protein files (`protein_np.tsv`, `protein_panel.tsv`). Defaults to False.

    peptide_all : bool
        If True, include both default peptide files (`peptide_np.tsv`, `peptide_panel.tsv`). Defaults to False.

    Returns
    -------
    links: dict
        Maps each resolved filename to its dataframe. A file whose URL lookup raises is reported with `print` and skipped.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or `download_path` is given but does not exist.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK()
    >>> seer_sdk.get_analysis_result_files("YOUR_ANALYSIS_ID_HERE", ["protein_np.tsv", "peptide_np.tsv"])
    {
        "protein_np.tsv": <protein_np dataframe object>,
        "peptide_np.tsv": <peptide_np dataframe object>
    }
    >>> seer_sdk.get_analysis_result_files("YOUR_ANALYSIS_ID_HERE", ["report.tsv"], download_path="/Users/Downloads")
    { "report.tsv": <report.tsv dataframe object> }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if download_path and not os.path.exists(download_path):
        raise ValueError(
            "Please specify a valid folder path as download path."
        )

    links = {}
    if protein_all:
        protein_links = self.get_analysis_result_protein_data(
            analysis_id, link=True
        )
        links["protein_np.tsv"] = protein_links["npLink"]["url"]
        links["protein_panel.tsv"] = protein_links["panelLink"]["url"]
    if peptide_all:
        peptide_links = self.get_analysis_result_peptide_data(
            analysis_id, link=True
        )
        links["peptide_np.tsv"] = peptide_links["npLink"]["url"]
        links["peptide_panel.tsv"] = peptide_links["panelLink"]["url"]

    filenames = set(filenames)

    # Allow user to pass in filenames without an extension.
    stem_to_name = {}
    for full_name in self.list_analysis_result_files(analysis_id):
        stem_to_name[".".join(full_name.split(".")[:-1])] = full_name

    # Default result files come from the protein/peptide data endpoints:
    # filename -> (already loaded via *_all flag, link fetcher, link key)
    default_sources = {
        "protein_np.tsv": (
            protein_all,
            self.get_analysis_result_protein_data,
            "npLink",
        ),
        "protein_panel.tsv": (
            protein_all,
            self.get_analysis_result_protein_data,
            "panelLink",
        ),
        "peptide_np.tsv": (
            peptide_all,
            self.get_analysis_result_peptide_data,
            "npLink",
        ),
        "peptide_panel.tsv": (
            peptide_all,
            self.get_analysis_result_peptide_data,
            "panelLink",
        ),
    }

    for requested in filenames:
        resolved = stem_to_name.get(requested, requested)
        source = default_sources.get(resolved)

        if source is None:
            # Any other file goes through the signed-URL endpoint; failures
            # are reported and the file is left out of the result.
            try:
                links[resolved] = self.get_analysis_result_file_url(
                    analysis_id, resolved
                )["url"]
            except Exception as e:
                print(e)
            continue

        already_loaded, fetch_links, link_key = source
        if already_loaded:
            continue
        links[resolved] = fetch_links(analysis_id, link=True)[link_key]["url"]

    # Replace every URL with the dataframe loaded from it.
    links = {
        name: url_to_df(url, is_tsv=name.endswith(".tsv"))
        for name, url in links.items()
    }

    if download_path:
        target_dir = f"{download_path}/downloads/{analysis_id}"
        print(f"Start download to path {target_dir}")
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        for out_name, frame in links.items():
            delimiter = "\t" if out_name.endswith(".tsv") else ","
            frame.to_csv(f"{target_dir}/{out_name}", sep=delimiter)
        print("Download complete.")

    return links
1453
+
1454
def get_analysis_result(
    self,
    analysis_id: str,
    download_path: str = "",
    diann_report: bool = False,
):
    """
    Load the default protein and peptide result files of an analysis, and optionally its DIANN report, then either return them or write them to disk.

    Parameters
    ----------
    analysis_id : str
        ID of the analysis for which the data is to be fetched.

    download_path : str
        Existing folder under which `downloads/<analysis_id>/` is created and the files are written. Defaults to an empty string (nothing is written).

    diann_report : bool
        If True, `report.tsv` is also loaded under the key "diann_report". Defaults to False.

    Returns
    -------
    links: dict
        Without `download_path`: dataframes keyed by "peptide_np", "peptide_panel", "protein_np", "protein_panel" and, if requested, "diann_report".
        With `download_path`: `{"status": "Download complete."}` after each dataframe has been written tab-separated to `<key>.csv`.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or `download_path` is given but does not exist.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK()

    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
    >>> {
            "peptide_np": <peptide_np dataframe object>,
            "peptide_panel": <peptide_panel dataframe object>,
            "protein_np": <protein_np dataframe object>,
            "protein_panel": <protein_panel dataframe object>
        }

    >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
    >>> { "status": "Download complete." }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if download_path and not os.path.exists(download_path):
        raise ValueError("The download path you entered is invalid.")

    protein_links = self.get_analysis_result_protein_data(
        analysis_id, link=True
    )
    peptide_links = self.get_analysis_result_peptide_data(
        analysis_id, link=True
    )

    # Insertion order matters: it is also the order in which files are written.
    links = {}
    links["peptide_np"] = url_to_df(peptide_links["npLink"]["url"])
    links["peptide_panel"] = url_to_df(peptide_links["panelLink"]["url"])
    links["protein_np"] = url_to_df(protein_links["npLink"]["url"])
    links["protein_panel"] = url_to_df(protein_links["panelLink"]["url"])

    if diann_report:
        report_link = self.get_analysis_result_file_url(
            analysis_id, "report.tsv"
        )
        links["diann_report"] = url_to_df(report_link["url"])

    if not download_path:
        return links

    name = f"{download_path}/downloads/{analysis_id}"
    if not os.path.exists(name):
        os.makedirs(name)

    # Files keep the ".csv" extension although the content is tab-separated.
    for key, frame in links.items():
        frame.to_csv(f"{name}/{key}.csv", sep="\t")

    return {"status": "Download complete."}
1552
+
1553
def analysis_complete(self, analysis_id: str):
    """
    Returns the status of the analysis with the given id.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    Returns
    -------
    res : dict
        A dictionary containing the status of the analysis.

    Raises
    ------
    ValueError
        If `analysis_id` is empty or the analysis cannot be fetched.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK()
    >>> seer_sdk.analysis_complete("YOUR_ANALYSIS_ID_HERE")
    >>> {
            "status": "SUCCEEDED"
        }
    """

    if not analysis_id:
        raise ValueError("Analysis id cannot be empty.")

    try:
        res = self.get_analysis(analysis_id)
    except ValueError as err:
        # The exception used to be *returned*, handing callers an exception
        # object where they expect a status dict; it must be raised.
        raise ValueError(
            "Analysis not found. Your ID could be incorrect"
        ) from err

    if not res:
        raise ValueError("Analysis not found. Your ID could be incorrect")

    return {"status": res[0]["status"]}
1586
+
1587
def list_ms_data_files(self, folder="", space=None):
    """
    Lists all the MS data files in the given folder as long as the folder path passed in the params is valid.

    Parameters
    ----------
    folder : str, optional
        Folder path to list the files from. Defaults to an empty string and displays all files for the user.
    space : str, optional
        ID of the user group to which the files belong, defaulted to None.

    Returns
    -------
    list
        Contains the list of files in the folder.

    Raises
    ------
    ValueError
        If the request does not return status 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> sdk = SeerSDK()
    >>> folder_path = "test-may-2/"
    >>> sdk.list_ms_data_files(folder_path)
    >>> [
        "test-may-2/EXP20028/EXP20028_2020ms0096X10_A.raw",
        "test-may-2/agilent/05_C2_19ug-r001.d.zip",
        "test-may-2/DIA/EXP20002_2020ms0142X10_A.wiff",
        "test-may-2/DIA/EXP20002_2020ms0142X10_A.wiff.scan"
    ]
    """

    # Query values are passed as `params` so the HTTP client escapes them;
    # splicing them into the URL broke on folder names containing characters
    # such as "&", "#" or "+".
    params = {"folder": folder}
    if space:
        params["userGroupId"] = space

    with self._get_auth_session() as s:
        files = s.get(
            f"{self._auth.url}api/v1/msdataindex/filesinfolder",
            params=params,
        )

        if files.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )
        return files.json()["filesList"]
1637
+
1638
def download_ms_data_files(
    self, paths: _List[str], download_path: str, space: str = None
):
    """
    Downloads all MS data files for paths passed in the params to the specified download path.

    Parameters
    ----------
    paths : list[str]
        List of paths to download.
    download_path : str
        Path to download the files to. An empty value falls back to the current working directory; a path that is not an existing directory falls back to `<cwd>/downloads`.
    space : str, optional
        ID of the user group to which the files belongs, defaulted to None.

    Returns
    -------
    message: dict
        Contains the message whether the files were downloaded or not.

    Raises
    ------
    ValueError
        If a download URL cannot be obtained (non-200 response) or a file still fails to download after two attempts.
    """

    urls = []

    if not download_path:
        download_path = os.getcwd()
        print("\nDownload path not specified.\n")

    if not os.path.isdir(download_path):
        print(
            f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
        )
        download_path = f"{os.getcwd()}/downloads"

    # Drop a single trailing slash so the joined file paths stay clean.
    name = (
        download_path if download_path[-1] != "/" else download_path[:-1]
    )

    if not os.path.exists(name):
        os.makedirs(name)

    print(f'Downloading files to "{name}"\n')

    URL = f"{self._auth.url}api/v1/msdataindex/download/getUrl"
    tenant_id = self._auth.active_tenant_id

    # Resolve every download URL first, so a bad path fails before any
    # file transfer starts.
    for path in paths:
        with self._get_auth_session() as s:

            download_url = s.post(
                URL,
                json={
                    "filepath": f"{tenant_id}/{path}",
                    "userGroupId": space,
                },
            )

            if download_url.status_code != 200:
                raise ValueError(
                    "Could not download file. Please check if the backend is running."
                )
            urls.append(download_url.text)

    for path, url in zip(paths, urls):
        filename = path.split("/")[-1]

        print(f"Downloading {filename}")

        # Two attempts per file; the for/else raises when neither succeeded.
        for _ in range(2):
            try:
                with tqdm(
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    desc="Progress",
                ) as t:
                    # NOTE(review): this replaces the process-wide default
                    # HTTPS context with an unverified one, i.e. certificate
                    # verification is disabled for every later urllib HTTPS
                    # request in this process. Left unchanged here; confirm
                    # whether it is still required.
                    ssl._create_default_https_context = (
                        ssl._create_unverified_context
                    )
                    urllib.request.urlretrieve(
                        url,
                        f"{name}/{filename}",
                        reporthook=download_hook(t),
                        data=None,
                    )
                break
            except Exception:
                # `Exception` (not a bare except) so Ctrl-C aborts the
                # download instead of triggering a retry. Before retrying,
                # make sure the target directory exists. The previous handler
                # called os.makedirs without exist_ok on the directory created
                # above, so every failure surfaced as FileExistsError and the
                # retry never ran.
                os.makedirs(name, exist_ok=True)

        else:
            raise ValueError(
                "Your download failed. Please check if the backend is still running."
            )

        print(f"Finished downloading {filename}\n")

    return {"message": f"Files downloaded successfully to '{name}'"}
1741
+
1742
def get_group_analysis(
    self, analysis_id, group_analysis_id=None, **kwargs
):
    """
    Fetch the group analysis objects of an analysis, or a single one when `group_analysis_id` is given.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    group_analysis_id : str, optional
        The group analysis id, defaulted to None. If provided, the request targets that group analysis and any search keyword is ignored.

    **kwargs : dict, optional
        A single search keyword parameter. Acceptable keys are 'name' or 'description'.

    Returns
    -------
    Decoded JSON body of the server response.

    Raises
    ------
    ValueError
        If more than one search keyword is given, the keyword is not supported, or its value is empty.
    ServerError
        If the request does not return status 200.
    """
    params = {"analysisid": analysis_id}

    searching = bool(kwargs) and not group_analysis_id
    if searching:
        if len(kwargs) > 1:
            raise ValueError("Please include only one search parameter.")

        search_field, search_item = next(iter(kwargs.items()))
        if search_field not in ("name", "description"):
            raise ValueError(
                "Invalid search field. Please choose between 'name' or 'description'."
            )
        if not search_item:
            raise ValueError(
                f"Please provide a non null value for {search_field}"
            )

        params["searchFields"] = search_field
        params["searchItem"] = search_item

    endpoint = f"{self._auth.url}api/v1/groupanalysis/groupanalyses"
    if group_analysis_id:
        endpoint = f"{endpoint}/{group_analysis_id}"
        params["id"] = group_analysis_id

    with self._get_auth_session() as session:
        reply = session.get(endpoint, params=params)
        if reply.status_code != 200:
            raise ServerError(
                "Request failed. Please check your parameters."
            )
        return reply.json()
1792
+
1793
def group_analysis_results(self, analysis_id: str, group_analysis_id=None):
    """
    Collect the pre- and post-group-analysis data of an analysis.

    When `group_analysis_id` is given, the post data is taken from `get_group_analysis` for that id; otherwise it is taken from the saved-results endpoint for the analysis.

    Parameters
    ----------
    analysis_id : str
        The analysis id.

    group_analysis_id : str, optional
        The group analysis id, defaulted to None.

    Returns
    -------
    res : dict
        A dictionary containing the group analysis data.

    Raises
    ------
    ValueError
        If `analysis_id` is empty, or the post data contains neither protein nor peptide results.
    ServerError
        If one of the requests does not return status 200.

    Examples
    -------
    >>> from seer_pas_sdk import SeerSDK
    >>> seer_sdk = SeerSDK()
    >>> seer_sdk.group_analysis_results("YOUR_ANALYSIS_ID_HERE")
    >>> {
            "pre": {
                "protein": [],
                "peptide": [],
            },
            "post": {
                "protein": {},
                "protein_url": {
                    "protein_processed_file_url": "",
                    "protein_processed_long_form_file_url": "",
                },
                "peptide": {},
                "peptide_url": {
                    "peptide_processed_file_url": "",
                    "peptide_processed_long_form_file_url": "",
                },
            },
        }
    """

    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    base_url = f"{self._auth.url}"

    res = {
        "pre": {"protein": [], "peptide": []},
        "post": {
            "protein": {},
            "protein_url": {
                "protein_processed_file_url": "",
                "protein_processed_long_form_file_url": "",
            },
            "peptide": {},
            "peptide_url": {
                "peptide_processed_file_url": "",
                "peptide_processed_long_form_file_url": "",
            },
        },
    }

    # Pre-GA data calls: one session and one POST per feature type.
    pre_requests = (
        (
            "protein",
            "Invalid request. Could not fetch group analysis protein pre data. Please check your parameters.",
        ),
        (
            "peptide",
            "Invalid request. Could not fetch group analysis peptide pre data. Please check your parameters.",
        ),
    )
    for feature, failure_message in pre_requests:
        with self._get_auth_session() as s:
            pre_response = s.post(
                url=f"{base_url}api/v2/groupanalysis/{feature}",
                json={"analysisId": analysis_id, "grouping": "condition"},
            )
            if pre_response.status_code != 200:
                raise ServerError(failure_message)
            res["pre"][feature] = pre_response.json()

    # Post-GA data call
    with self._get_auth_session() as s:
        if group_analysis_id:
            saved = self.get_group_analysis(
                analysis_id=analysis_id,
                group_analysis_id=group_analysis_id,
            )
        else:
            saved_response = s.get(
                f"{base_url}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
            )
            if saved_response.status_code != 200:
                raise ServerError(
                    "Could not fetch saved results. Please check your analysis id."
                )
            saved = saved_response.json()

        post = res["post"]

        # Result payloads: response key -> key under res["post"].
        for response_key, target_key in (
            ("pgResult", "protein"),
            ("peptideResult", "peptide"),
        ):
            if response_key in saved:
                post[target_key] = saved[response_key]

        # require that either protein or peptide data exists
        # Error handling is necessary for volcano plot calculations downstream
        if not (post.get("protein") or post.get("peptide")):
            raise ValueError(
                "No group analysis data returned from server."
            )

        # Processed-file URLs: response key -> (section, field) under res["post"].
        url_fields = (
            ("pgProcessedFileUrl", "protein_url", "protein_processed_file_url"),
            (
                "pgProcessedLongFormFileUrl",
                "protein_url",
                "protein_processed_long_form_file_url",
            ),
            (
                "peptideProcessedFileUrl",
                "peptide_url",
                "peptide_processed_file_url",
            ),
            (
                "peptideProcessedLongFormFileUrl",
                "peptide_url",
                "peptide_processed_long_form_file_url",
            ),
        )
        for response_key, section, field in url_fields:
            if response_key in saved:
                post[section][field] = saved[response_key]

    return res
1947
+
1948
def get_box_plot_data(
    self,
    analysis_id: str,
    group_analysis_id: str = None,
    feature_ids: _List[str] = [],
    show_significant_only: bool = False,
    as_df=False,
    volcano_plot=False,
    cached=False,
):
    """Fetch box plot intensities for an analysis, annotated with condition and gene.

    Args:
        analysis_id (str): ID of the analysis.
        group_analysis_id (str, optional): ID of the group analysis. When given, it is forwarded to the volcano plot lookup and its stored filters are sent with the intensity request. Defaults to None.
        feature_ids (list[str], optional): Restrict the request to these feature ids. Defaults to [].
        show_significant_only (bool, optional): Keep only rows whose feature is reported as significant by the volcano plot builder. Defaults to False.
        as_df (bool, optional): Return tabular results as pandas DataFrames instead of lists of dictionaries. Defaults to False.
        volcano_plot (bool, optional): Also return the volcano plot data. Defaults to False.
        cached (bool, optional): Return the volcano plot as the VolcanoPlotBuilder object itself. Only relevant when volcano_plot is True. Defaults to False.

    Raises:
        ServerError: Could not fetch box plot data.

    Returns:
        list[dict] | pd.DataFrame | dict: The box plot rows, each extended with 'condition' and 'gene'.
            When volcano_plot is True, a dictionary with the keys 'box_plot' and 'volcano_plot' is returned instead.
    """

    with self._get_auth_session() as session:

        # Request 1 - volcano plot builder: supplies the feature type, the gene map and the significant rows
        builder = self.get_volcano_plot_data(
            analysis_id, cached=True, group_analysis_id=group_analysis_id
        )
        gene_lookup = builder.protein_gene_map

        # Request 2 - sample metadata, used to attach a condition to each row
        samples_metadata = self.get_analysis_samples(analysis_id)

        payload = {"analysisId": analysis_id}
        if feature_ids:
            payload["featureIds"] = ",".join(feature_ids)

        # Request 3 - the group analysis carries the filters to apply
        group_filters = ""
        if group_analysis_id:
            group_analysis = self.get_group_analysis(
                analysis_id, group_analysis_id=group_analysis_id
            )
            group_filters = group_analysis["parameters"]["filters"]
        if group_filters:
            payload["filters"] = group_filters

        is_peptide = builder.type == "peptide"
        payload["featureType"] = builder.type if is_peptide else "proteingroup"

        # Request 4 - the intensities themselves
        response = session.post(
            url=f"{self._auth.url}api/v1/groupanalysis/rawdata", json=payload
        )
        if response.status_code != 200:
            raise ServerError("Could not fetch box plot data.")

        # Name of the row field holding the feature identifier
        feature_key = "peptide" if is_peptide else "proteinId"

        # Keep only the features known to the volcano plot's gene map
        rows = [
            row for row in response.json() if row[feature_key] in gene_lookup
        ]
        condition_by_sample = {
            sample["id"]: sample["condition"]
            for sample in samples_metadata[0]["samples"]
        }

        if show_significant_only:
            significant = set(builder.get_significant_rows())
            rows = [row for row in rows if row[feature_key] in significant]

        for row in rows:
            row["condition"] = condition_by_sample.get(row["sampleId"])
            row["gene"] = builder.protein_gene_map[row[feature_key]]

        box_plot_data = pd.DataFrame(rows) if as_df else rows

        if not volcano_plot:
            return box_plot_data

        if cached:
            vplot = builder
        elif as_df:
            vplot = pd.DataFrame(builder.volcano_plot)
        else:
            vplot = builder.volcano_plot

        return {"box_plot": box_plot_data, "volcano_plot": vplot}
2055
+
2056
def get_all_volcano_plot_data(self, analysis_id: str, box_plot=False):
    """
    Collect volcano plot data for every group analysis of an analysis.

    Args:
        analysis_id (str): ID of the analysis.
        box_plot (bool, optional): Mark true to fetch box plot data alongside the volcano plot data. Defaults to False.

    Returns:
        dict: Maps each group analysis ID to a dictionary holding its 'volcano_plot' data
            (and 'box_plot' data when box_plot is True), as DataFrames.
            Empty when the analysis has no group analyses with an ID.
    """
    group_analyses = self.get_group_analysis(analysis_id).get("data", [])
    ga_ids = [entry["id"] for entry in group_analyses if entry.get("id")]
    if not ga_ids:
        return {}

    collected = {}
    for ga_id in ga_ids:
        if box_plot:
            # get_box_plot_data already returns both plots keyed by name
            collected[ga_id] = dict(
                self.get_box_plot_data(
                    analysis_id, ga_id, as_df=True, volcano_plot=True
                )
            )
        else:
            collected[ga_id] = {
                "volcano_plot": self.get_volcano_plot_data(
                    analysis_id, group_analysis_id=ga_id, as_df=True
                )
            }

    return collected
2097
+
2098
def _get_analysis_pca(
    self,
    analysis_ids: _List[str],
    sample_ids: _List[str],
    type: str,
    hide_control: bool = False,
):
    """
    ****************
    [UNEXPOSED METHOD CALL]
    ****************
    Request PCA data for the given analyses and samples.
    Args:
        analysis_ids (list[str]): IDs of the analyses of interest.
        sample_ids (list[str]): IDs of the samples of interest. Omitted from the request when empty.
        type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
        hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
    Raises:
        ValueError: No analysis IDs provided.
        ValueError: Invalid type provided.
        ServerError: Could not fetch PCA data.
    Returns:
        dict
            Pure response from the API.
    """
    if not analysis_ids:
        raise ValueError("Analysis IDs cannot be empty.")
    if type not in ["protein", "peptide"]:
        raise ValueError("Type must be either 'protein' or 'peptide'.")

    endpoint = f"{self._auth.url}api/v1/analysisqcpca"

    payload = {
        "analysisIds": ",".join(analysis_ids),
        "type": type,
    }
    if sample_ids:
        payload["sampleIds"] = ",".join(sample_ids)

    # hideControl is sent as a string - unexpected behavior occurs if a boolean is passed
    payload["hideControl"] = "true" if hide_control else "false"

    with self._get_auth_session() as session:
        response = session.post(endpoint, json=payload)

        if response.status_code != 200:
            raise ServerError("Could not fetch PCA data.")

        return response.json()
2151
+
2152
def get_analysis_pca_data(
    self,
    analysis_ids: _List[str],
    type: str,
    sample_ids: _List[str] = [],
    hide_control: bool = False,
    as_df=False,
):
    """
    Get PCA data for given analyses and samples formatted in a DataFrame or a dictionary.
    Args:
        analysis_ids (list[str]): IDs of the analyses of interest.
        type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
        sample_ids (list[str], optional): IDs of the samples of interest.
        hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
        as_df (bool, optional): Mark true if the data should be returned as a pandas DataFrame. Defaults to False.
    Raises:
        ValueError: No analysis IDs provided.
        ValueError: Invalid type parameter provided.
        ServerError: Could not fetch PCA data.
    Returns:
        A dictionary with the following keys:
            - x_contribution_ratio (float): Proportion of variance explained by the x-axis.
            - y_contribution_ratio (float | None): Proportion of variance explained by the y-axis. None when the API response does not include it.
            - data (list[dict] | pd.DataFrame): A list of dictionaries or a dataframe with each row containing the following keys/columns:
                - custom_* (str): Custom fields. Included if meaningful, i.e., not null, in the data.
                - sample_name (str): Name of the sample.
                - plate_name (str): Name of the plate.
                - sample_id (int): ID of the sample.
                - condition (str): Condition.
                - PC1 (float): X-value of the PCA point.
                - PC2 (float): Y-value of the PCA point.
              When no points are returned, data is an empty list / an empty DataFrame with the generic columns.
    Examples
    --------
    >>> from seer_pas_sdk import *
    >>> sdk = SeerSDK(USERNAME, PASSWORD)
    >>> sdk.get_analysis_pca_data(
    ...     analysis_ids=["analysis_id"],
    ...     sample_ids=["sample_id"],
    ...     type="protein",
    ...     hide_control=False,
    ... )
    """
    pca_data = self._get_analysis_pca(
        analysis_ids, sample_ids, type, hide_control
    )

    # common columns returned by the API
    generic_columns = [
        "sample_name",
        "plate_name",
        "sample_id",
        "condition",
        "PC1",
        "PC2",
    ]

    # yContributionRatio may be absent from the response (e.g. when zero points are returned)
    y_contribution_ratio = pca_data.get("yContributionRatio")
    x_contribution_ratio = pca_data["xContributionRatio"]

    # One row per sample: the sample's metadata plus its two principal components
    df = pd.DataFrame(
        [
            {**sample, "PC1": point[0], "PC2": point[1]}
            for sample, point in zip(pca_data["samples"], pca_data["points"])
        ]
    )

    # Keep only custom fields that carry at least one non-null value
    custom_columns = [
        column
        for column in df.columns
        if column not in generic_columns
        and column.startswith("custom_")
        and df[column].notna().any()
    ]

    # reindex (rather than drop/select) so that an empty result, which has no
    # columns at all, yields an empty frame instead of raising KeyError
    df = df.reindex(columns=custom_columns + generic_columns)

    # Convert to a list of records unless a DataFrame was requested
    if not as_df:
        df = df.to_dict(orient="records")

    return dict(
        x_contribution_ratio=x_contribution_ratio,
        y_contribution_ratio=y_contribution_ratio,
        data=df,
    )
2254
+
2255
def get_analysis_hierarchical_clustering(
    self,
    analysis_ids: _List[str],
    sample_ids: _List[str] = [],
    hide_control: bool = False,
):
    """
    Get hierarchical clustering data for given analyses and samples.
    Args:
        analysis_ids (list[str]): IDs of the analyses.
        sample_ids (list[str], optional): IDs of the samples. Omitted from the request when empty.
        hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
    Raises:
        ValueError: No analysis IDs provided.
        ValueError: Response status code is not 200.
        ValueError: The response contains no "samples" entry.
    Returns:
        dict
            Hierarchical clustering data returned by the API.
    """
    if not analysis_ids:
        raise ValueError("Analysis IDs cannot be empty.")

    URL = f"{self._auth.url}api/v1/analysishcluster"

    payload = {
        "analysisIds": ",".join(analysis_ids),
    }
    if sample_ids:
        payload["sampleIds"] = ",".join(sample_ids)

    # specify hideControl as a string
    # Python bool values are not recognized by the API
    payload["hideControl"] = "true" if hide_control else "false"

    with self._get_auth_session() as s:
        hc_data = s.post(URL, json=payload)

        if hc_data.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )

        data = hc_data.json()

    # A response without sample data is treated as an error
    if "samples" not in data:
        raise ValueError("No sample data returned from server.")

    return data
2316
+
2317
def get_ppi_network_data(
    self, significant_pgs: _List[str], species: str = None
):
    """
    Request PPI network data for a set of significant protein groups.
    Args:
        significant_pgs (_List[str]): Significant protein groups. Sent as a comma-separated string.
        species (str, optional): Species of interest. Only sent when provided. Defaults to None.
    Raises:
        ValueError: No significant protein groups provided.
        ValueError: Response status code is not 200.
    Returns:
        dict
            Response returned by the API.
    """
    if not significant_pgs:
        raise ValueError("Significant protein groups cannot be empty.")

    endpoint = f"{self._auth.url}api/v1/groupanalysis/stringdb"

    payload = {"significantPGs": ",".join(significant_pgs)}
    if species:
        payload["species"] = species

    with self._get_auth_session() as session:
        response = session.post(endpoint, json=payload)

        if response.status_code == 200:
            return response.json()

        raise ValueError("Server error - bad response")
2350
+
2351
+ # groups are user defined by the sample description file
2352
def get_cluster_heatmap_data(
    self,
    analysis_id: str,
    grouping: str,
    groups: _List[str],
    contrasts: _List[_Tuple[int, ...]],
    stat_test: str,
    feature_type: str,
    significant_pgs: _List[str] = [],
):
    """Request cluster heatmap data for the given analysis.

    Args:
        analysis_id (str): ID of the analysis
        grouping (str): Category of sample groups
        groups (_List[str]): sample groups
        contrasts (_List[_Tuple[int, ...]]): Indicate which groups are compared against each other. e.g. [(0, 1, -1, 0), (1, 0, 0, -1)]
        stat_test (str): Statistical test to be used, either ttest or wilcoxon
        feature_type (str): Type of feature to be used, either proteingroup or peptide
        significant_pgs (_List[str], optional): significant protein group IDs. Defaults to [].

    Raises:
        ValueError: "Feature type must be either 'proteingroup' or 'peptide'."
        ValueError: "Stat test must be either 'ttest' or 'wilcoxon'."
        ValueError: Invalid contrast value.
        ValueError: Server error

    Returns:
        dict: the response object
            clusterProtein: List of protein clusters
                clusters:
                    indexes: list[int], List of indexes
                    height: int, Height of the cluster
                    children: list[dict] | None, Children of the cluster
            clusterSample: List of sample clusters
                clusters:
                    indexes: list[int], List of indexes
                    height: int, Height of the cluster
                    children: list[dict] | None, Children of the cluster
            data: List of data

    """
    if feature_type not in ["proteingroup", "peptide"]:
        raise ValueError(
            "Feature type must be either 'proteingroup' or 'peptide'."
        )

    if stat_test not in ["ttest", "wilcoxon"]:
        raise ValueError("Stat test must be either 'ttest' or 'wilcoxon'.")

    # Each contrast is checked against the number of groups before anything is sent
    for contrast in contrasts:
        validate_contrast(contrast, len(groups))

    # Wire format: values comma-separated within a contrast, contrasts separated by ';'
    contrast_strings = []
    for contrast in contrasts:
        contrast_strings.append(",".join(str(value) for value in contrast))

    payload = {
        "analysisId": analysis_id,
        "grouping": grouping,
        "groups": ",".join(groups),
        "contrasts": ";".join(contrast_strings),
        "statTest": stat_test,
        "featureType": feature_type,
        "significantPGs": ",".join(significant_pgs),
    }

    with self._get_auth_session() as session:
        endpoint = f"{self._auth.url}api/v2/clusterheatmap"
        response = session.post(endpoint, json=payload)
        if response.status_code == 200:
            return response.json()
        raise ValueError("Server error. Bad response.")
2424
+
2425
def get_enrichment_plot(
    self,
    analysis_id: str,
    significant_pgs: _List[str],
    summarize_output: bool = False,
    exclude_singleton: bool = False,
    cutoff: float = None,
    species: str = None,
):
    """
    Get enrichment plot data for a given analysis ID.

    Args:
        analysis_id (str): ID of the analysis.
        significant_pgs (_List[str]): List of significant protein/peptide groups.
        summarize_output (bool, optional): Summarize the output. Defaults to False.
        exclude_singleton (bool, optional): Exclude singleton values. Defaults to False.
        cutoff (float, optional): Cutoff value for the p-value to determine significance. Sent whenever it is not None, including 0. Defaults to None.
        species (str, optional): Species to filter the data by. Defaults to None.

    Raises:
        ValueError: No significant protein groups provided.
        ValueError: Could not fetch enrichment plot data.

    Returns:
        dict: A dictionary containing the enrichment plot data.
    """

    URL = f"{self._auth.url}api/v1/groupanalysis/enrichmentgo"

    if not significant_pgs:
        raise ValueError("Significant pgs cannot be empty.")

    payload = {
        "analysisId": analysis_id,
        "significantPGs": significant_pgs,
        "summarizeOutput": summarize_output,
        "excludeSingleton": exclude_singleton,
    }
    # Compare against None so an explicit cutoff of 0 is not silently dropped
    if cutoff is not None:
        payload["cutoff"] = cutoff
    if species:
        payload["species"] = species

    with self._get_auth_session() as s:
        enrichment_data = s.post(URL, json=payload)

        if enrichment_data.status_code != 200:
            raise ValueError("Could not fetch enrichment plot data.")

        return enrichment_data.json()
2475
+
2476
def get_volcano_plot_data(
    self,
    analysis_id,
    group_analysis_id=None,
    significance_threshold=0.05,
    fold_change_threshold=1,
    label_by="fold_change",
    cached=False,
    as_df=False,
):
    """Get volcano plot data for a given analysis ID.

    Args:
        analysis_id (str): ID of the analysis.
        group_analysis_id (str, optional): ID of the group analysis, forwarded to the group analysis results lookup. Defaults to None.
        significance_threshold (float, optional): Cutoff value for the p-value to determine significance. Defaults to 0.05.
        fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance. Defaults to 1.
        label_by (str, optional): Metric to sort result data. Defaults to "fold_change".
        cached (bool, optional): Return a VolcanoPlotBuilder object for calculation reuse. Takes precedence over as_df. Defaults to False.
        as_df (bool, optional): Return data as a pandas DataFrame. Defaults to False.

    Raises:
        ServerError - could not fetch group analysis results.
    Returns:
        list[dict] | pd.DataFrame | VolcanoPlotBuilder: A list of dictionaries, a DataFrame, or a VolcanoPlotBuilder object containing the volcano plot data.
            Object contains the following columns: 'logFD', 'negativeLog10P', 'dataIndex', 'rowID', 'gene', 'protein',
            'group', 'significant', 'euclideanDistance'
    """
    try:
        response = self.group_analysis_results(
            analysis_id, group_analysis_id=group_analysis_id
        )
    except Exception as e:
        # Catch Exception rather than everything so KeyboardInterrupt/SystemExit
        # still propagate; chain the cause so the underlying failure stays visible.
        raise ServerError(
            f"Could not fetch group analysis results. Please check that group analysis has completed for analysis {analysis_id}."
        ) from e

    obj = VolcanoPlotBuilder(
        response, significance_threshold, fold_change_threshold, label_by
    )

    if cached:
        return obj
    if as_df:
        return pd.DataFrame(obj.volcano_plot)
    return obj.volcano_plot
2523
+
2524
def get_analysis_samples(self, analysis_id: str):
    """
    Retrieve the samples belonging to an analysis.

    Args:
        analysis_id (str): The analysis ID.

    Raises:
        ValueError - the analysis ID is empty.
        ServerError - could not retrieve samples for analysis.
    Returns:
        The JSON-decoded response body describing the samples associated with the analysis.
    """
    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    endpoint = f"{self._auth.url}api/v1/analyses/samples/{analysis_id}"
    with self._get_auth_session() as session:
        response = session.get(endpoint)

        if response.status_code == 200:
            return response.json()

        raise ServerError("Could not retrieve samples for analysis.")
2547
+
2548
def get_analysis_protocol_fasta(self, analysis_id, download_path=None):
    """
    Download the fasta file(s) referenced by the analysis protocol of an analysis.

    The analysis is resolved to its analysis protocol and engine, the protocol's
    editable parameters are read to find the fasta file path(s), and each file is
    downloaded into download_path under its base name.

    Args:
        analysis_id (str): ID of the analysis.
        download_path (str, optional): Directory to download into. It is created if missing. Defaults to the current working directory.

    Raises:
        ValueError: The analysis ID is empty.
        ValueError: The analysis or analysis protocol response could not be parsed.
        ValueError: The analysis protocol engine is not supported for fasta download.
        ServerError: A request failed, or no fasta file name was returned.

    Returns:
        None. Progress and the outcome of each download are printed.
    """
    if not analysis_id:
        raise ValueError("Analysis ID cannot be empty.")

    if not download_path:
        download_path = os.getcwd()

    try:
        analysis_protocol_id = self.get_analysis(analysis_id)[0][
            "analysis_protocol_id"
        ]
    except (IndexError, KeyError) as e:
        raise ValueError("Could not parse server response.") from e

    try:
        analysis_protocol_engine = self.get_analysis_protocols(
            analysis_protocol_id=analysis_protocol_id
        )[0]["analysis_engine"]
    except (IndexError, KeyError) as e:
        raise ValueError("Could not parse server response.") from e

    analysis_protocol_engine = analysis_protocol_engine.lower()

    # Engine name -> path segment of its editableParameters endpoint
    engine_segments = {
        "diann": "diann",
        "encyclopedia": "dia",
        "msfragger": "msfragger",
        "proteogenomics": "proteogenomics",
    }
    if analysis_protocol_engine not in engine_segments:
        # Change needed on the backend to get s3 file path for MaxQuant
        raise ValueError(
            f"Analysis protocol engine {analysis_protocol_engine} not supported for fasta download."
        )
    URL = (
        f"{self._auth.url}api/v1/analysisProtocols/editableParameters/"
        f"{engine_segments[analysis_protocol_engine]}/{analysis_protocol_id}"
    )

    with self._get_auth_session() as s:
        response = s.get(URL)
        if response.status_code != 200:
            raise ServerError("Request failed.")
        response = response.json()
        # The parameter list is either the response itself or nested under "editableParameters"
        if isinstance(response, dict):
            response = response["editableParameters"]
        fasta_filenames = [
            x["Value"]
            for x in response
            if x["Key"] in ["fasta", "fastaFilePath", "referencegenome"]
        ]
        if not fasta_filenames:
            raise ServerError("No fasta file name returned from server.")

    # Create the target directory up front instead of relying on a failed
    # first download attempt to trigger its creation.
    os.makedirs(download_path, exist_ok=True)

    URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
    for file in fasta_filenames:
        with self._get_auth_session() as s:
            response = s.post(URL, json={"filepath": file})
            if response.status_code != 200:
                raise ServerError("Request failed.")
            url = response.json()["url"]

        filename = os.path.basename(file)
        destination = os.path.join(download_path, filename)
        print(f"Downloading {filename}")

        last_error = None
        # Up to two attempts per file
        for _ in range(2):
            try:
                with tqdm(
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    desc="Progress",
                ) as t:
                    # NOTE(review): this disables TLS certificate verification for
                    # every later urllib HTTPS request in the process, not just this
                    # download. Kept for backward compatibility - confirm whether
                    # it is still required.
                    ssl._create_default_https_context = (
                        ssl._create_unverified_context
                    )
                    urllib.request.urlretrieve(
                        url,
                        destination,
                        reporthook=download_hook(t),
                        data=None,
                    )
            except Exception as e:
                last_error = e
            else:
                print(f"Downloaded file to {destination}")
                break
        else:
            # Both attempts failed: report it instead of claiming success
            print(f"Failed to download {filename}: {last_error}")