seer-pas-sdk 0.1.3__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seer_pas_sdk/core/sdk.py CHANGED
@@ -1,17 +1,17 @@
1
1
  from tqdm import tqdm
2
2
 
3
+ import deprecation
3
4
  import os
4
5
  import jwt
5
6
  import requests
6
7
  import urllib.request
7
8
  import ssl
8
- import shutil
9
9
 
10
- from typing import List as _List
10
+ from typing import List as _List, Tuple as _Tuple
11
11
 
12
12
  from ..common import *
13
13
  from ..auth import Auth
14
- from ..objects import PlateMap
14
+ from ..objects.volcanoplot import VolcanoPlotBuilder
15
15
 
16
16
 
17
17
  class SeerSDK:
@@ -27,40 +27,205 @@ class SeerSDK:
27
27
  >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
28
28
  """
29
29
 
30
- def __init__(self, username, password, instance="US"):
30
+ def __init__(self, username, password, instance="US", tenant=None):
31
31
  try:
32
32
  self._auth = Auth(username, password, instance)
33
33
 
34
34
  self._auth.get_token()
35
-
36
35
  print(f"User '{username}' logged in.\n")
37
36
 
38
- except:
37
+ if not tenant:
38
+ tenant = self._auth.active_tenant_id
39
+ try:
40
+ self.switch_tenant(tenant)
41
+ except Exception as e:
42
+ print(
43
+ f"Encountered an error directing you to tenant {tenant}: {e}."
44
+ )
45
+ print("Logging into home tenant...")
46
+ # If an error occurs while directing the user to a tenant, default to home tenant.
47
+ print(f"You are now active in {self.get_active_tenant_name()}")
48
+ except Exception as e:
39
49
  raise ValueError(
40
- "Could not log in.\nPlease check your credentials and/or instance."
50
+ f"Could not log in.\nPlease check your credentials and/or instance: {e}."
41
51
  )
42
52
 
43
- def _get_auth_headers(self):
53
+ def _get_auth_headers(self, use_multi_tenant=True):
44
54
  id_token, access_token = self._auth.get_token()
45
- return {
55
+ header = {
46
56
  "Authorization": id_token,
47
- "access-token": access_token,
57
+ "Access-Token": access_token,
48
58
  }
59
+ if use_multi_tenant:
60
+ multi_tenant = {
61
+ "Tenant-Id": self._auth.active_tenant_id,
62
+ "Role": self._auth.active_role,
63
+ }
64
+ header.update(multi_tenant)
65
+ return header
49
66
 
50
- def _get_auth_session(self):
67
+ def _get_auth_session(self, use_multi_tenant=True):
51
68
  sess = requests.Session()
52
69
 
53
- sess.headers.update(self._get_auth_headers())
70
+ sess.headers.update(self._get_auth_headers(use_multi_tenant))
54
71
 
55
72
  return sess
56
73
 
74
+ def get_user_tenant(self, index=True):
75
+ """
76
+ Fetches the tenant metadata for the authenticated user.
77
+
78
+ Returns
79
+ -------
80
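+ response : dict[str, list[dict]] or list[dict]
81
+ When `index` is True (the default), a dictionary mapping each institution name to its list of tenant objects; otherwise a flat list of tenant objects pertaining to the user.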
82
+ """
83
+ with self._get_auth_session() as s:
84
+ response = s.get(f"{self._auth.url}api/v1/usertenants")
85
+
86
+ if response.status_code != 200:
87
+ raise ValueError(
88
+ "Invalid request. Please check your parameters."
89
+ )
90
+
91
+ response = response.json()
92
+ if index:
93
+ mapper = dict()
94
+ for x in response:
95
+ if x["institution"] not in mapper:
96
+ mapper[x["institution"]] = [x]
97
+ else:
98
+ mapper[x["institution"]].append(x)
99
+ return mapper
100
+ else:
101
+ return response
102
+
103
+ def list_tenants(self, reverse=False):
104
+ """
105
+ Lists the institution names and the tenant ids for the authenticated user.
106
+
107
+ Parameters
108
+ ----------
109
+ reverse: bool
110
+ Boolean denoting whether the user wants the result dictionary indexed by tenant id (True) or institution name (False).
111
+
112
+ Returns
113
+ -------
114
+ tenants : dict[str, str]
115
+ A dictionary containing the institution names and tenant ids for the authenticated user.
116
+ """
117
+ tenants = self.get_user_tenant()
118
+ if reverse:
119
+ return {x["tenantId"]: x["institution"] for x in tenants.values()}
120
+ else:
121
+ return {x["institution"]: x["tenantId"] for x in tenants.values()}
122
+
123
+ def switch_tenant(self, identifier: str):
124
+ """
125
+ Switches the tenant for the authenticated user.
126
+
127
+ Parameters
128
+ ----------
129
+ identifier: str
130
+ Tenant ID or organization name to switch to.
131
+
132
+ Returns
133
+ -------
134
+ tenant_id, role: tuple[str, str]
135
+ Returns the active tenant id and the active role after the operation.
136
+ """
137
+ map = self.get_user_tenant()
138
+ tenant_id_match = [
139
+ y for x in map.values() for y in x if y["tenantId"] == identifier
140
+ ]
141
+ institution_names = map.keys()
142
+
143
+ if tenant_id_match:
144
+ tenant_id = identifier
145
+ row = tenant_id_match
146
+ if row:
147
+ row = row[0]
148
+ else:
149
+ raise ValueError(
150
+ "Invalid tenant identifier. Tenant was not switched."
151
+ )
152
+ elif identifier in institution_names:
153
+ results = map[identifier]
154
+ if len(results) > 1:
155
+ raise ValueError(
156
+ "Multiple tenants found for the given institution name. Please specify a tenant ID."
157
+ )
158
+ row = results[0]
159
+ tenant_id = row["tenantId"]
160
+ else:
161
+ raise ValueError(
162
+ "Invalid tenant identifier. Tenant was not switched."
163
+ )
164
+
165
+ with self._get_auth_session() as s:
166
+ response = s.put(
167
+ self._auth.url + "api/v1/users/tenant",
168
+ json={
169
+ "currentTenantId": tenant_id,
170
+ "username": self._auth.username,
171
+ },
172
+ )
173
+ if response.status_code != 200:
174
+ raise ServerError(
175
+ "Could not update current tenant for user. Tenant was not switched."
176
+ )
177
+
178
+ self._auth.active_tenant_id = tenant_id
179
+ self._auth.active_role = row["role"]
180
+ print(f"You are now active in {row['institution']}")
181
+ return self._auth.active_tenant_id, self._auth.active_role
182
+
183
+ def get_active_tenant(self):
184
+ """
185
+ Fetches the active tenant for the authenticated user.
186
+
187
+ Returns
188
+ -------
189
+ tenant: dict[str, str]
190
+ Tenant metadata for the authenticated user containing "institution" and "tenantId" keys.
191
+ """
192
+ tenants = self.get_user_tenant(index=False)
193
+ row = [
194
+ x for x in tenants if x["tenantId"] == self._auth.active_tenant_id
195
+ ]
196
+ return row[0] if row else None
197
+
198
+ def get_active_tenant_id(self):
199
+ """
200
+ Fetches the active tenant ID for the authenticated user.
201
+
202
+ Returns
203
+ -------
204
+ tenant_id: str
205
+ Tenant ID for the authenticated user.
206
+ """
207
+ tenant = self.get_active_tenant()
208
+ return tenant["tenantId"] if tenant else None
209
+
210
+ def get_active_tenant_name(self):
211
+ """
212
+ Fetches the active tenant name for the authenticated user.
213
+
214
+ Returns
215
+ -------
216
+ tenant: str
217
+ Tenant name for the authenticated user.
218
+ """
219
+ tenant = self.get_active_tenant()
220
+ return tenant["institution"] if tenant else None
221
+
57
222
  def get_spaces(self):
58
223
  """
59
224
  Fetches a list of spaces for the authenticated user.
60
225
 
61
226
  Returns
62
227
  -------
63
- spaces: list
228
+ spaces: list[dict]
64
229
  List of space objects for the authenticated user.
65
230
 
66
231
  Examples
@@ -86,7 +251,7 @@ class SeerSDK:
86
251
  )
87
252
  return spaces.json()
88
253
 
89
- def get_plate_metadata(self, plate_id: str = None, df: bool = False):
254
+ def get_plates(self, plate_id: str = None, as_df: bool = False):
90
255
  """
91
256
  Fetches a list of plates for the authenticated user. If no `plate_id` is provided, returns all plates for the authenticated user. If `plate_id` is provided, returns the plate with the given `plate_id`, provided it exists.
92
257
 
@@ -94,25 +259,25 @@ class SeerSDK:
94
259
  ----------
95
260
  plate_id : str, optional
96
261
  ID of the plate to be fetched, defaulted to None.
97
- df: bool
98
- Boolean denoting whether the user wants the response back in JSON or a DataFrame object
262
+ as_df: bool
263
+ whether the result should be converted to a DataFrame, defaulted to False.
99
264
 
100
265
  Returns
101
266
  -------
102
- plates: list or DataFrame
267
+ plates: list[dict] or DataFrame
103
268
  List/DataFrame of plate objects for the authenticated user.
104
269
 
105
270
  Examples
106
271
  -------
107
272
  >>> from seer_pas_sdk import SeerSDK
108
273
  >>> seer_sdk = SeerSDK()
109
- >>> seer_sdk.get_plate_metadata()
274
+ >>> seer_sdk.get_plates()
110
275
  >>> [
111
276
  { "id": ... },
112
277
  { "id": ... },
113
278
  ...
114
279
  ]
115
- >>> seer_sdk.get_plate_metadata(df=True)
280
+ >>> seer_sdk.get_plates(as_df=True)
116
281
  >>> id ... user_group
117
282
  0 a7c12190-15da-11ee-bdf1-bbaa73585acf ... None
118
283
  1 8c3b1480-15da-11ee-bdf1-bbaa73585acf ... None
@@ -151,9 +316,9 @@ class SeerSDK:
151
316
  for entry in res:
152
317
  del entry["tenant_id"]
153
318
 
154
- return res if not df else dict_to_df(res)
319
+ return res if not as_df else dict_to_df(res)
155
320
 
156
- def get_project_metadata(self, project_id: str = None, df: bool = False):
321
+ def get_projects(self, project_id: str = None, as_df: bool = False):
157
322
  """
158
323
  Fetches a list of projects for the authenticated user. If no `project_id` is provided, returns all projects for the authenticated user. If `project_id` is provided, returns the project with the given `project_id`, provided it exists.
159
324
 
@@ -161,26 +326,26 @@ class SeerSDK:
161
326
  ----------
162
327
  project_id: str, optional
163
328
  Project ID of the project to be fetched, defaulted to None.
164
- df: bool
165
- Boolean denoting whether the user wants the response back in JSON or a DataFrame object.
329
+ as_df: bool
330
+ whether the result should be converted to a DataFrame, defaulted to False.
166
331
 
167
332
  Returns
168
333
  -------
169
- projects: list or DataFrame
334
+ projects: list[dict] or DataFrame
170
335
  DataFrame or list of project objects for the authenticated user.
171
336
 
172
337
  Examples
173
338
  -------
174
339
  >>> from seer_pas_sdk import SeerSDK
175
340
  >>> seer_sdk = SeerSDK()
176
- >>> seer_sdk.get_project_metadata()
341
+ >>> seer_sdk.get_projects()
177
342
  >>> [
178
343
  { "project_name": ... },
179
344
  { "project_name": ... },
180
345
  ...
181
346
  ]
182
347
 
183
- >>> seer_sdk.get_project_metadata(df=True)
348
+ >>> seer_sdk.get_projects(as_df=True)
184
349
  >>> id ... user_group
185
350
  0 a7c12190-15da-11ee-bdf1-bbaa73585acf ... None
186
351
  1 8c3b1480-15da-11ee-bdf1-bbaa73585acf ... None
@@ -194,7 +359,7 @@ class SeerSDK:
194
359
  938 5b05d440-6610-11ea-96e3-d5a4dab4ebf6 ... None
195
360
  939 9872e3f0-544e-11ea-ad9e-1991e0725494 ... None
196
361
 
197
- >>> seer_sdk.get_project_metadata(id="YOUR_PROJECT_ID_HERE")
362
+ >>> seer_sdk.get_projects(project_id="YOUR_PROJECT_ID_HERE")
198
363
  >>> [{ "project_name": ... }]
199
364
  """
200
365
 
@@ -228,19 +393,18 @@ class SeerSDK:
228
393
  entry["raw_file_path"] = entry["raw_file_path"][
229
394
  location(entry["raw_file_path"]) :
230
395
  ]
231
- return res if not df else dict_to_df(res)
396
+ return res if not as_df else dict_to_df(res)
232
397
 
233
- def _get_samples_metadata(
234
- self, plate_id: str = None, project_id: str = None, df: bool = False
398
+ def get_samples(
399
+ self,
400
+ plate_id: str = None,
401
+ project_id: str = None,
402
+ analysis_id: str = None,
403
+ analysis_name: str = None,
404
+ as_df: bool = False,
235
405
  ):
236
406
  """
237
- ****************
238
- [UNEXPOSED METHOD CALL]
239
- ****************
240
-
241
- Fetches a list of samples for the authenticated user, filtered by `plate_id`. Returns all samples for the plate with the given `plate_id`, provided it exists.
242
-
243
- If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated first.
407
+ Fetches a list of samples for the authenticated user with relation to a specified plate, project, or analysis. Exactly one of `plate_id`, `project_id`, `analysis_id`, or `analysis_name` must be provided; otherwise a ValueError is raised. If `plate_id` or `project_id` is provided, returns samples associated with that plate or project. If `analysis_id` or `analysis_name` is provided, returns samples associated with that analysis.
244
408
 
245
409
  Parameters
246
410
  ----------
@@ -248,12 +412,16 @@ class SeerSDK:
248
412
  ID of the plate for which samples are to be fetched, defaulted to None.
249
413
  project_id : str, optional
250
414
  ID of the project for which samples are to be fetched, defaulted to None.
251
- df: bool
252
- Boolean denoting whether the user wants the response back in JSON or a DataFrame object
415
+ analysis_id : str, optional
416
+ ID of the analysis for which samples are to be fetched, defaulted to None.
417
+ analysis_name : str, optional
418
+ Name of the analysis for which samples are to be fetched, defaulted to None.
419
+ as_df: bool
420
+ whether the result should be converted to a DataFrame, defaulted to False.
253
421
 
254
422
  Returns
255
423
  -------
256
- samples: list or DataFrame
424
+ samples: list[dict] or DataFrame
257
425
  List/DataFrame of samples for the authenticated user.
258
426
 
259
427
  Examples
@@ -261,14 +429,14 @@ class SeerSDK:
261
429
  >>> from seer_pas_sdk import SeerSDK
262
430
  >>> seer_sdk = SeerSDK()
263
431
 
264
- >>> seer_sdk._get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
432
+ >>> seer_sdk.get_samples(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
265
433
  >>> [
266
434
  { "id": ... },
267
435
  { "id": ... },
268
436
  ...
269
437
  ]
270
438
 
271
- >>> seer_sdk._get_samples_metadata(df=True)
439
+ >>> seer_sdk.get_samples(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf", as_df=True)
272
440
  >>> id ... control
273
441
  0 812139c0-15e0-11ee-bdf1-bbaa73585acf ...
274
442
  1 803e05b0-15e0-11ee-bdf1-bbaa73585acf ... MPE Control
@@ -283,48 +451,68 @@ class SeerSDK:
283
451
  3628 dd607ef0-654c-11ea-8eb2-25a1cfd1163c ... C132
284
452
  """
285
453
 
286
- if not plate_id and not project_id:
287
- raise ValueError("You must pass in plate ID or project ID.")
454
+ # Raise an error if none or more than one of the primary key parameters are passed in.
455
+ if (
456
+ sum(
457
+ [
458
+ True if x else False
459
+ for x in [plate_id, project_id, analysis_id, analysis_name]
460
+ ]
461
+ )
462
+ != 1
463
+ ):
464
+ raise ValueError(
465
+ "You must pass in exactly one of plate_id, project_id, analysis_id, analysis_name."
466
+ )
288
467
 
289
468
  res = []
290
469
  URL = f"{self._auth.url}api/v1/samples"
291
470
  sample_params = {"all": "true"}
292
471
 
293
- with self._get_auth_session() as s:
294
-
295
- if plate_id:
296
- try:
297
- self.get_plate_metadata(plate_id)
298
- except:
299
- raise ValueError(
300
- "Plate ID is invalid. Please check your parameters and see if the backend is running."
301
- )
302
- sample_params["plateId"] = plate_id
472
+ if project_id or plate_id:
473
+ with self._get_auth_session() as s:
474
+ if plate_id:
475
+ try:
476
+ self.get_plates(plate_id)
477
+ except:
478
+ raise ValueError("Plate ID is invalid.")
479
+ sample_params["plateId"] = plate_id
303
480
 
304
- elif project_id:
305
- try:
306
- self.get_project_metadata(project_id)
307
- except:
308
- raise ValueError(
309
- "Project ID is invalid. Please check your parameters and see if the backend is running."
310
- )
481
+ else:
482
+ try:
483
+ self.get_projects(project_id)
484
+ except:
485
+ raise ValueError("Project ID is invalid.")
311
486
 
312
- sample_params["projectId"] = project_id
487
+ sample_params["projectId"] = project_id
313
488
 
314
489
  samples = s.get(URL, params=sample_params)
315
490
  if samples.status_code != 200:
316
491
  raise ValueError(
317
- "Invalid request. Please check if your plate ID has any samples associated with it."
492
+ f"Failed to fetch sample data for plate ID: {plate_id}."
318
493
  )
319
494
  res = samples.json()["data"]
495
+ res_df = dict_to_df(res)
320
496
 
321
- for entry in res:
322
- del entry["tenant_id"]
497
+ # API returns empty strings if not a control, replace with None for filtering purposes
498
+ res_df["control"] = res_df["control"].apply(
499
+ lambda x: x if x else None
500
+ )
501
+ else:
502
+ if analysis_id:
503
+ res_df = self._get_analysis_samples(
504
+ analysis_id=analysis_id, as_df=True
505
+ )
506
+ else:
507
+ res_df = self._get_analysis_samples(
508
+ analysis_name=analysis_name, as_df=True, is_name=True
509
+ )
510
+
511
+ # apply post processing
512
+ res_df.drop(["tenant_id"], axis=1, inplace=True)
323
513
 
324
- # Exclude custom fields that don't belong to the tenant
325
- res_df = dict_to_df(res)
326
514
  custom_columns = [
327
- x["field_name"] for x in self.get_sample_custom_fields()
515
+ x["field_name"] for x in self._get_sample_custom_fields()
328
516
  ]
329
517
  res_df = res_df[
330
518
  [
@@ -334,9 +522,73 @@ class SeerSDK:
334
522
  ]
335
523
  ]
336
524
 
337
- return res_df.to_dict(orient="records") if not df else res_df
525
+ return res_df.to_dict(orient="records") if not as_df else res_df
526
+
527
+ def _filter_samples_metadata(
528
+ self,
529
+ project_id: str,
530
+ filter: str,
531
+ sample_ids: list = None,
532
+ ):
533
+ """
534
+ ****************
535
+ [UNEXPOSED METHOD CALL]
536
+ ****************
537
+ Get samples given a filter and project_id.
538
+
539
+ Parameters
540
+ ----------
541
+ project_id : str
542
+ The project id.
543
+ filter : str
544
+ The filter to be applied. Acceptable values are 'control' or 'sample'.
545
+ sample_ids : list, optional
546
+ List of user provided sample ids
547
+
548
+ Returns
549
+ -------
550
+ res : list[str]
551
+ A list of sample ids
552
+
553
+ Examples
554
+ -------
555
+ >>> from seer_pas_sdk import SeerSDK
556
+ >>> seer_sdk = SeerSDK()
557
+ >>> seer_sdk._filter_samples_metadata("PROJECT_ID", "FILTER")
558
+ >>> {
559
+ "samples": [
560
+ {
561
+ "id": "SAMPLE_ID",
562
+ "plate_id": "PLATE_ID",
563
+ "sample_name": "SAMPLE_NAME",
564
+ ...
565
+ ...
566
+ },
567
+ ...
568
+ ...
569
+ ]
570
+ }
571
+ """
572
+
573
+ if filter and filter not in ["control", "sample"]:
574
+ raise ValueError(
575
+ "Invalid filter. Please choose between 'control' or 'sample'."
576
+ )
577
+
578
+ df = self.get_samples(project_id=project_id, as_df=True)
579
+
580
+ if filter == "control":
581
+ df = df[~df["control"].isna()]
582
+ elif filter == "sample":
583
+ df = df[df["control"].isna()]
584
+
585
+ valid_samples = df["id"].tolist()
586
+ if sample_ids:
587
+ valid_samples = list(set(valid_samples) & set(sample_ids))
588
+
589
+ return valid_samples
338
590
 
339
- def get_sample_custom_fields(self):
591
+ def _get_sample_custom_fields(self):
340
592
  """
341
593
  Fetches a list of custom fields defined for the authenticated user.
342
594
  """
@@ -356,7 +608,7 @@ class SeerSDK:
356
608
  del entry["tenant_id"]
357
609
  return res
358
610
 
359
- def get_msdata(self, sample_ids: list, df: bool = False):
611
+ def get_msruns(self, sample_ids: list, as_df: bool = False):
360
612
  """
361
613
  Fetches MS data files for passed in `sample_ids` (provided they are valid and contain relevant files) for an authenticated user.
362
614
 
@@ -366,12 +618,12 @@ class SeerSDK:
366
618
  ----------
367
619
  sample_ids : list
368
620
  List of unique sample IDs.
369
- df: bool
370
- Boolean denoting whether the user wants the response back in JSON or a DataFrame object.
621
+ as_df: bool
622
+ whether the result should be converted to a DataFrame, defaulted to False.
371
623
 
372
624
  Returns
373
625
  -------
374
- res: list or DataFrame
626
+ res: list[dict] or DataFrame
375
627
  List/DataFrame of MS data file objects for the authenticated user.
376
628
 
377
629
  Examples
@@ -380,13 +632,13 @@ class SeerSDK:
380
632
  >>> seer_sdk = SeerSDK()
381
633
  >>> sample_ids = ["812139c0-15e0-11ee-bdf1-bbaa73585acf", "803e05b0-15e0-11ee-bdf1-bbaa73585acf"]
382
634
 
383
- >>> seer_sdk.get_msdata(sample_ids)
635
+ >>> seer_sdk.get_msruns(sample_ids)
384
636
  >>> [
385
637
  {"id": "SAMPLE_ID_1_HERE" ... },
386
638
  {"id": "SAMPLE_ID_2_HERE" ... }
387
639
  ]
388
640
 
389
- >>> seer_sdk.get_msdata(sample_ids, df=True)
641
+ >>> seer_sdk.get_msruns(sample_ids, as_df=True)
390
642
  >>> id ... gradient
391
643
  0 81c6a180-15e0-11ee-bdf1-bbaa73585acf ... None
392
644
  1 816a9ed0-15e0-11ee-bdf1-bbaa73585acf ... None
@@ -405,10 +657,10 @@ class SeerSDK:
405
657
 
406
658
  if msdatas.status_code != 200 or not msdatas.json()["data"]:
407
659
  raise ValueError(
408
- "Failed to fetch MS data for your plate ID."
660
+ f"Failed to fetch MS data for sample ID={sample_id}."
409
661
  )
410
662
 
411
- res.append(msdatas.json()["data"][0])
663
+ res += [x for x in msdatas.json()["data"]]
412
664
 
413
665
  for entry in res:
414
666
  if "tenant_id" in entry:
@@ -421,189 +673,7 @@ class SeerSDK:
421
673
  entry["raw_file_path"] = entry["raw_file_path"][
422
674
  location(entry["raw_file_path"]) :
423
675
  ]
424
- return res if not df else dict_to_df(res)
425
-
426
- def get_plate(self, plate_id: str, df: bool = False):
427
- """
428
- Fetches MS data files for a `plate_id` (provided that the `plate_id` is valid and has samples associated with it) for an authenticated user.
429
-
430
- The function returns a dict containing DataFrame objects if the `df` flag is passed in as True, otherwise a nested dict object is returned instead.
431
-
432
- Parameters
433
- ----------
434
- plate_id : str, optional
435
- ID of the plate for which samples are to be fetched, defaulted to None.
436
- df: bool
437
- Boolean denoting whether the user wants the response back in JSON or a DataFrame object
438
-
439
- Returns
440
- -------
441
- res: list or DataFrame
442
- List/DataFrame of MS data file objects for the authenticated user.
443
-
444
- Examples
445
- -------
446
- >>> from seer_pas_sdk import SeerSDK
447
- >>> seer_sdk = SeerSDK()
448
- >>> plate_id = "7ec8cad0-15e0-11ee-bdf1-bbaa73585acf"
449
-
450
- >>> seer_sdk.get_plate(plate_id)
451
- >>> [
452
- {"id": "PLATE_ID_1_HERE" ... },
453
- {"id": "PLATE_ID_2_HERE" ... }
454
- ]
455
-
456
- >>> seer_sdk.get_plate(plate_id, df=True)
457
- >>> id ... volume
458
- 0 PLATE_ID_1_HERE ... None
459
- 1 PLATE_ID_2_HERE ... None
460
-
461
- [2 rows x 26 columns]
462
- """
463
- plate_samples = self._get_samples_metadata(plate_id=plate_id)
464
- sample_ids = [sample["id"] for sample in plate_samples]
465
- return self.get_msdata(sample_ids, df)
466
-
467
- def get_project(
468
- self, project_id: str, msdata: bool = False, df: bool = False
469
- ):
470
- """
471
- Fetches samples (and MS data files) for a `project_id` (provided that the `project_id` is valid and has samples associated with it) for an authenticated user.
472
-
473
- The function returns a DataFrame object if the `df` flag is passed in as True, otherwise a nested dict object is returned instead. If the both the `df` and `msdata` flags are passed in as True, then a nested DataFrame object is returned instead.
474
-
475
- Parameters
476
- ----------
477
- project_id : str
478
- ID of the project for which samples are to be fetched.
479
- msdata: bool, optional
480
- Boolean flag denoting whether the user wants relevant MS data files associated with the samples.
481
- df: bool, optional
482
- Boolean denoting whether the user wants the response back in JSON or a DataFrame object.
483
-
484
- Returns
485
- -------
486
- res: list or DataFrame
487
- List/DataFrame of plate objects for the authenticated user.
488
-
489
- Examples
490
- -------
491
- >>> from seer_pas_sdk import SeerSDK
492
- >>> seer_sdk = SeerSDK()
493
- >>> project_id = "7e48e150-8a47-11ed-b382-bf440acece26"
494
-
495
- >>> seer_sdk.get_project(project_id=project_id, msdata=False, df=False)
496
- >>> {
497
- "project_samples": [
498
- {
499
- "id": "SAMPLE_ID_1_HERE",
500
- "sample_type": "Plasma",
501
- ...
502
- ...
503
- },
504
- {
505
- "id": "SAMPLE_ID_2_HERE",
506
- "sample_type": "Plasma",
507
- ...
508
- ...
509
- }
510
- ]
511
- }
512
-
513
- >>> seer_sdk.get_project(project_id=project_id, msdata=True, df=False)
514
- >>> [
515
- {
516
- "id": "SAMPLE_ID_1_HERE",
517
- "sample_type": "Plasma",
518
- ...
519
- ...
520
- "ms_data_files": [
521
- {
522
- "id": MS_DATA_FILE_ID_1_HERE,
523
- "tenant_id": "TENANT_ID_HERE",
524
- ...
525
- ...
526
- },
527
- {
528
- "id": MS_DATA_FILE_ID_1_HERE,
529
- "tenant_id": "TENANT_ID_HERE",
530
- ...
531
- ...
532
- }
533
- ]
534
- },
535
- {
536
- "id": "SAMPLE_ID_2_HERE",
537
- "sample_type": "Plasma",
538
- ...
539
- ...
540
- "ms_data_files": [
541
- {
542
- "id": MS_DATA_FILE_ID_2_HERE,
543
- "tenant_id": "TENANT_ID_HERE",
544
- ...
545
- ...
546
- },
547
- {
548
- "id": MS_DATA_FILE_ID_2_HERE,
549
- "tenant_id": "TENANT_ID_HERE",
550
- ...
551
- ...
552
- }
553
- ]
554
- }
555
- ]
556
-
557
- >>> seer_sdk.get_project(project_id=project_id, msdata=True, df=True)
558
- >>> id ... ms_data_files
559
- 0 829509f0-8a47-11ed-b382-bf440acece26 ... id ... g...
560
- 1 828d41c0-8a47-11ed-b382-bf440acece26 ... id ... g...
561
- 2 8294e2e0-8a47-11ed-b382-bf440acece26 ... id ... g...
562
- 3 8285eec0-8a47-11ed-b382-bf440acece26 ... id ... g...
563
-
564
- [4 rows x 60 columns]
565
- """
566
- if not project_id:
567
- return ValueError("No project ID specified.")
568
-
569
- sample_ids = []
570
- project_samples = self._get_samples_metadata(
571
- project_id=project_id, df=False
572
- )
573
-
574
- if msdata:
575
- sample_ids = [
576
- sample["id"] for sample in project_samples
577
- ] # will always contain unique values
578
- ms_data_files = self.get_msdata(sample_ids=sample_ids, df=False)
579
-
580
- for ms_data_file in ms_data_files:
581
- for sample_index in range(len(project_samples)):
582
- if (
583
- project_samples[sample_index]["id"]
584
- == ms_data_file["sample_id"]
585
- ):
586
- if "ms_data_file" not in project_samples[sample_index]:
587
- project_samples[sample_index]["ms_data_files"] = [
588
- ms_data_file
589
- ]
590
- else:
591
- project_samples[sample_index][
592
- "ms_data_files"
593
- ].append(ms_data_file)
594
-
595
- if df:
596
- for sample_index in range(len(project_samples)):
597
- if "ms_data_files" in project_samples[sample_index]:
598
- project_samples[sample_index]["ms_data_files"] = (
599
- dict_to_df(
600
- project_samples[sample_index]["ms_data_files"]
601
- )
602
- )
603
-
604
- project_samples = dict_to_df(project_samples)
605
-
606
- return project_samples
676
+ return res if not as_df else dict_to_df(res)
607
677
 
608
678
  def get_analysis_protocols(
609
679
  self,
@@ -623,7 +693,7 @@ class SeerSDK:
623
693
 
624
694
  Returns
625
695
  -------
626
- protocols: list
696
+ protocols: list[dict]
627
697
  List of analysis protocol objects for the authenticated user.
628
698
 
629
699
  Examples
@@ -694,11 +764,16 @@ class SeerSDK:
694
764
  self,
695
765
  analysis_id: str = None,
696
766
  folder_id: str = None,
697
- show_folders=True,
698
- analysis_only=True,
767
+ show_folders: bool = True,
768
+ analysis_only: bool = True,
769
+ project_id: str = None,
770
+ plate_name: str = None,
771
+ **kwargs,
699
772
  ):
700
773
  """
701
774
  Returns a list of analyses objects for the authenticated user. If no id is provided, returns all analyses for the authenticated user.
775
+ Search parameters may be passed in as keyword arguments to filter the results. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
776
+ Searching on only a single field at a time is supported.
702
777
 
703
778
  Parameters
704
779
  ----------
@@ -709,16 +784,25 @@ class SeerSDK:
709
784
  ID of the folder to be fetched, defaulted to None.
710
785
 
711
786
  show_folders : bool, optional
712
- Mark True if folder contents are to be returned in the response, defaulted to True.
787
+ Mark True if folder contents are to be returned in the response, i.e. recursive search, defaulted to True.
713
788
  Will be disabled if an analysis id is provided.
714
789
 
715
790
  analysis_only : bool, optional
716
791
  Mark True if only analyses objects are to be returned in the response, defaulted to True.
717
792
  If marked false, folder objects will also be included in the response.
718
793
 
794
+ project_id : str, optional
795
+ ID of the project to be fetched, defaulted to None.
796
+
797
+ plate_name : str, optional
798
+ Name of the plate to be fetched, defaulted to None.
799
+
800
+ **kwargs : dict, optional
801
+ Search keyword parameters to be passed in. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
802
+
719
803
  Returns
720
804
  -------
721
- analyses: dict
805
+ analyses: list[dict]
722
806
  Contains a list of analyses objects for the authenticated user.
723
807
 
724
808
  Examples
@@ -732,19 +816,67 @@ class SeerSDK:
732
816
  {id: "YOUR_ANALYSIS_ID_HERE", ...}
733
817
  ]
734
818
 
735
- >>> seer_sdk.get_analyses("YOUR_ANALYSIS_ID_HERE")
819
+ >>> seer_sdk.get_analysis("YOUR_ANALYSIS_ID_HERE")
820
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
821
+
822
+ >>> seer_sdk.get_analysis(folder_name="YOUR_FOLDER_NAME_HERE")
823
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
824
+
825
+ >>> seer_sdk.get_analysis(analysis_name="YOUR_ANALYSIS")
826
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
827
+
828
+ >>> seer_sdk.get_analysis(description="YOUR_DESCRIPTION")
736
829
  >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
737
830
  """
738
831
 
739
832
  URL = f"{self._auth.url}api/v1/analyses"
740
833
  res = []
741
834
 
835
+ search_field = None
836
+ search_item = None
837
+ if kwargs:
838
+ if len(kwargs.keys()) > 1:
839
+ raise ValueError("Please include only one search parameter.")
840
+ search_field = list(kwargs.keys())[0]
841
+ search_item = kwargs[search_field]
842
+
843
+ if not search_item:
844
+ raise ValueError(
845
+ f"Please provide a non null value for {search_field}"
846
+ )
847
+
848
+ if search_field and search_field not in [
849
+ "analysis_name",
850
+ "folder_name",
851
+ "analysis_protocol_name",
852
+ "description",
853
+ "notes",
854
+ "number_msdatafile",
855
+ ]:
856
+ raise ValueError(
857
+ "Invalid search field. Please choose between 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'."
858
+ )
859
+
742
860
  with self._get_auth_session() as s:
743
861
 
744
862
  params = {"all": "true"}
745
863
  if folder_id:
746
864
  params["folder"] = folder_id
747
865
 
866
+ if search_field:
867
+ params["searchFields"] = search_field
868
+ params["searchItem"] = search_item
869
+ del params["all"]
870
+
871
+ if search_field == "folder_name":
872
+ params["searchFields"] = "analysis_name"
873
+
874
+ if project_id:
875
+ params["projectId"] = project_id
876
+
877
+ if plate_name:
878
+ params["plateName"] = plate_name
879
+
748
880
  analyses = s.get(
749
881
  f"{URL}/{analysis_id}" if analysis_id else URL, params=params
750
882
  )
@@ -790,100 +922,706 @@ class SeerSDK:
790
922
  ]
791
923
  return res
792
924
 
793
- def get_analysis_result(self, analysis_id: str, download_path: str = ""):
925
+ @deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
926
+ def get_analysis_result_protein_data(
927
+ self, analysis_id: str, link: bool = False, pg: str = None
928
+ ):
794
929
  """
795
- Given an `analysis_id`, this function returns all relevant analysis data files in form of downloadable content, if applicable.
930
+ Given an analysis id, this function returns the protein data for the analysis.
796
931
 
797
932
  Parameters
798
933
  ----------
934
+
799
935
  analysis_id : str
800
936
  ID of the analysis for which the data is to be fetched.
937
+ link : bool
938
+ Boolean flag denoting whether to return the result file URLs (the 'npLink' and 'panelLink' entries) instead of loading them into dataframes. Defaults to False.
939
+ pg : str
940
+ Protein group ID to filter dataframe results. Defaults to None.
801
941
 
802
- download_path : bool
803
- String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid. Defaults to an empty string.
804
-
805
- Returns
806
- -------
807
- links: dict
808
- Contains dataframe objects for the `analysis_id`, given that the analysis has been complete.
942
+ """
943
+ with self._get_auth_session() as s:
944
+ URL = f"{self._auth.url}api/v1/data"
945
+ response = s.get(
946
+ f"{URL}/protein?analysisId={analysis_id}&retry=false"
947
+ )
809
948
 
810
- Examples
811
- -------
812
- >>> from seer_pas_sdk import SeerSDK
813
- >>> seer_sdk = SeerSDK()
949
+ if response.status_code != 200:
950
+ raise ValueError(
951
+ "Could not fetch protein data. Please verify that your analysis completed."
952
+ )
953
+ response = response.json()
954
+
955
+ protein_data = {}
956
+ for row in response:
957
+ if row.get("name") == "npLink":
958
+ protein_data["npLink"] = {
959
+ "url": row.get("link", {}).get("url", "")
960
+ }
961
+ if row.get("name") == "panelLink":
962
+ protein_data["panelLink"] = {
963
+ "url": row.get("link", {}).get("url", "")
964
+ }
965
+ if not protein_data:
966
+ raise ValueError("No protein result files found.")
967
+ if not "panelLink" in protein_data.keys():
968
+ protein_data["panelLink"] = {"url": ""}
969
+
970
+ if link:
971
+ return protein_data
972
+ else:
973
+ if not pg:
974
+ return {
975
+ "protein_np": url_to_df(protein_data["npLink"]["url"]),
976
+ "protein_panel": url_to_df(
977
+ protein_data["panelLink"]["url"]
978
+ ),
979
+ }
980
+ else:
981
+ protein_np = url_to_df(
982
+ protein_data["npLink"]["url"]
983
+ ).query(f"`Protein Group` == '{pg}'")
984
+ protein_panel = url_to_df(
985
+ protein_data["panelLink"]["url"]
986
+ ).query(f"`Protein Group` == '{pg}'")
987
+
988
+ if protein_np.empty and protein_panel.empty:
989
+ raise ValueError(
990
+ f"Protein group {pg} not found in analysis {analysis_id}."
991
+ )
814
992
 
815
- >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
816
- >>> {
817
- "peptide_np": <peptide_np dataframe object>,
818
- "peptide_panel": <peptide_panel dataframe object>,
819
- "protein_np": <protein_np dataframe object>,
820
- "protein_panel": <protein_panel dataframe object>
821
- }
993
+ return {
994
+ "protein_np": protein_np,
995
+ "protein_panel": protein_panel,
996
+ }
822
997
 
823
- >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
824
- >>> { "status": "Download complete." }
998
+ @deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
999
+ def get_analysis_result_peptide_data(
1000
+ self, analysis_id: str, link: bool = False, peptide: str = None
1001
+ ):
825
1002
  """
1003
+ Given an analysis id, this function returns the peptide data for the analysis.
826
1004
 
827
- if not analysis_id:
828
- raise ValueError("Analysis ID cannot be empty.")
1005
+ Parameters
1006
+ ----------
829
1007
 
830
- if download_path and not os.path.exists(download_path):
831
- raise ValueError("The download path you entered is invalid.")
1008
+ analysis_id : str
1009
+ ID of the analysis for which the data is to be fetched.
832
1010
 
833
- if self.get_analysis(analysis_id)[0]["status"] in ["FAILED", None]:
834
- raise ValueError(
835
- "Cannot generate links for failed or null analyses."
836
- )
1011
+ link : bool
1012
+ Boolean flag denoting whether to return the result file URLs (the 'npLink' and 'panelLink' entries) instead of loading them into dataframes. Defaults to False.
837
1013
 
838
- URL = f"{self._auth.url}api/v1/data"
1014
+ peptide : str
1015
+ Peptide sequence to filter dataframe results. Defaults to None.
1016
+
1017
+ """
839
1018
 
840
1019
  with self._get_auth_session() as s:
1020
+ URL = f"{self._auth.url}api/v1/data"
1021
+ response = s.get(
1022
+ f"{URL}/peptide?analysisId={analysis_id}&retry=false"
1023
+ )
1024
+
1025
+ if response.status_code != 200:
1026
+ raise ValueError(
1027
+ "Could not fetch peptide data. Please verify that your analysis completed."
1028
+ )
841
1029
 
842
- protein_data = s.get(
1030
+ response = response.json()
1031
+
1032
+ peptide_data = {}
1033
+ for row in response:
1034
+ if row.get("name") == "npLink":
1035
+ peptide_data["npLink"] = {
1036
+ "url": row.get("link", {}).get("url", "")
1037
+ }
1038
+ if row.get("name") == "panelLink":
1039
+ peptide_data["panelLink"] = {
1040
+ "url": row.get("link", {}).get("url", "")
1041
+ }
1042
+ if not peptide_data:
1043
+ raise ValueError("No peptide result files found.")
1044
+ if not "panelLink" in peptide_data.keys():
1045
+ peptide_data["panelLink"] = {"url": ""}
1046
+ if link:
1047
+ return peptide_data
1048
+ else:
1049
+ if not peptide:
1050
+ return {
1051
+ "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
1052
+ "peptide_panel": url_to_df(
1053
+ peptide_data["panelLink"]["url"]
1054
+ ),
1055
+ }
1056
+ else:
1057
+ peptide_np = url_to_df(
1058
+ peptide_data["npLink"]["url"]
1059
+ ).query(f"Peptide == '{peptide}'")
1060
+ peptide_panel = url_to_df(
1061
+ peptide_data["panelLink"]["url"]
1062
+ ).query(f"Peptide == '{peptide}'")
1063
+
1064
+ if peptide_np.empty and peptide_panel.empty:
1065
+ raise ValueError(
1066
+ f"Peptide {peptide} not found in analysis {analysis_id}."
1067
+ )
1068
+
1069
+ return {
1070
+ "peptide_np": peptide_np,
1071
+ "peptide_panel": peptide_panel,
1072
+ }
1073
+
1074
+ def _get_search_result_protein_data(self, analysis_id: str):
1075
+ """
1076
+ Given an analysis id, this function returns the protein data for the analysis.
1077
+
1078
+ Parameters
1079
+ ----------
1080
+ analysis_id : str
1081
+ ID of the analysis for which the data is to be fetched.
1082
+ """
1083
+ with self._get_auth_session() as s:
1084
+ URL = f"{self._auth.url}api/v1/data"
1085
+ response = s.get(
843
1086
  f"{URL}/protein?analysisId={analysis_id}&retry=false"
844
1087
  )
845
1088
 
846
- if protein_data.status_code != 200:
1089
+ if response.status_code != 200:
847
1090
  raise ValueError(
848
- "Invalid request. Could not fetch protein data. Please check your parameters."
1091
+ "Could not fetch protein data. Please verify that your analysis completed."
849
1092
  )
850
- protein_data = protein_data.json()
1093
+ response = response.json()
1094
+
1095
+ protein_data = {}
1096
+ for row in response:
1097
+ if row.get("name") == "npLink":
1098
+ protein_data["npLink"] = {
1099
+ "url": row.get("link", {}).get("url", "")
1100
+ }
1101
+ if row.get("name") == "panelLink":
1102
+ protein_data["panelLink"] = {
1103
+ "url": row.get("link", {}).get("url", "")
1104
+ }
1105
+ if not protein_data:
1106
+ raise ValueError("No protein result files found.")
1107
+ if not "panelLink" in protein_data.keys():
1108
+ protein_data["panelLink"] = {"url": ""}
1109
+
1110
+ return protein_data
1111
+
1112
+ def _get_search_result_peptide_data(self, analysis_id: str):
1113
+ """
1114
+ Given an analysis id, this function returns the peptide data for the analysis.
1115
+
1116
+ Parameters
1117
+ ----------
1118
+
1119
+ analysis_id : str
1120
+ ID of the analysis for which the data is to be fetched.
1121
+
1122
+ Returns
1123
+ -------
1124
+ peptide_data : dict[str, dict[str, str]]
1125
+ Dictionary with 'npLink' and 'panelLink' keys, each mapping to a dictionary that holds the 'url' of the corresponding peptide file.
851
1126
 
852
- peptide_data = s.get(
1127
+ """
1128
+
1129
+ with self._get_auth_session() as s:
1130
+ URL = f"{self._auth.url}api/v1/data"
1131
+ response = s.get(
853
1132
  f"{URL}/peptide?analysisId={analysis_id}&retry=false"
854
1133
  )
855
1134
 
856
- if peptide_data.status_code != 200:
1135
+ if response.status_code != 200:
857
1136
  raise ValueError(
858
- "Invalid request. Could not fetch peptide data. Please check your parameters."
1137
+ "Could not fetch peptide data. Please verify that your analysis completed."
1138
+ )
1139
+
1140
+ response = response.json()
1141
+
1142
+ peptide_data = {}
1143
+ for row in response:
1144
+ if row.get("name") == "npLink":
1145
+ peptide_data["npLink"] = {
1146
+ "url": row.get("link", {}).get("url", "")
1147
+ }
1148
+ if row.get("name") == "panelLink":
1149
+ peptide_data["panelLink"] = {
1150
+ "url": row.get("link", {}).get("url", "")
1151
+ }
1152
+ if not peptide_data:
1153
+ raise ValueError("No peptide result files found.")
1154
+ if not "panelLink" in peptide_data.keys():
1155
+ peptide_data["panelLink"] = {"url": ""}
1156
+
1157
+ return peptide_data
1158
+
1159
+ def list_search_result_files(self, analysis_id: str):
1160
+ """
1161
+ Given an analysis id, this function returns a list of files associated with the analysis.
1162
+
1163
+ Parameters
1164
+ ----------
1165
+ analysis_id : str
1166
+ ID of the analysis for which the data is to be fetched.
1167
+
1168
+ Returns
1169
+ -------
1170
+ files: list[str]
1171
+ List of files associated with the analysis.
1172
+ """
1173
+ try:
1174
+ analysis_metadata = self.get_analysis(analysis_id)[0]
1175
+ except (IndexError, ServerError):
1176
+ raise ValueError("Invalid analysis ID.")
1177
+ except:
1178
+ raise ValueError("Could not fetch analysis metadata.")
1179
+
1180
+ if analysis_metadata.get("status") in ["Failed", None]:
1181
+ raise ValueError("Cannot find files for a failed analysis.")
1182
+ with self._get_auth_session() as s:
1183
+ response = s.get(
1184
+ f"{self._auth.url}api/v2/analysisResultFiles/{analysis_id}"
1185
+ )
1186
+ if response.status_code != 200:
1187
+ raise ServerError(
1188
+ "Could not fetch analysis result files. Please verify that your analysis completed."
1189
+ )
1190
+ response = response.json()
1191
+ files = []
1192
+ for row in response["data"]:
1193
+ files.append(row["filename"])
1194
+ return files
1195
+
1196
+ def get_search_result(
1197
+ self, analysis_id: str, analyte_type: str, rollup: str
1198
+ ):
1199
+ """
1200
+ Load one of the files available via the "Download result files" button on the PAS UI.
1201
+
1202
+ Args:
1203
+ analysis_id (str): id of the analysis
1204
+ analyte_type (str): type of the data. Acceptable options are one of ['protein', 'peptide', 'precursor'].
1205
+ rollup (str): the desired rollup level. Acceptable options are one of ['np', 'panel']; 'panel' is not available when analyte_type is 'precursor'.
1206
+ Returns:
1207
+ pd.DataFrame: the requested file as a pandas DataFrame
1208
+
1209
+ """
1210
+ if not analysis_id:
1211
+ raise ValueError("Analysis ID cannot be empty.")
1212
+
1213
+ if analyte_type not in ["protein", "peptide", "precursor"]:
1214
+ raise ValueError(
1215
+ "Invalid data type. Please choose between 'protein', 'peptide', or 'precursor'."
1216
+ )
1217
+
1218
+ if rollup not in ["np", "panel"]:
1219
+ raise ValueError(
1220
+ "Invalid file. Please choose between 'np', 'panel'."
1221
+ )
1222
+
1223
+ if analyte_type == "precursor" and rollup == "panel":
1224
+ raise ValueError(
1225
+ "Precursor data is not available for panel rollup, please select np rollup."
1226
+ )
1227
+
1228
+ if analyte_type == "protein":
1229
+ if rollup == "np":
1230
+ return url_to_df(
1231
+ self._get_search_result_protein_data(analysis_id)[
1232
+ "npLink"
1233
+ ]["url"]
1234
+ )
1235
+ elif rollup == "panel":
1236
+ return url_to_df(
1237
+ self._get_search_result_protein_data(analysis_id)[
1238
+ "panelLink"
1239
+ ]["url"]
1240
+ )
1241
+ elif analyte_type == "peptide":
1242
+ if rollup == "np":
1243
+ return url_to_df(
1244
+ self._get_search_result_peptide_data(analysis_id)[
1245
+ "npLink"
1246
+ ]["url"]
1247
+ )
1248
+ elif rollup == "panel":
1249
+ return url_to_df(
1250
+ self._get_search_result_peptide_data(analysis_id)[
1251
+ "panelLink"
1252
+ ]["url"]
1253
+ )
1254
+ else:
1255
+ return url_to_df(
1256
+ self.get_search_result_file_url(
1257
+ analysis_id, filename="report.tsv"
1258
+ )["url"]
1259
+ )
1260
+
1261
+ def download_search_output_file(
1262
+ self, analysis_id: str, filename: str, download_path: str = ""
1263
+ ):
1264
+ """
1265
+ Given an analysis id and an analysis result filename, this function downloads the file to the specified path.
1266
+
1267
+ Parameters
1268
+ ----------
1269
+ analysis_id : str
1270
+ ID of the analysis for which the data is to be fetched.
1271
+
1272
+ filename : str
1273
+ Name of the file to be fetched. The name is matched case-insensitively, and the file extension may be omitted.
1274
+
1275
+ download_path : str
1276
+ Path of an existing folder in which the file is saved. Can be relative or absolute as long as the path is valid. Defaults to the current working directory when empty.
1277
+
1278
+ Returns
1279
+ -------
1280
+ None
1281
+ Downloads the file to the specified path.
1282
+ """
1283
+
1284
+ if not download_path:
1285
+ download_path = os.getcwd()
1286
+
1287
+ if not analysis_id:
1288
+ raise ValueError("Analysis ID cannot be empty.")
1289
+
1290
+ if not os.path.exists(download_path):
1291
+ raise ValueError(
1292
+ "Please specify a valid folder path as download path."
1293
+ )
1294
+
1295
+ file = self.get_search_result_file_url(analysis_id, filename)
1296
+ file_url = file["url"]
1297
+ filename = file["filename"]
1298
+
1299
+ print("Downloading file:", filename)
1300
+ for _ in range(2):
1301
+ try:
1302
+ with tqdm(
1303
+ unit="B",
1304
+ unit_scale=True,
1305
+ unit_divisor=1024,
1306
+ miniters=1,
1307
+ desc=f"Progress",
1308
+ ) as t:
1309
+ ssl._create_default_https_context = (
1310
+ ssl._create_unverified_context
1311
+ )
1312
+ urllib.request.urlretrieve(
1313
+ file_url,
1314
+ f"{download_path}/{filename}",
1315
+ reporthook=download_hook(t),
1316
+ data=None,
1317
+ )
1318
+ break
1319
+ except:
1320
+ filename = filename.split("/")
1321
+ name += "/" + "/".join(
1322
+ [filename[i] for i in range(len(filename) - 1)]
859
1323
  )
1324
+ filename = filename[-1]
1325
+ if not os.path.isdir(f"{name}/{filename}"):
1326
+ os.makedirs(f"{name}/")
1327
+ print(f"File {filename} downloaded successfully to {download_path}.")
1328
+ return
1329
+
1330
+ def get_search_result_file_url(self, analysis_id: str, filename: str):
1331
+ """
1332
+ Given an analysis id and an analysis result filename, this function returns the signed URL for the file.
1333
+
1334
+ Parameters
1335
+ ----------
1336
+ analysis_id : str
1337
+ ID of the analysis for which the data is to be fetched.
1338
+
1339
+ filename : str
1340
+ Name of the file to be fetched.
1341
+
1342
+ Returns
1343
+ -------
1344
+ file_url: dict[str, str]
1345
+ Dictionary containing the 'url' and 'filename' of the file.
1346
+ """
1347
+ if "." in filename:
1348
+ filename = ".".join(filename.split(".")[:-1])
1349
+ filename = filename.casefold()
1350
+
1351
+ # Allow user to pass in filenames without an extension.
1352
+ analysis_result_files = self.list_search_result_files(analysis_id)
1353
+ analysis_result_files_prefix_mapper = {
1354
+ (".".join(x.split(".")[:-1])).casefold(): x
1355
+ for x in analysis_result_files
1356
+ }
1357
+ if filename in analysis_result_files_prefix_mapper:
1358
+ filename = analysis_result_files_prefix_mapper[filename]
1359
+ else:
1360
+ raise ValueError(
1361
+ f"Filename {filename} not among the available analysis result files. Please use SeerSDK.list_search_result_files('{analysis_id}') to see available files for this analysis."
1362
+ )
1363
+
1364
+ analysis_metadata = self.get_analysis(analysis_id)[0]
1365
+ if analysis_metadata.get("status") in ["Failed", None]:
1366
+ raise ValueError("Cannot generate links for failed searches.")
1367
+ with self._get_auth_session() as s:
1368
+ file_url = s.post(
1369
+ f"{self._auth.url}api/v1/analysisResultFiles/getUrl",
1370
+ json={
1371
+ "analysisId": analysis_id,
1372
+ "projectId": analysis_metadata["project_id"],
1373
+ "filename": filename,
1374
+ },
1375
+ )
1376
+ response = file_url.json()
1377
+ if not response.get("url"):
1378
+ raise ValueError(f"File {filename} not found.")
1379
+
1380
+ response["filename"] = filename
1381
+ return response
1382
+
1383
+ @deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
1384
+ def get_analysis_result_files(
1385
+ self,
1386
+ analysis_id: str,
1387
+ filenames: _List[str],
1388
+ download_path: str = "",
1389
+ protein_all: bool = False,
1390
+ peptide_all: bool = False,
1391
+ ):
1392
+ """
1393
+ Given an analysis id and a list of file names, this function returns the file in form of downloadable content, if applicable.
1394
+
1395
+ Parameters
1396
+ ----------
1397
+ analysis_id : str
1398
+ ID of the analysis for which the data is to be fetched.
1399
+
1400
+ filenames : list
1401
+ List of filenames to be fetched. Only csv and tsv files are supported.
1402
+
1403
+ download_path : str
1404
+ String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid. Defaults to an empty string.
1405
+
1406
+ protein_all : bool
1407
+ Boolean flag denoting whether to include both default protein files (protein_np.tsv and protein_panel.tsv) in the result. Defaults to False.
1408
+
1409
+ peptide_all : bool
1410
+ Boolean flag denoting whether to include both default peptide files (peptide_np.tsv and peptide_panel.tsv) in the result. Defaults to False.
1411
+
1412
+ Returns
1413
+ -------
1414
+ links: dict[str, pd.DataFrame]
1415
+ Contains dataframe objects for the requested files. If a filename is not found, it is skipped.
860
1416
 
861
- peptide_data = peptide_data.json()
862
1417
 
863
- links = {
864
- "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
865
- "peptide_panel": url_to_df(peptide_data["panelLink"]["url"]),
866
- "protein_np": url_to_df(protein_data["npLink"]["url"]),
867
- "protein_panel": url_to_df(protein_data["panelLink"]["url"]),
1418
+ Examples
1419
+ -------
1420
+ >>> from seer_pas_sdk import SeerSDK
1421
+ >>> seer_sdk = SeerSDK()
1422
+ >>> analysis_id = "YOUR_ANALYSIS_ID_HERE"
1423
+ >>> filenames = ["protein_np.tsv", "peptide_np.tsv"]
1424
+ >>> seer_sdk.get_analysis_result_files(analysis_id, filenames)
1425
+ {
1426
+ "protein_np.tsv": <protein_np dataframe object>,
1427
+ "peptide_np.tsv": <peptide_np dataframe object>
868
1428
  }
1429
+ >>> seer_sdk.get_analysis_result_files(analysis_id, [], protein_all=True, peptide_all=True)
1430
+ {
1431
+ "protein_np.tsv": <protein_np dataframe object>,
1432
+ "protein_panel.tsv": <protein_panel dataframe object>,
1433
+ "peptide_np.tsv": <peptide_np dataframe object>,
1434
+ "peptide_panel.tsv": <peptide_panel dataframe object>
1435
+ }
1436
+ >>> seer_sdk.get_analysis_result_files(analysis_id, ["report.tsv"], download_path="/Users/Downloads")
1437
+ { "report.tsv": <report.tsv dataframe object> }
1438
+ """
869
1439
 
870
- if download_path:
871
- name = f"{download_path}/downloads/{analysis_id}"
872
- if not os.path.exists(name):
873
- os.makedirs(name)
1440
+ if not analysis_id:
1441
+ raise ValueError("Analysis ID cannot be empty.")
874
1442
 
875
- links["peptide_np"].to_csv(f"{name}/peptide_np.csv", sep="\t")
876
- links["peptide_panel"].to_csv(
877
- f"{name}/peptide_panel.csv", sep="\t"
1443
+ if download_path and not os.path.exists(download_path):
1444
+ raise ValueError(
1445
+ "Please specify a valid folder path as download path."
1446
+ )
1447
+
1448
+ links = {}
1449
+ if protein_all:
1450
+ protein_data = self.get_analysis_result_protein_data(
1451
+ analysis_id, link=True
1452
+ )
1453
+ links["protein_np.tsv"] = protein_data["npLink"]["url"]
1454
+ links["protein_panel.tsv"] = protein_data["panelLink"]["url"]
1455
+ if peptide_all:
1456
+ peptide_data = self.get_analysis_result_peptide_data(
1457
+ analysis_id, link=True
1458
+ )
1459
+ links["peptide_np.tsv"] = peptide_data["npLink"]["url"]
1460
+ links["peptide_panel.tsv"] = peptide_data["panelLink"]["url"]
1461
+
1462
+ filenames = set(filenames)
1463
+ # Allow user to pass in filenames without an extension.
1464
+ analysis_result_files = self.list_search_result_files(analysis_id)
1465
+ analysis_result_files_prefix_mapper = {
1466
+ ".".join(x.split(".")[:-1]): x for x in analysis_result_files
1467
+ }
1468
+ for filename in filenames:
1469
+ if filename in analysis_result_files_prefix_mapper:
1470
+ filename = analysis_result_files_prefix_mapper[filename]
1471
+ if filename == "protein_np.tsv":
1472
+ if protein_all:
1473
+ continue
1474
+ protein_data = self.get_analysis_result_protein_data(
1475
+ analysis_id, link=True
1476
+ )
1477
+ links["protein_np.tsv"] = protein_data["npLink"]["url"]
1478
+ elif filename == "protein_panel.tsv":
1479
+ if protein_all:
1480
+ continue
1481
+ protein_data = self.get_analysis_result_protein_data(
1482
+ analysis_id, link=True
1483
+ )
1484
+ links["protein_panel.tsv"] = protein_data["panelLink"]["url"]
1485
+ elif filename == "peptide_np.tsv":
1486
+ if peptide_all:
1487
+ continue
1488
+ peptide_data = self.get_analysis_result_peptide_data(
1489
+ analysis_id, link=True
878
1490
  )
879
- links["protein_np"].to_csv(f"{name}/protein_np.csv", sep="\t")
880
- links["protein_panel"].to_csv(
881
- f"{name}/protein_panel.csv", sep="\t"
1491
+ links["peptide_np.tsv"] = peptide_data["npLink"]["url"]
1492
+ elif filename == "peptide_panel.tsv":
1493
+ if peptide_all:
1494
+ continue
1495
+ peptide_data = self.get_analysis_result_peptide_data(
1496
+ analysis_id, link=True
1497
+ )
1498
+ links["peptide_panel.tsv"] = peptide_data["panelLink"]["url"]
1499
+ else:
1500
+ try:
1501
+ links[filename] = self._get_search_result_file_url(
1502
+ analysis_id, filename
1503
+ )["url"]
1504
+ except Exception as e:
1505
+ print(e)
1506
+ continue
1507
+
1508
+ links = {
1509
+ k: url_to_df(v, is_tsv=k.endswith(".tsv"))
1510
+ for k, v in links.items()
1511
+ }
1512
+ if download_path:
1513
+ name = f"{download_path}/downloads/{analysis_id}"
1514
+ print(f"Start download to path {name}")
1515
+ if not os.path.exists(name):
1516
+ os.makedirs(name)
1517
+ for filename, content in links.items():
1518
+ separator = ","
1519
+ if filename.endswith(".tsv"):
1520
+ separator = "\t"
1521
+ content.to_csv(f"{name}/{filename}", sep=separator)
1522
+ print("Download complete.")
1523
+
1524
+ return links
1525
+
1526
+ @deprecation.deprecated(deprecated_in="0.3.0", removed_in="1.0.0")
1527
+ def get_analysis_result(
1528
+ self,
1529
+ analysis_id: str,
1530
+ download_path: str = "",
1531
+ diann_report: bool = False,
1532
+ ):
1533
+ """
1534
+ Given an `analysis_id`, this function returns all relevant analysis data files in form of downloadable content, if applicable.
1535
+
1536
+ Parameters
1537
+ ----------
1538
+ analysis_id : str
1539
+ ID of the analysis for which the data is to be fetched.
1540
+
1541
+ download_path : str
1542
+ String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid. Defaults to an empty string.
1543
+
1544
+ diann_report : bool
1545
+ Boolean flag denoting whether the user wants the DIANN report to be included in the response. Defaults to False.
1546
+
1547
+ Returns
1548
+ -------
1549
+ links: dict
1550
+ Contains dataframe objects for the `analysis_id`, provided that the analysis has completed.
1551
+
1552
+ Examples
1553
+ -------
1554
+ >>> from seer_pas_sdk import SeerSDK
1555
+ >>> seer_sdk = SeerSDK()
1556
+
1557
+ >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
1558
+ >>> {
1559
+ "peptide_np": <peptide_np dataframe object>,
1560
+ "peptide_panel": <peptide_panel dataframe object>,
1561
+ "protein_np": <protein_np dataframe object>,
1562
+ "protein_panel": <protein_panel dataframe object>
1563
+ }
1564
+
1565
+ >>> seer_sdk.get_analysis_result("YOUR_DIANN_ANALYSIS_ID_HERE")
1566
+ >>> {
1567
+ "peptide_np": <peptide_np dataframe object>,
1568
+ "peptide_panel": <peptide_panel dataframe object>,
1569
+ "protein_np": <protein_np dataframe object>,
1570
+ "protein_panel": <protein_panel dataframe object>,
1571
+ "diann_report": <report.tsv dataframe object>
1572
+ }
1573
+
1574
+ >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
1575
+ >>> { "status": "Download complete." }
1576
+ """
1577
+
1578
+ if not analysis_id:
1579
+ raise ValueError("Analysis ID cannot be empty.")
1580
+
1581
+ if download_path and not os.path.exists(download_path):
1582
+ raise ValueError("The download path you entered is invalid.")
1583
+
1584
+ protein_data = self.get_analysis_result_protein_data(
1585
+ analysis_id, link=True
1586
+ )
1587
+ peptide_data = self.get_analysis_result_peptide_data(
1588
+ analysis_id, link=True
1589
+ )
1590
+ links = {
1591
+ "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
1592
+ "peptide_panel": url_to_df(peptide_data["panelLink"]["url"]),
1593
+ "protein_np": url_to_df(protein_data["npLink"]["url"]),
1594
+ "protein_panel": url_to_df(protein_data["panelLink"]["url"]),
1595
+ }
1596
+
1597
+ if diann_report:
1598
+ diann_report_url = self._get_search_result_file_url(
1599
+ analysis_id, "report.tsv"
1600
+ )
1601
+ links["diann_report"] = url_to_df(diann_report_url["url"])
1602
+
1603
+ if download_path:
1604
+ name = f"{download_path}/downloads/{analysis_id}"
1605
+ if not os.path.exists(name):
1606
+ os.makedirs(name)
1607
+
1608
+ links["peptide_np"].to_csv(f"{name}/peptide_np.csv", sep="\t")
1609
+ links["peptide_panel"].to_csv(
1610
+ f"{name}/peptide_panel.csv", sep="\t"
1611
+ )
1612
+ links["protein_np"].to_csv(f"{name}/protein_np.csv", sep="\t")
1613
+ links["protein_panel"].to_csv(
1614
+ f"{name}/protein_panel.csv", sep="\t"
1615
+ )
1616
+
1617
+ if "diann_report" in links:
1618
+ links["diann_report"].to_csv(
1619
+ f"{name}/diann_report.csv", sep="\t"
882
1620
  )
883
1621
 
884
- return {"status": "Download complete."}
1622
+ return {"status": "Download complete."}
885
1623
 
886
- return links
1624
+ return links
887
1625
 
888
1626
  def analysis_complete(self, analysis_id: str):
889
1627
  """
@@ -932,7 +1670,7 @@ class SeerSDK:
932
1670
 
933
1671
  Returns
934
1672
  -------
935
- list
1673
+ list[str]
936
1674
  Contains the list of files in the folder.
937
1675
 
938
1676
  Examples
@@ -987,8 +1725,8 @@ class SeerSDK:
987
1725
 
988
1726
  Returns
989
1727
  -------
990
- message: dict
991
- Contains the message whether the files were downloaded or not.
1728
+ message: dict[str, str]
1729
+ Contains the 'message' whether the files were downloaded or not.
992
1730
  """
993
1731
 
994
1732
  urls = []
@@ -1013,9 +1751,7 @@ class SeerSDK:
1013
1751
  print(f'Downloading files to "{name}"\n')
1014
1752
 
1015
1753
  URL = f"{self._auth.url}api/v1/msdataindex/download/getUrl"
1016
- tenant_id = jwt.decode(ID_TOKEN, options={"verify_signature": False})[
1017
- "custom:tenantId"
1018
- ]
1754
+ tenant_id = self._auth.active_tenant_id
1019
1755
 
1020
1756
  for path in paths:
1021
1757
  with self._get_auth_session() as s:
@@ -1033,7 +1769,6 @@ class SeerSDK:
1033
1769
  "Could not download file. Please check if the backend is running."
1034
1770
  )
1035
1771
  urls.append(download_url.text)
1036
-
1037
1772
  for i in range(len(urls)):
1038
1773
  filename = paths[i].split("/")[-1]
1039
1774
  url = urls[i]
@@ -1077,22 +1812,80 @@ class SeerSDK:
1077
1812
 
1078
1813
  return {"message": f"Files downloaded successfully to '{name}'"}
1079
1814
 
1080
- def group_analysis_results(self, analysis_id: str, box_plot: dict = None):
1815
+ def get_group_analysis(
1816
+ self, analysis_id, group_analysis_id=None, **kwargs
1817
+ ):
1818
+ """
1819
+ Returns the list of group analysis objects for the given analysis id, provided they exist.
1820
+
1821
+ Parameters
1822
+ ----------
1823
+ analysis_id : str
1824
+ The analysis id.
1825
+
1826
+ group_analysis_id : str, optional
1827
+ The group analysis id, defaulted to None. If provided, the function will return the group analysis object for the given group analysis id.
1828
+
1829
+ **kwargs : dict, optional
1830
+ Search keyword parameters to be passed in. Acceptable values are 'name' or 'description'.
1831
+
1832
+ Returns
1833
+ -------
1834
+ res : list[dict]
1835
+ A list of dictionaries containing the group analysis objects.
1836
+
1837
+ """
1838
+ params = {"analysisid": analysis_id}
1839
+ if kwargs and not group_analysis_id:
1840
+ if len(kwargs.keys()) > 1:
1841
+ raise ValueError("Please include only one search parameter.")
1842
+ search_field = list(kwargs.keys())[0]
1843
+ if search_field not in ["name", "description"]:
1844
+ raise ValueError(
1845
+ "Invalid search field. Please choose between 'name' or 'description'."
1846
+ )
1847
+ search_item = kwargs[search_field]
1848
+
1849
+ if not search_item:
1850
+ raise ValueError(
1851
+ f"Please provide a non null value for {search_field}"
1852
+ )
1853
+ params["searchFields"] = search_field
1854
+ params["searchItem"] = search_item
1855
+
1856
+ URL = f"{self._auth.url}api/v1/groupanalysis/groupanalyses"
1857
+
1858
+ if group_analysis_id:
1859
+ URL = f"{URL}/{group_analysis_id}"
1860
+ params["id"] = group_analysis_id
1861
+
1862
+ with self._get_auth_session() as s:
1863
+ response = s.get(URL, params=params)
1864
+ if response.status_code != 200:
1865
+ raise ServerError(
1866
+ "Request failed. Please check your parameters."
1867
+ )
1868
+ response = response.json()
1869
+ return response
1870
+
1871
+ def group_analysis_results(self, analysis_id: str, group_analysis_id=None):
1081
1872
  """
1082
1873
  Returns the group analysis data for the given analysis id, provided it exists.
1083
1874
 
1875
+ If no group analysis id is provided, the function will return the most recent group analysis data for the given analysis id.
1876
+
1084
1877
  Parameters
1085
1878
  ----------
1086
1879
  analysis_id : str
1087
1880
  The analysis id.
1088
1881
 
1089
- box_plot : dict, optional
1090
- The box plot configuration needed for the analysis, defaulted to None. Contains `feature_type` ("protein" or "peptide") and `feature_ids` (comma separated list of feature IDs) keys.
1882
+ group_analysis_id : str, optional
1883
+ The group analysis id, defaulted to None.
1091
1884
 
1092
1885
  Returns
1093
1886
  -------
1094
1887
  res : dict
1095
- A dictionary containing the group analysis data.
1888
+ A dictionary containing the group analysis object.
1096
1889
 
1097
1890
  Examples
1098
1891
  -------
@@ -1142,7 +1935,6 @@ class SeerSDK:
1142
1935
  "peptide_processed_long_form_file_url": "",
1143
1936
  },
1144
1937
  },
1145
- "box_plot": [],
1146
1938
  }
1147
1939
 
1148
1940
  # Pre-GA data call
@@ -1153,7 +1945,7 @@ class SeerSDK:
1153
1945
  json={"analysisId": analysis_id, "grouping": "condition"},
1154
1946
  )
1155
1947
  if protein_pre_data.status_code != 200:
1156
- raise ValueError(
1948
+ raise ServerError(
1157
1949
  "Invalid request. Could not fetch group analysis protein pre data. Please check your parameters."
1158
1950
  )
1159
1951
 
@@ -1161,15 +1953,15 @@ class SeerSDK:
1161
1953
 
1162
1954
  res["pre"]["protein"] = protein_pre_data
1163
1955
 
1164
- with requests.Session() as s:
1165
- s.headers.update(HEADERS)
1956
+ with self._get_auth_session() as s:
1166
1957
 
1167
1958
  peptide_pre_data = s.post(
1168
1959
  url=f"{URL}api/v2/groupanalysis/peptide",
1169
1960
  json={"analysisId": analysis_id, "grouping": "condition"},
1170
1961
  )
1962
+
1171
1963
  if peptide_pre_data.status_code != 200:
1172
- raise ValueError(
1964
+ raise ServerError(
1173
1965
  "Invalid request. Could not fetch group analysis peptide pre data. Please check your parameters."
1174
1966
  )
1175
1967
 
@@ -1177,18 +1969,21 @@ class SeerSDK:
1177
1969
  res["pre"]["peptide"] = peptide_pre_data
1178
1970
 
1179
1971
  # Post-GA data call
1180
- with requests.Session() as s:
1181
- s.headers.update(HEADERS)
1182
-
1183
- get_saved_result = s.get(
1184
- f"{URL}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
1185
- )
1186
-
1187
- if get_saved_result.status_code != 200:
1188
- raise ValueError(
1189
- "Invalid request. Could not fetch group analysis post data. Please check your parameters."
1972
+ with self._get_auth_session() as s:
1973
+ if group_analysis_id:
1974
+ get_saved_result = self.get_group_analysis(
1975
+ analysis_id=analysis_id,
1976
+ group_analysis_id=group_analysis_id,
1977
+ )
1978
+ else:
1979
+ get_saved_result = s.get(
1980
+ f"{URL}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
1190
1981
  )
1191
- get_saved_result = get_saved_result.json()
1982
+ if get_saved_result.status_code != 200:
1983
+ raise ServerError(
1984
+ "Could not fetch saved results. Please check your analysis id."
1985
+ )
1986
+ get_saved_result = get_saved_result.json()
1192
1987
 
1193
1988
  # Protein data
1194
1989
  if "pgResult" in get_saved_result:
@@ -1198,6 +1993,13 @@ class SeerSDK:
1198
1993
  if "peptideResult" in get_saved_result:
1199
1994
  res["post"]["peptide"] = get_saved_result["peptideResult"]
1200
1995
 
1996
+ # require that either protein or peptide data exists
1997
+ # Error handling is necessary for volcano plot calculations downstream
1998
+ if not (res["post"].get("protein") or res["post"].get("peptide")):
1999
+ raise ValueError(
2000
+ "No group analysis data returned from server."
2001
+ )
2002
+
1201
2003
  # Protein URLs
1202
2004
  if "pgProcessedFileUrl" in get_saved_result:
1203
2005
  res["post"]["protein_url"]["protein_processed_file_url"] = (
@@ -1219,32 +2021,714 @@ class SeerSDK:
1219
2021
  "peptide_processed_long_form_file_url"
1220
2022
  ] = get_saved_result["peptideProcessedLongFormFileUrl"]
1221
2023
 
1222
- # Box plot data call
1223
- if not box_plot:
1224
- del res["box_plot"]
1225
- return res
2024
+ return res
1226
2025
 
1227
- with requests.Session() as s:
1228
- s.headers.update(HEADERS)
1229
- box_plot["feature_type"] = box_plot["feature_type"].lower()
2026
def get_box_plot_data(
    self,
    analysis_id: str,
    group_analysis_id: str = None,
    feature_ids: _List[str] = [],
    show_significant_only: bool = False,
    as_df=False,
    volcano_plot=False,
    cached=False,
):
    """Return box plot intensities for an analysis, optionally with its volcano plot.

    Args:
        analysis_id (str): ID of the analysis.
        group_analysis_id (str, optional): ID of the group analysis. When given, the filters stored on that group analysis are forwarded with the intensity request. Defaults to None.
        feature_ids (list[str], optional): Restrict the request to these feature ids. Defaults to [].
        show_significant_only (bool, optional): Keep only rows whose feature is reported by the builder's `get_significant_rows()`. Defaults to False.
        as_df (bool, optional): Convert the box plot rows (and the volcano plot, unless `cached`) to a DataFrame. Defaults to False.
        volcano_plot (bool, optional): Also return the volcano plot data. Defaults to False.
        cached (bool, optional): Return the volcano plot as the VolcanoPlotBuilder object itself. Ignored unless `volcano_plot` is true. Defaults to False.

    Raises:
        ServerError: The intensity endpoint did not answer with status 200.

    Returns:
        list[dict] | pd.DataFrame | dict: The box plot rows, each extended with 'condition' and 'gene'.
            When `volcano_plot` is true, a dict with the keys 'box_plot' and 'volcano_plot' instead.
    """
    with self._get_auth_session() as session:
        # Request 1: the volcano plot builder supplies the feature -> gene
        # mapping, the feature type and the significant rows.
        builder = self.get_volcano_plot_data(
            analysis_id, cached=True, group_analysis_id=group_analysis_id
        )
        gene_by_feature = builder.protein_gene_map

        # Request 2: sample metadata, used to attach a condition to each row.
        samples_metadata = self._get_analysis_samples(analysis_id=analysis_id)

        payload = {"analysisId": analysis_id}
        if feature_ids:
            payload["featureIds"] = ",".join(feature_ids)

        # Request 3: the group analysis carries the filters to forward.
        if group_analysis_id:
            group_analysis = self.get_group_analysis(
                analysis_id, group_analysis_id=group_analysis_id
            )
            stored_filters = group_analysis["parameters"]["filters"]
            if stored_filters:
                payload["filters"] = stored_filters

        if builder.type == "peptide":
            payload["featureType"] = builder.type
        else:
            payload["featureType"] = "proteingroup"

        # Request 4: the intensities themselves.
        reply = session.post(
            url=f"{self._auth.url}api/v1/groupanalysis/rawdata", json=payload
        )
        if reply.status_code != 200:
            raise ServerError("Could not fetch box plot data.")

        feature_key = "peptide" if builder.type == "peptide" else "proteinId"

        # Drop rows whose feature is unknown to the volcano plot mapping.
        rows = [
            entry
            for entry in reply.json()
            if entry[feature_key] in gene_by_feature
        ]
        condition_by_sample = {
            sample["id"]: sample["condition"] for sample in samples_metadata
        }

        if show_significant_only:
            significant = set(builder.get_significant_rows())
            rows = [entry for entry in rows if entry[feature_key] in significant]

        for entry in rows:
            entry["condition"] = condition_by_sample.get(
                entry["sampleId"], None
            )
            entry["gene"] = builder.protein_gene_map[entry[feature_key]]

        box_plot_result = pd.DataFrame(rows) if as_df else rows

        if not volcano_plot:
            return box_plot_result

        if cached:
            vplot = builder
        elif as_df:
            vplot = pd.DataFrame(builder.volcano_plot)
        else:
            vplot = builder.volcano_plot

        return {"box_plot": box_plot_result, "volcano_plot": vplot}
2135
+
2136
def get_all_volcano_plot_data(self, analysis_id: str, box_plot=False):
    """
    Collect volcano plot data for every group analysis of an analysis.

    Args:
        analysis_id (str): ID of the analysis.
        box_plot (bool, optional): Mark true to also include box plot data for each group analysis. Defaults to False.

    Returns:
        dict[str, dict]: Maps each group analysis id to a dict holding 'volcano_plot' and,
            when `box_plot` is true, 'box_plot'. Empty when no group analysis has an id.
    """
    listing = self.get_group_analysis(analysis_id).get("data", [])
    group_analysis_ids = [
        entry["id"] for entry in listing if entry.get("id")
    ]
    if not group_analysis_ids:
        return {}

    results = {}
    for ga_id in group_analysis_ids:
        if box_plot:
            # get_box_plot_data already returns both plots; store a copy.
            results[ga_id] = dict(
                self.get_box_plot_data(
                    analysis_id, ga_id, as_df=True, volcano_plot=True
                )
            )
        else:
            results[ga_id] = {
                "volcano_plot": self.get_volcano_plot_data(
                    analysis_id, group_analysis_id=ga_id, as_df=True
                )
            }

    return results
2177
+
2178
+ def _get_analysis_pca(
2179
+ self,
2180
+ analysis_ids: _List[str],
2181
+ sample_ids: _List[str],
2182
+ type: str,
2183
+ hide_control: bool = False,
2184
+ ):
2185
+ """
2186
+ ****************
2187
+ [UNEXPOSED METHOD CALL]
2188
+ ****************
2189
+ Get PCA data for given analyses and samples.
2190
+ Args:
2191
+ analysis_ids (list[str]): IDs of the analyses of interest.
2192
+ sample_ids (list[str]): IDs of the samples of interest.
2193
+ type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
2194
+ hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
2195
+ Raises:
2196
+ ValueError: No analysis IDs provided.
2197
+ ValueError: No sample IDs provided.
2198
+ ValueError: Invalid type provided.
2199
+ ServerError: Could not fetch PCA data.
2200
+ Returns:
2201
+ dict[str, list|float]
2202
+ Returns response object containing 'xContributionRatio' (float), 'yContributionRatio' (float), 'samples' (list[dict]), and 'points' (list[float]).
2203
+ """
2204
+ if not analysis_ids:
2205
+ raise ValueError("Analysis IDs cannot be empty.")
2206
+ if type not in ["protein", "peptide"]:
2207
+ raise ValueError("Type must be either 'protein' or 'peptide'.")
2208
+
2209
+ URL = f"{self._auth.url}api/v1/analysisqcpca"
2210
+
2211
+ with self._get_auth_session() as s:
2212
+ json = {
2213
+ "analysisIds": ",".join(analysis_ids),
2214
+ "type": type,
2215
+ }
2216
+ if sample_ids:
2217
+ json["sampleIds"] = ",".join(sample_ids)
2218
+
2219
+ # specify hideControl as a string - unexpected behavior occurs if a boolean is passed
2220
+ if hide_control:
2221
+ json["hideControl"] = "true"
2222
+ else:
2223
+ json["hideControl"] = "false"
2224
+
2225
+ pca_data = s.post(URL, json=json)
2226
+
2227
+ if pca_data.status_code != 200:
2228
+ raise ServerError("Could not fetch PCA data.")
2229
+
2230
+ return pca_data.json()
2231
+
2232
def get_analysis_pca_data(
    self,
    analysis_ids: _List[str],
    type: str,
    sample_ids: _List[str] = [],
    hide_control: bool = False,
    as_df=False,
):
    """
    Get PCA data for given analyses and samples formatted in a DataFrame or a dictionary.
    Args:
        analysis_ids (list[str]): IDs of the analyses of interest.
        type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
        sample_ids (list[str], optional): IDs of the samples of interest.
        hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
        as_df (bool, optional): whether the result should be converted to a DataFrame. Defaults to False.
    Raises:
        ValueError: No analysis IDs provided.
        ValueError: Invalid type parameter provided.
        ServerError: Could not fetch PCA data.
    Returns:
        A dictionary with the following keys:
        - x_contribution_ratio (float | None): 'xContributionRatio' from the response, None if absent.
        - y_contribution_ratio (float | None): 'yContributionRatio' from the response, None if absent.
        - data (list[dict] | pd.DataFrame): A list of dictionaries or a dataframe with each row containing the following keys/columns:
            - sample_name (str): Name of the sample.
            - plate_name (str): Name of the plate.
            - sample_id (int): ID of the sample.
            - condition (str): Condition.
            - PC1 (float): X-value of the PCA point.
            - PC2 (float): Y-value of the PCA point.
            - custom_* (str): Custom fields. Included if meaningful, i.e., not null, in the data.
          When the server returns no points, `data` is an empty list (or an
          empty DataFrame carrying only the generic columns).
    Examples
    --------
    >>> from seer_pas_sdk import *
    >>> sdk = SeerSDK(USERNAME, PASSWORD)
    >>> sdk.get_analysis_pca_data(
            analysis_ids=["analysis_id"],
            sample_ids=["sample_id"],
            type="protein",
            hide_control=False
        )
    """
    pca_data = self._get_analysis_pca(
        analysis_ids, sample_ids, type, hide_control
    )

    # common columns returned by the API
    generic_columns = [
        "sample_name",
        "plate_name",
        "sample_id",
        "condition",
        "PC1",
        "PC2",
    ]

    # edge case where yContributionRatio is NaN (and therefore missing)
    # when zero points are returned; report a missing ratio as None.
    x_contribution_ratio = pca_data.get("xContributionRatio")
    y_contribution_ratio = pca_data.get("yContributionRatio")

    # Pair every sample with its point; dict unpacking keeps this working
    # on interpreters that predate the dict union operator.
    records = [
        {**sample, "PC1": point[0], "PC2": point[1]}
        for sample, point in zip(pca_data["samples"], pca_data["points"])
    ]

    if records:
        df = pd.DataFrame(records)

        # Slice the df such that only custom columns are dropped in the absence of data
        df = pd.concat(
            [
                df.drop(columns=generic_columns).dropna(how="all", axis=1),
                df[generic_columns],
            ],
            axis=1,
        )

        # Filter down to a minimal set of columns
        permitted_columns = [
            x
            for x in df.columns
            if x in generic_columns or x.startswith("custom_")
        ]
        df = df.loc[:, permitted_columns]
    else:
        # An empty frame has no columns, so dropping the generic columns
        # would raise KeyError; build the empty result explicitly instead.
        df = pd.DataFrame(columns=generic_columns)

    # Keep the DataFrame only if as_df is True
    data = df if as_df else df.to_dict(orient="records")
    return dict(
        x_contribution_ratio=x_contribution_ratio,
        y_contribution_ratio=y_contribution_ratio,
        data=data,
    )
2334
+
2335
def get_analysis_hierarchical_clustering(
    self,
    analysis_ids: _List[str],
    sample_ids: _List[str] = [],
    hide_control: bool = False,
):
    """
    Get hierarchical clustering data for given analyses and samples.
    Args:
        analysis_ids (list[str]): IDs of the analyses.
        sample_ids (list[str], optional): IDs of the samples. Omitted from the request when empty.
        hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
    Raises:
        ValueError: No analysis IDs provided.
        ValueError: Response status code is not 200.
        ValueError: The response holds no 'samples' entry.
    Returns:
        dict
            Hierarchical clustering data returned by the API.
    """
    if not analysis_ids:
        raise ValueError("Analysis IDs cannot be empty.")

    URL = f"{self._auth.url}api/v1/analysishcluster"

    with self._get_auth_session() as s:
        payload = {
            "analysisIds": ",".join(analysis_ids),
        }
        if sample_ids:
            payload["sampleIds"] = ",".join(sample_ids)

        # specify hideControl as a string
        # Python bool values are not recognized by the API
        payload["hideControl"] = "true" if hide_control else "false"

        hc_data = s.post(URL, json=payload)

        if hc_data.status_code != 200:
            raise ValueError(
                "Invalid request. Please check your parameters."
            )

        data = hc_data.json()

    if "samples" not in data:
        raise ValueError("No sample data returned from server.")

    # Each sample is shallow-copied as-is; no fields are filtered out here.
    data["samples"] = [dict(sample) for sample in data["samples"]]

    return data
2396
+
2397
def get_ppi_network_data(
    self, significant_pgs: _List[str], species: str = None
):
    """
    Request PPI network data for a set of significant protein groups.
    Args:
        significant_pgs (_List[str]): Significant protein groups; sent as one comma separated string.
        species (str, optional): Species of interest. Only sent when provided. Defaults to None.
    Raises:
        ValueError: No significant protein groups provided.
        ValueError: Response status code is not 200.
    Returns:
        dict
            Response returned by the API.
    """
    if not significant_pgs:
        raise ValueError("Significant protein groups cannot be empty.")

    endpoint = f"{self._auth.url}api/v1/groupanalysis/stringdb"

    with self._get_auth_session() as session:
        payload = {"significantPGs": ",".join(significant_pgs)}
        if species:
            payload["species"] = species

        reply = session.post(endpoint, json=payload)
        if reply.status_code != 200:
            raise ValueError("Server error - bad response")

        return reply.json()
2430
+
2431
+ # groups are user defined by the sample description file
2432
def get_cluster_heatmap_data(
    self,
    analysis_id: str,
    grouping: str,
    groups: _List[str],
    contrasts: _List[_Tuple[int, ...]],
    stat_test: str,
    feature_type: str,
    significant_pgs: _List[str] = [],
):
    """Request cluster heatmap data for the given analysis.

    Args:
        analysis_id (str): ID of the analysis
        grouping (str): Category of sample groups
        groups (_List[str]): sample groups
        contrasts (_List[_Tuple[int, ...]]): Indicate which groups are compared against each other. e.g. [(0, 1, -1, 0), (1, 0, 0, -1)]
        stat_test (str): Statistical test to be used, either ttest or wilcoxon
        feature_type (str): Type of feature to be used, either proteingroup or peptide
        significant_pgs (_List[str], optional): significant protein group IDs. Defaults to [].

    Raises:
        ValueError: "Feature type must be either 'proteingroup' or 'peptide'."
        ValueError: "Stat test must be either 'ttest' or 'wilcoxon'."
        ValueError: Invalid contrast value.
        ValueError: Server error

    Returns:
        dict: the response object
            clusterProtein: List of protein clusters
                clusters:
                    indexes: list[int], List of indexes
                    height: int, Height of the cluster
                    children: list[dict] | None, Children of the cluster
            clusterSample: List of sample clusters
                clusters:
                    indexes: list[int], List of indexes
                    height: int, Height of the cluster
                    children: list[dict] | None, Children of the cluster
            data: List of data

    """
    if feature_type not in ("proteingroup", "peptide"):
        raise ValueError(
            "Feature type must be either 'proteingroup' or 'peptide'."
        )

    if stat_test not in ("ttest", "wilcoxon"):
        raise ValueError("Stat test must be either 'ttest' or 'wilcoxon'.")

    # Every contrast is checked against the number of groups before sending.
    for contrast in contrasts:
        validate_contrast(contrast, len(groups))

    # Wire format: values joined by ',', contrasts joined by ';'.
    formatted_contrasts = ";".join(
        ",".join(str(value) for value in contrast) for contrast in contrasts
    )

    payload = {
        "analysisId": analysis_id,
        "grouping": grouping,
        "groups": ",".join(groups),
        "contrasts": formatted_contrasts,
        "statTest": stat_test,
        "featureType": feature_type,
        "significantPGs": ",".join(significant_pgs),
    }

    with self._get_auth_session() as session:
        endpoint = f"{self._auth.url}api/v2/clusterheatmap"
        reply = session.post(endpoint, json=payload)
        if reply.status_code != 200:
            raise ValueError("Server error. Bad response.")
        return reply.json()
2504
+
2505
def get_enrichment_plot(
    self,
    analysis_id: str,
    significant_pgs: _List[str],
    summarize_output: bool = False,
    exclude_singleton: bool = False,
    cutoff: float = None,
    species: str = None,
):
    """
    Request enrichment plot data for a given analysis ID.

    Args:
        analysis_id (str): ID of the analysis.
        significant_pgs (_List[str]): List of significant protein/peptide groups.
        summarize_output (bool, optional): Summarize the output. Defaults to False.
        exclude_singleton (bool, optional): Exclude singleton values. Defaults to False.
        cutoff (float, optional): Cutoff value for the p-value to determine significance. Only sent when truthy. Defaults to None.
        species (str, optional): Species to filter the data by. Only sent when truthy. Defaults to None.

    Raises:
        ValueError: No significant groups provided.
        ValueError: Could not fetch enrichment plot data.

    Returns:
        dict: A dictionary containing the enrichment plot data.
    """

    endpoint = f"{self._auth.url}api/v1/groupanalysis/enrichmentgo"

    if not significant_pgs:
        raise ValueError("Significant pgs cannot be empty.")

    with self._get_auth_session() as session:
        payload = {
            "analysisId": analysis_id,
            "significantPGs": significant_pgs,
            "summarizeOutput": summarize_output,
            "excludeSingleton": exclude_singleton,
        }
        # NOTE(review): a cutoff of 0 is falsy and is therefore treated as
        # "not provided" - confirm that this is intended.
        if cutoff:
            payload["cutoff"] = cutoff
        if species:
            payload["species"] = species

        reply = session.post(endpoint, json=payload)

        if reply.status_code != 200:
            raise ValueError("Could not fetch enrichment plot data.")

        return reply.json()
2555
+
2556
def get_volcano_plot_data(
    self,
    analysis_id,
    group_analysis_id=None,
    significance_threshold=0.05,
    fold_change_threshold=1,
    label_by="fold_change",
    cached=False,
    as_df=False,
):
    """Get volcano plot data for a given analysis ID.

    Args:
        analysis_id (str): ID of the analysis.
        group_analysis_id (str, optional): ID of the group analysis, forwarded to `group_analysis_results`. Defaults to None.
        significance_threshold (float, optional): Cutoff value for the p-value to determine significance. Defaults to 0.05.
        fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance. Defaults to 1.
        label_by (str, optional): Metric to sort result data. Defaults to "fold_change".
        cached (bool, optional): Return a VolcanoPlotBuilder object for calculation reuse. Takes precedence over `as_df`. Defaults to False.
        as_df (bool, optional): whether the result should be converted to a DataFrame. Defaults to False.

    Raises:
        ServerError - could not fetch group analysis results.
    Returns:
        list[dict] | pd.DataFrame | VolcanoPlotBuilder: A list of dictionaries, a DataFrame, or a VolcanoPlotBuilder object containing the volcano plot data.
            Object contains the following columns: 'logFD', 'negativeLog10P', 'dataIndex', 'rowID', 'gene', 'protein',
            'group', 'significant', 'euclideanDistance'
    """
    try:
        response = self.group_analysis_results(
            analysis_id, group_analysis_id=group_analysis_id
        )
    except Exception as e:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit propagate, and keep the cause for debugging.
        raise ServerError(
            f"Could not fetch group analysis results. Please check that group analysis has completed for analysis {analysis_id}."
        ) from e

    obj = VolcanoPlotBuilder(
        response, significance_threshold, fold_change_threshold, label_by
    )

    if cached:
        return obj
    if as_df:
        return pd.DataFrame(obj.volcano_plot)
    return obj.volcano_plot
2603
+
2604
+ def _get_analysis_samples(
2605
+ self, analysis_id: str = None, analysis_name: str = None, as_df=False
2606
+ ):
2607
+ """
2608
+ Get the samples associated with a given analysis.
2609
+
2610
+ Args:
2611
+ analysis_id (str): UUID identifier of the analysis. Defaults to None.
2612
+ analysis_name (str): Name of the analysis. Defaults to None.
2613
+ as_df (bool) : whether the result should be converted to a DataFrame. Defaults to False.
2614
+
2615
+ Raises:
2616
+ ServerError - could not retrieve samples for analysis.
2617
+ Returns:
2618
+ list[dict] : a list of samples associated with the analysis.
2619
+ """
2620
+
2621
+ if not analysis_id and not analysis_name:
2622
+ raise ValueError("Analysis cannot be empty.")
2623
+
2624
+ if analysis_id:
2625
+ rows = [{"id": analysis_id}]
2626
+ else:
2627
+ rows = self.get_analysis(analysis_name=analysis_name)
2628
+
2629
+ resp = []
2630
+ for row in rows:
2631
+ URL = f"{self._auth.url}api/v1/analyses/samples/{row['id']}"
2632
+ with self._get_auth_session() as s:
2633
+ samples = s.get(URL)
2634
+ try:
2635
+ samples.raise_for_status()
2636
+ obj = samples.json()[0]
2637
+ resp += obj["samples"]
2638
+ except:
2639
+ continue
2640
+
2641
+ if not resp:
2642
+ raise ServerError(
2643
+ f"Could not retrieve samples for analysis {analysis_id or analysis_name}."
2644
+ )
2645
+
2646
+ resp = pd.DataFrame(resp)
2647
+ resp.drop_duplicates(subset=["id"], inplace=True)
2648
+ return resp if as_df else resp.to_dict(orient="records")
2649
+
2650
+ def get_analysis_protocol_fasta(self, analysis_id, download_path=None):
2651
+ if not analysis_id:
2652
+ raise ValueError("Analysis ID cannot be empty.")
2653
+
2654
+ if not download_path:
2655
+ download_path = os.getcwd()
2656
+
2657
+ try:
2658
+ analysis_protocol_id = self.get_analysis(analysis_id)[0][
2659
+ "analysis_protocol_id"
2660
+ ]
2661
+ except (IndexError, KeyError):
2662
+ raise ValueError(f"Could not parse server response.")
2663
+
2664
+ try:
2665
+ analysis_protocol_engine = self.get_analysis_protocols(
2666
+ analysis_protocol_id=analysis_protocol_id
2667
+ )[0]["analysis_engine"]
2668
+ except (IndexError, KeyError):
2669
+ raise ValueError(f"Could not parse server response.")
2670
+
2671
+ analysis_protocol_engine = analysis_protocol_engine.lower()
2672
+ if analysis_protocol_engine == "diann":
2673
+ URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/diann/{analysis_protocol_id}"
2674
+ elif analysis_protocol_engine == "encyclopedia":
2675
+ URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/dia/{analysis_protocol_id}"
2676
+ elif analysis_protocol_engine == "msfragger":
2677
+ URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/msfragger/{analysis_protocol_id}"
2678
+ elif analysis_protocol_engine == "proteogenomics":
2679
+ URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/proteogenomics/{analysis_protocol_id}"
2680
+ else:
2681
+ # Change needed on the backend to get s3 file path for MaxQuant
2682
+ # URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/{analysis_protocol_id}"
2683
+ raise ValueError(
2684
+ f"Analysis protocol engine {analysis_protocol_engine} not supported for fasta download."
2685
+ )
2686
+
2687
+ with self._get_auth_session() as s:
2688
+ response = s.get(URL)
2689
+ if response.status_code != 200:
2690
+ raise ServerError("Request failed.")
2691
+ response = response.json()
2692
+ if type(response) == dict:
2693
+ response = response["editableParameters"]
2694
+ fasta_filenames = [
2695
+ x["Value"]
2696
+ for x in response
2697
+ if x["Key"] in ["fasta", "fastaFilePath", "referencegenome"]
2698
+ ]
2699
+ if not fasta_filenames:
2700
+ raise ServerError("No fasta file name returned from server.")
2701
+
2702
+ URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
2703
+ for file in fasta_filenames:
2704
+ with self._get_auth_session() as s:
2705
+ response = s.post(URL, json={"filepath": file})
2706
+ if response.status_code != 200:
2707
+ raise ServerError("Request failed.")
2708
+ url = response.json()["url"]
2709
+ filename = os.path.basename(file)
2710
+ print(f"Downloading {filename}")
2711
+ for _ in range(2):
2712
+ try:
2713
+ with tqdm(
2714
+ unit="B",
2715
+ unit_scale=True,
2716
+ unit_divisor=1024,
2717
+ miniters=1,
2718
+ desc=f"Progress",
2719
+ ) as t:
2720
+ ssl._create_default_https_context = (
2721
+ ssl._create_unverified_context
2722
+ )
2723
+ urllib.request.urlretrieve(
2724
+ url,
2725
+ f"{download_path}/{filename}",
2726
+ reporthook=download_hook(t),
2727
+ data=None,
2728
+ )
2729
+ break
2730
+ except:
2731
+ if not os.path.isdir(f"{download_path}"):
2732
+ os.makedirs(f"{download_path}")
2733
+
2734
+ print(f"Downloaded file to {download_path}/{file}")