seer-pas-sdk 0.1.3__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seer_pas_sdk/core/sdk.py CHANGED
@@ -5,13 +5,12 @@ import jwt
5
5
  import requests
6
6
  import urllib.request
7
7
  import ssl
8
- import shutil
9
8
 
10
- from typing import List as _List
9
+ from typing import List as _List, Tuple as _Tuple
11
10
 
12
11
  from ..common import *
13
12
  from ..auth import Auth
14
- from ..objects import PlateMap
13
+ from ..objects.volcanoplot import VolcanoPlotBuilder
15
14
 
16
15
 
17
16
  class SeerSDK:
@@ -27,33 +26,185 @@ class SeerSDK:
27
26
  >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
28
27
  """
29
28
 
30
- def __init__(self, username, password, instance="US"):
29
+ def __init__(self, username, password, instance="US", tenant=None):
31
30
  try:
32
31
  self._auth = Auth(username, password, instance)
33
32
 
34
33
  self._auth.get_token()
35
-
36
34
  print(f"User '{username}' logged in.\n")
37
35
 
38
- except:
36
+ if not tenant:
37
+ tenant = self._auth.active_tenant_id
38
+ try:
39
+ self.switch_tenant(tenant)
40
+ except Exception as e:
41
+ print(
42
+ f"Encountered an error directing you to tenant {tenant}: {e}."
43
+ )
44
+ print("Logging into home tenant...")
45
+ # If an error occurs while directing the user to a tenant, default to home tenant.
46
+ print(f"You are now active in {self.get_active_tenant_name()}")
47
+ except Exception as e:
39
48
  raise ValueError(
40
- "Could not log in.\nPlease check your credentials and/or instance."
49
+ f"Could not log in.\nPlease check your credentials and/or instance: {e}."
41
50
  )
42
51
 
43
- def _get_auth_headers(self):
52
+ def _get_auth_headers(self, use_multi_tenant=True):
44
53
  id_token, access_token = self._auth.get_token()
45
- return {
54
+ header = {
46
55
  "Authorization": id_token,
47
- "access-token": access_token,
56
+ "Access-Token": access_token,
48
57
  }
58
+ if use_multi_tenant:
59
+ multi_tenant = {
60
+ "Tenant-Id": self._auth.active_tenant_id,
61
+ "Role": self._auth.active_role,
62
+ }
63
+ header.update(multi_tenant)
64
+ return header
49
65
 
50
- def _get_auth_session(self):
66
+ def _get_auth_session(self, use_multi_tenant=True):
51
67
  sess = requests.Session()
52
68
 
53
- sess.headers.update(self._get_auth_headers())
69
+ sess.headers.update(self._get_auth_headers(use_multi_tenant))
54
70
 
55
71
  return sess
56
72
 
73
+ def get_user_tenant_metadata(self, index=True):
74
+ """
75
+ Fetches the tenant metadata for the authenticated user.
+
+ Parameters
+ ----------
+ index: bool
+ Boolean denoting whether the result should be indexed by institution name (True) or returned as a plain list (False). Defaults to True.
+
77
+ Returns
78
+ -------
79
+ response : dict
80
+ A dictionary containing the tenant metadata for the authenticated user.
81
+ """
82
+ with self._get_auth_session() as s:
83
+ response = s.get(f"{self._auth.url}api/v1/usertenants")
84
+
85
+ if response.status_code != 200:
86
+ raise ValueError(
87
+ "Invalid request. Please check your parameters."
88
+ )
89
+
90
+ response = response.json()
91
+ if index:
92
+ return {x["institution"]: x for x in response}
93
+ else:
94
+ return response
95
+
96
+ def list_tenants(self, reverse=False):
97
+ """
98
+ Lists the institution names and the tenant ids for the authenticated user.
99
+
100
+ Parameters
101
+ ----------
102
+ reverse: bool
103
+ Boolean denoting whether the user wants the result dictionary indexed by tenant id (True) or institution name (False).
104
+
105
+ Returns
106
+ -------
107
+ tenants : dict
108
+ A dictionary containing the institution names and tenant ids for the authenticated user.
109
+ """
110
+ tenants = self.get_user_tenant_metadata()
111
+ if reverse:
112
+ return {x["tenantId"]: x["institution"] for x in tenants.values()}
113
+ else:
114
+ return {x["institution"]: x["tenantId"] for x in tenants.values()}
115
+
116
+ def switch_tenant(self, identifier: str):
117
+ """
118
+ Switches the tenant for the authenticated user.
119
+
120
+ Parameters
121
+ ----------
122
+ identifier: str
123
+ Tenant ID or organization name to switch to.
124
+
125
+ Returns
126
+ -------
127
+ tenant_id: str
128
+ Returns the value of the active tenant id after the operation.
129
+ """
130
+ map = self.get_user_tenant_metadata()
131
+ tenant_ids = [x["tenantId"] for x in map.values()]
132
+ institution_names = map.keys()
133
+
134
+ if identifier in tenant_ids:
135
+ tenant_id = identifier
136
+ row = [x for x in map.values() if x["tenantId"] == tenant_id]
137
+ if row:
138
+ row = row[0]
139
+ else:
140
+ raise ValueError(
141
+ "Invalid tenant identifier. Tenant was not switched."
142
+ )
143
+ elif identifier in institution_names:
144
+ row = map[identifier]
145
+ tenant_id = row["tenantId"]
146
+ else:
147
+ raise ValueError(
148
+ "Invalid tenant identifier. Tenant was not switched."
149
+ )
150
+
151
+ with self._get_auth_session() as s:
152
+ response = s.put(
153
+ self._auth.url + "api/v1/users/tenant",
154
+ json={
155
+ "currentTenantId": tenant_id,
156
+ "username": self._auth.username,
157
+ },
158
+ )
159
+ if response.status_code != 200:
160
+ raise ServerError(
161
+ "Could not update current tenant for user. Tenant was not switched."
162
+ )
163
+
164
+ self._auth.active_tenant_id = tenant_id
165
+ self._auth.active_role = row["role"]
166
+ print(f"You are now active in {row['institution']}")
167
+ return self._auth.active_tenant_id, self._auth.active_role
168
+
169
+ def get_active_tenant(self):
170
+ """
171
+ Fetches the active tenant for the authenticated user.
172
+
173
+ Returns
174
+ -------
175
+ tenant: dict
176
+ Tenant metadata for the authenticated user containing "institution" and "tenantId" keys.
177
+ """
178
+ tenants = self.get_user_tenant_metadata(index=False)
179
+ row = [
180
+ x for x in tenants if x["tenantId"] == self._auth.active_tenant_id
181
+ ]
182
+ return row[0] if row else None
183
+
184
+ def get_active_tenant_id(self):
185
+ """
186
+ Fetches the active tenant ID for the authenticated user.
187
+
188
+ Returns
189
+ -------
190
+ tenant_id: str
191
+ Tenant ID for the authenticated user.
192
+ """
193
+ tenant = self.get_active_tenant()
194
+ return tenant["tenantId"] if tenant else None
195
+
196
+ def get_active_tenant_name(self):
197
+ """
198
+ Fetches the active tenant name for the authenticated user.
199
+
200
+ Returns
201
+ -------
202
+ tenant: str
203
+ Tenant name for the authenticated user.
204
+ """
205
+ tenant = self.get_active_tenant()
206
+ return tenant["institution"] if tenant else None
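# Illustrative usage sketch for the tenant helpers above (assumes `sdk` is an authenticated
# SeerSDK instance belonging to more than one tenant; names and IDs are placeholders):
#     >>> sdk = SeerSDK(USERNAME, PASSWORD, instance="US")
#     >>> sdk.list_tenants()
#     {'YOUR_INSTITUTION_NAME': 'YOUR_TENANT_ID', ...}
#     >>> sdk.switch_tenant("YOUR_INSTITUTION_NAME")   # also accepts a tenant ID
#     >>> sdk.get_active_tenant_name()
#     'YOUR_INSTITUTION_NAME'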
207
+
57
208
  def get_spaces(self):
58
209
  """
59
210
  Fetches a list of spaces for the authenticated user.
@@ -230,14 +381,10 @@ class SeerSDK:
230
381
  ]
231
382
  return res if not df else dict_to_df(res)
232
383
 
233
- def _get_samples_metadata(
384
+ def get_samples_metadata(
234
385
  self, plate_id: str = None, project_id: str = None, df: bool = False
235
386
  ):
236
387
  """
237
- ****************
238
- [UNEXPOSED METHOD CALL]
239
- ****************
240
-
241
388
  Fetches a list of samples for the authenticated user, filtered by `plate_id`. Returns all samples for the plate with the given `plate_id`, provided it exists.
242
389
 
243
390
  If both `plate_id` and `project_id` are passed in, only the `plate_id` is validated first.
@@ -261,14 +408,14 @@ class SeerSDK:
261
408
  >>> from seer_pas_sdk import SeerSDK
262
409
  >>> seer_sdk = SeerSDK()
263
410
 
264
- >>> seer_sdk._get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
411
+ >>> seer_sdk.get_samples_metadata(plate_id="7ec8cad0-15e0-11ee-bdf1-bbaa73585acf")
265
412
  >>> [
266
413
  { "id": ... },
267
414
  { "id": ... },
268
415
  ...
269
416
  ]
270
417
 
271
- >>> seer_sdk._get_samples_metadata(df=True)
418
+ >>> seer_sdk.get_samples_metadata(df=True)
272
419
  >>> id ... control
273
420
  0 812139c0-15e0-11ee-bdf1-bbaa73585acf ...
274
421
  1 803e05b0-15e0-11ee-bdf1-bbaa73585acf ... MPE Control
@@ -296,25 +443,21 @@ class SeerSDK:
296
443
  try:
297
444
  self.get_plate_metadata(plate_id)
298
445
  except:
299
- raise ValueError(
300
- "Plate ID is invalid. Please check your parameters and see if the backend is running."
301
- )
446
+ raise ValueError("Plate ID is invalid.")
302
447
  sample_params["plateId"] = plate_id
303
448
 
304
449
  elif project_id:
305
450
  try:
306
451
  self.get_project_metadata(project_id)
307
452
  except:
308
- raise ValueError(
309
- "Project ID is invalid. Please check your parameters and see if the backend is running."
310
- )
453
+ raise ValueError("Project ID is invalid.")
311
454
 
312
455
  sample_params["projectId"] = project_id
313
456
 
314
457
  samples = s.get(URL, params=sample_params)
315
458
  if samples.status_code != 200:
316
459
  raise ValueError(
317
- "Invalid request. Please check if your plate ID has any samples associated with it."
460
+ f"Failed to fetch sample data for plate ID: {plate_id}."
318
461
  )
319
462
  res = samples.json()["data"]
320
463
 
@@ -334,8 +477,75 @@ class SeerSDK:
334
477
  ]
335
478
  ]
336
479
 
480
+ # API returns empty strings if not a control, replace with None for filtering purposes
481
+ res_df["control"] = res_df["control"].apply(lambda x: x if x else None)
482
+
337
483
  return res_df.to_dict(orient="records") if not df else res_df
338
484
 
485
+ def _filter_samples_metadata(
486
+ self,
487
+ project_id: str,
488
+ filter: str,
489
+ sample_ids: list = None,
490
+ ):
491
+ """
492
+ ****************
493
+ [UNEXPOSED METHOD CALL]
494
+ ****************
495
+ Get samples given a filter and project_id.
496
+
497
+ Parameters
498
+ ----------
499
+ project_id : str
500
+ The project id.
501
+ filter : str
502
+ The filter to be applied. Acceptable values are 'control' or 'sample'.
503
+ sample_ids : list, optional
504
+ List of user provided sample ids
505
+
506
+ Returns
507
+ -------
508
+ res : list
509
+ A list of sample ids
510
+
511
+ Examples
512
+ -------
513
+ >>> from seer_pas_sdk import SeerSDK
514
+ >>> seer_sdk = SeerSDK()
515
+ >>> seer_sdk._filter_samples_metadata("PROJECT_ID", "sample")
516
+ >>> ["SAMPLE_ID_1", "SAMPLE_ID_2", ...]
529
+ """
530
+
531
+ if filter and filter not in ["control", "sample"]:
532
+ raise ValueError(
533
+ "Invalid filter. Please choose between 'control' or 'sample'."
534
+ )
535
+
536
+ df = self.get_samples_metadata(project_id=project_id, df=True)
537
+
538
+ if filter == "control":
539
+ df = df[~df["control"].isna()]
540
+ elif filter == "sample":
541
+ df = df[df["control"].isna()]
542
+
543
+ valid_samples = df["id"].tolist()
544
+ if sample_ids:
545
+ valid_samples = list(set(valid_samples) & set(sample_ids))
546
+
547
+ return valid_samples
548
+
339
549
  def get_sample_custom_fields(self):
340
550
  """
341
551
  Fetches a list of custom fields defined for the authenticated user.
@@ -405,10 +615,10 @@ class SeerSDK:
405
615
 
406
616
  if msdatas.status_code != 200 or not msdatas.json()["data"]:
407
617
  raise ValueError(
408
- "Failed to fetch MS data for your plate ID."
618
+ f"Failed to fetch MS data for sample ID={sample_id}."
409
619
  )
410
620
 
411
- res.append(msdatas.json()["data"][0])
621
+ res += [x for x in msdatas.json()["data"]]
412
622
 
413
623
  for entry in res:
414
624
  if "tenant_id" in entry:
@@ -460,18 +670,24 @@ class SeerSDK:
460
670
 
461
671
  [2 rows x 26 columns]
462
672
  """
463
- plate_samples = self._get_samples_metadata(plate_id=plate_id)
673
+ plate_samples = self.get_samples_metadata(plate_id=plate_id)
464
674
  sample_ids = [sample["id"] for sample in plate_samples]
465
675
  return self.get_msdata(sample_ids, df)
466
676
 
467
677
  def get_project(
468
- self, project_id: str, msdata: bool = False, df: bool = False
678
+ self,
679
+ project_id: str,
680
+ msdata: bool = False,
681
+ df: bool = False,
682
+ flat: bool = False,
469
683
  ):
470
684
  """
471
685
  Fetches samples (and MS data files) for a `project_id` (provided that the `project_id` is valid and has samples associated with it) for an authenticated user.
472
686
 
473
687
  The function returns a DataFrame object if the `df` flag is passed in as True, otherwise a nested dict object is returned. If both the `df` and `msdata` flags are passed in as True, then a nested DataFrame object is returned instead.
474
688
 
689
+ If the `flat` flag is passed in as True, the nested dict objects are flattened into a single list of dicts and the nested DataFrames are combined into a single DataFrame.
690
+
475
691
  Parameters
476
692
  ----------
477
693
  project_id : str
@@ -567,39 +783,53 @@ class SeerSDK:
567
783
  return ValueError("No project ID specified.")
568
784
 
569
785
  sample_ids = []
570
- project_samples = self._get_samples_metadata(
786
+ project_samples = self.get_samples_metadata(
571
787
  project_id=project_id, df=False
572
788
  )
789
+ flat_result = []
573
790
 
574
791
  if msdata:
575
- sample_ids = [
576
- sample["id"] for sample in project_samples
577
- ] # will always contain unique values
578
- ms_data_files = self.get_msdata(sample_ids=sample_ids, df=False)
792
+
793
+ # construct map for quick index reference of sample in project_samples
794
+ sample_ids = {
795
+ sample["id"]: i for i, sample in enumerate(project_samples)
796
+ } # will always contain unique values
797
+ ms_data_files = self.get_msdata(
798
+ sample_ids=list(sample_ids.keys()), df=False
799
+ )
579
800
 
580
801
  for ms_data_file in ms_data_files:
581
- for sample_index in range(len(project_samples)):
582
- if (
583
- project_samples[sample_index]["id"]
584
- == ms_data_file["sample_id"]
585
- ):
586
- if "ms_data_file" not in project_samples[sample_index]:
587
- project_samples[sample_index]["ms_data_files"] = [
588
- ms_data_file
589
- ]
590
- else:
591
- project_samples[sample_index][
592
- "ms_data_files"
593
- ].append(ms_data_file)
802
+ index = sample_ids.get(ms_data_file["sample_id"], None)
803
+ # check for None explicitly so the sample at index 0 is not skipped
+ if index is None:
804
+ continue
805
+
806
+ if not flat:
807
+ if "ms_data_file" not in project_samples[index]:
808
+ project_samples[index]["ms_data_files"] = [
809
+ ms_data_file
810
+ ]
811
+ else:
812
+ project_samples[index]["ms_data_files"].append(
813
+ ms_data_file
814
+ )
815
+ else:
816
+ flat_result.append(project_samples[index] | ms_data_file)
817
+
818
+ # return flat result if results were added to the flat object
819
+ if flat and flat_result:
820
+ project_samples = flat_result
594
821
 
595
822
  if df:
596
- for sample_index in range(len(project_samples)):
597
- if "ms_data_files" in project_samples[sample_index]:
598
- project_samples[sample_index]["ms_data_files"] = (
599
- dict_to_df(
600
- project_samples[sample_index]["ms_data_files"]
823
+ if flat:
824
+ return pd.DataFrame(project_samples)
825
+ else:
826
+ for sample_index in range(len(project_samples)):
827
+ if "ms_data_files" in project_samples[sample_index]:
828
+ project_samples[sample_index]["ms_data_files"] = (
829
+ dict_to_df(
830
+ project_samples[sample_index]["ms_data_files"]
831
+ )
601
832
  )
602
- )
603
833
 
604
834
  project_samples = dict_to_df(project_samples)
605
835
 
@@ -694,11 +924,16 @@ class SeerSDK:
694
924
  self,
695
925
  analysis_id: str = None,
696
926
  folder_id: str = None,
697
- show_folders=True,
698
- analysis_only=True,
927
+ show_folders: bool = True,
928
+ analysis_only: bool = True,
929
+ project_id: str = None,
930
+ plate_name: str = None,
931
+ **kwargs,
699
932
  ):
700
933
  """
701
934
  Returns a list of analysis objects for the authenticated user. If no ID is provided, returns all analyses for the authenticated user.
935
+ Search parameters may be passed in as keyword arguments to filter the results. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
936
+ Only a single search field may be used at a time.
702
937
 
703
938
  Parameters
704
939
  ----------
@@ -709,13 +944,22 @@ class SeerSDK:
709
944
  ID of the folder to be fetched, defaulted to None.
710
945
 
711
946
  show_folders : bool, optional
712
- Mark True if folder contents are to be returned in the response, defaulted to True.
947
+ Mark True if folder contents are to be returned in the response, i.e. recursive search, defaulted to True.
713
948
  Will be disabled if an analysis id is provided.
714
949
 
715
950
  analysis_only : bool, optional
716
951
  Mark True if only analyses objects are to be returned in the response, defaulted to True.
717
952
  If marked false, folder objects will also be included in the response.
718
953
 
954
+ project_id : str, optional
955
+ ID of the project by which to filter analyses, defaulted to None.
956
+
957
+ plate_name : str, optional
958
+ Name of the plate by which to filter analyses, defaulted to None.
959
+
960
+ **kwargs : dict, optional
961
+ Search keyword parameters to be passed in. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
962
+
719
963
  Returns
720
964
  -------
721
965
  analyses: dict
@@ -732,19 +976,67 @@ class SeerSDK:
732
976
  {id: "YOUR_ANALYSIS_ID_HERE", ...}
733
977
  ]
734
978
 
735
- >>> seer_sdk.get_analyses("YOUR_ANALYSIS_ID_HERE")
979
+ >>> seer_sdk.get_analysis("YOUR_ANALYSIS_ID_HERE")
980
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
981
+
982
+ >>> seer_sdk.get_analysis(folder_name="YOUR_FOLDER_NAME_HERE")
983
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
984
+
985
+ >>> seer_sdk.get_analysis(analysis_name="YOUR_ANALYSIS")
986
+ >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
987
+
988
+ >>> seer_sdk.get_analysis(description="YOUR_DESCRIPTION")
736
989
  >>> [{ id: "YOUR_ANALYSIS_ID_HERE", ...}]
737
990
  """
738
991
 
739
992
  URL = f"{self._auth.url}api/v1/analyses"
740
993
  res = []
741
994
 
995
+ search_field = None
996
+ search_item = None
997
+ if kwargs:
998
+ if len(kwargs.keys()) > 1:
999
+ raise ValueError("Please include only one search parameter.")
1000
+ search_field = list(kwargs.keys())[0]
1001
+ search_item = kwargs[search_field]
1002
+
1003
+ if not search_item:
1004
+ raise ValueError(
1005
+ f"Please provide a non null value for {search_field}"
1006
+ )
1007
+
1008
+ if search_field and search_field not in [
1009
+ "analysis_name",
1010
+ "folder_name",
1011
+ "analysis_protocol_name",
1012
+ "description",
1013
+ "notes",
1014
+ "number_msdatafile",
1015
+ ]:
1016
+ raise ValueError(
1017
+ "Invalid search field. Please choose between 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'."
1018
+ )
1019
+
742
1020
  with self._get_auth_session() as s:
743
1021
 
744
1022
  params = {"all": "true"}
745
1023
  if folder_id:
746
1024
  params["folder"] = folder_id
747
1025
 
1026
+ if search_field:
1027
+ params["searchFields"] = search_field
1028
+ params["searchItem"] = search_item
1029
+ del params["all"]
1030
+
1031
+ if search_field == "folder_name":
1032
+ params["searchFields"] = "analysis_name"
1033
+
1034
+ if project_id:
1035
+ params["projectId"] = project_id
1036
+
1037
+ if plate_name:
1038
+ params["plateName"] = plate_name
1039
+
748
1040
  analyses = s.get(
749
1041
  f"{URL}/{analysis_id}" if analysis_id else URL, params=params
750
1042
  )
@@ -790,150 +1082,523 @@ class SeerSDK:
790
1082
  ]
791
1083
  return res
792
1084
 
793
- def get_analysis_result(self, analysis_id: str, download_path: str = ""):
1085
+ def get_analysis_result_protein_data(
1086
+ self, analysis_id: str, link: bool = False, pg: str = None
1087
+ ):
794
1088
  """
795
- Given an `analysis_id`, this function returns all relevant analysis data files in form of downloadable content, if applicable.
1089
+ Given an analysis id, this function returns the protein data for the analysis.
796
1090
 
797
1091
  Parameters
798
1092
  ----------
1093
+
799
1094
  analysis_id : str
800
1095
  ID of the analysis for which the data is to be fetched.
1096
+ link : bool
1097
+ Boolean flag denoting whether to return the signed URLs for the protein result files instead of DataFrames. Defaults to False.
1098
+ pg : str
1099
+ Protein group ID to filter dataframe results. Defaults to None.
801
1100
 
802
- download_path : bool
803
- String flag denoting where the user wants the files downloaded. Can be local or absolute as long as the path is valid. Defaults to an empty string.
804
-
805
- Returns
806
- -------
807
- links: dict
808
- Contains dataframe objects for the `analysis_id`, given that the analysis has been complete.
1101
+ """
1102
+ with self._get_auth_session() as s:
1103
+ URL = f"{self._auth.url}api/v1/data"
1104
+ response = s.get(
1105
+ f"{URL}/protein?analysisId={analysis_id}&retry=false"
1106
+ )
809
1107
 
810
- Examples
811
- -------
812
- >>> from seer_pas_sdk import SeerSDK
813
- >>> seer_sdk = SeerSDK()
1108
+ if response.status_code != 200:
1109
+ raise ValueError(
1110
+ "Could not fetch protein data. Please verify that your analysis completed."
1111
+ )
1112
+ response = response.json()
1113
+
1114
+ protein_data = {}
1115
+ for row in response:
1116
+ if row.get("name") == "npLink":
1117
+ protein_data["npLink"] = {
1118
+ "url": row.get("link", {}).get("url", "")
1119
+ }
1120
+ if row.get("name") == "panelLink":
1121
+ protein_data["panelLink"] = {
1122
+ "url": row.get("link", {}).get("url", "")
1123
+ }
1124
+ if not protein_data:
1125
+ raise ValueError("No protein result files found.")
1126
+ if not "panelLink" in protein_data.keys():
1127
+ protein_data["panelLink"] = {"url": ""}
1128
+
1129
+ if link:
1130
+ return protein_data
1131
+ else:
1132
+ if not pg:
1133
+ return {
1134
+ "protein_np": url_to_df(protein_data["npLink"]["url"]),
1135
+ "protein_panel": url_to_df(
1136
+ protein_data["panelLink"]["url"]
1137
+ ),
1138
+ }
1139
+ else:
1140
+ protein_np = url_to_df(
1141
+ protein_data["npLink"]["url"]
1142
+ ).query(f"`Protein Group` == '{pg}'")
1143
+ protein_panel = url_to_df(
1144
+ protein_data["panelLink"]["url"]
1145
+ ).query(f"`Protein Group` == '{pg}'")
1146
+
1147
+ if protein_np.empty and protein_panel.empty:
1148
+ raise ValueError(
1149
+ f"Protein group {pg} not found in analysis {analysis_id}."
1150
+ )
814
1151
 
815
- >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
816
- >>> {
817
- "peptide_np": <peptide_np dataframe object>,
818
- "peptide_panel": <peptide_panel dataframe object>,
819
- "protein_np": <protein_np dataframe object>,
820
- "protein_panel": <protein_panel dataframe object>
821
- }
1152
+ return {
1153
+ "protein_np": protein_np,
1154
+ "protein_panel": protein_panel,
1155
+ }
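# Illustrative usage sketch for get_analysis_result_protein_data (assumes an authenticated
# `sdk` and a completed analysis; IDs are placeholders):
#     >>> sdk.get_analysis_result_protein_data("YOUR_ANALYSIS_ID_HERE", link=True)
#     {'npLink': {'url': '...'}, 'panelLink': {'url': '...'}}
#     >>> sdk.get_analysis_result_protein_data("YOUR_ANALYSIS_ID_HERE", pg="YOUR_PROTEIN_GROUP_ID")
#     {'protein_np': <protein_np dataframe object>, 'protein_panel': <protein_panel dataframe object>}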
822
1156
 
823
- >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
824
- >>> { "status": "Download complete." }
1157
+ def get_analysis_result_peptide_data(
1158
+ self, analysis_id: str, link: bool = False, peptide: str = None
1159
+ ):
825
1160
  """
1161
+ Given an analysis id, this function returns the peptide data for the analysis.
826
1162
 
827
- if not analysis_id:
828
- raise ValueError("Analysis ID cannot be empty.")
829
-
830
- if download_path and not os.path.exists(download_path):
831
- raise ValueError("The download path you entered is invalid.")
832
-
833
- if self.get_analysis(analysis_id)[0]["status"] in ["FAILED", None]:
834
- raise ValueError(
835
- "Cannot generate links for failed or null analyses."
836
- )
1163
+ Parameters
1164
+ ----------
837
1165
 
838
- URL = f"{self._auth.url}api/v1/data"
1166
+ analysis_id : str
1167
+ ID of the analysis for which the data is to be fetched.
839
1168
 
840
- with self._get_auth_session() as s:
1169
+ link : bool
1170
+ Boolean flag denoting whether to return the signed URLs for the peptide result files instead of DataFrames. Defaults to False.
841
1171
 
842
- protein_data = s.get(
843
- f"{URL}/protein?analysisId={analysis_id}&retry=false"
844
- )
1172
+ peptide : str
1173
+ Peptide sequence to filter dataframe results. Defaults to None.
845
1174
 
846
- if protein_data.status_code != 200:
847
- raise ValueError(
848
- "Invalid request. Could not fetch protein data. Please check your parameters."
849
- )
850
- protein_data = protein_data.json()
1175
+ """
851
1176
 
852
- peptide_data = s.get(
1177
+ with self._get_auth_session() as s:
1178
+ URL = f"{self._auth.url}api/v1/data"
1179
+ response = s.get(
853
1180
  f"{URL}/peptide?analysisId={analysis_id}&retry=false"
854
1181
  )
855
1182
 
856
- if peptide_data.status_code != 200:
1183
+ if response.status_code != 200:
857
1184
  raise ValueError(
858
- "Invalid request. Could not fetch peptide data. Please check your parameters."
1185
+ "Could not fetch peptide data. Please verify that your analysis completed."
859
1186
  )
860
1187
 
861
- peptide_data = peptide_data.json()
1188
+ response = response.json()
1189
+
1190
+ peptide_data = {}
1191
+ for row in response:
1192
+ if row.get("name") == "npLink":
1193
+ peptide_data["npLink"] = {
1194
+ "url": row.get("link", {}).get("url", "")
1195
+ }
1196
+ if row.get("name") == "panelLink":
1197
+ peptide_data["panelLink"] = {
1198
+ "url": row.get("link", {}).get("url", "")
1199
+ }
1200
+ if not peptide_data:
1201
+ raise ValueError("No peptide result files found.")
1202
+ if not "panelLink" in peptide_data.keys():
1203
+ peptide_data["panelLink"] = {"url": ""}
1204
+ if link:
1205
+ return peptide_data
1206
+ else:
1207
+ if not peptide:
1208
+ return {
1209
+ "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
1210
+ "peptide_panel": url_to_df(
1211
+ peptide_data["panelLink"]["url"]
1212
+ ),
1213
+ }
1214
+ else:
1215
+ peptide_np = url_to_df(
1216
+ peptide_data["npLink"]["url"]
1217
+ ).query(f"Peptide == '{peptide}'")
1218
+ peptide_panel = url_to_df(
1219
+ peptide_data["panelLink"]["url"]
1220
+ ).query(f"Peptide == '{peptide}'")
1221
+
1222
+ if peptide_np.empty and peptide_panel.empty:
1223
+ raise ValueError(
1224
+ f"Peptide {peptide} not found in analysis {analysis_id}."
1225
+ )
862
1226
 
863
- links = {
864
- "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
865
- "peptide_panel": url_to_df(peptide_data["panelLink"]["url"]),
866
- "protein_np": url_to_df(protein_data["npLink"]["url"]),
867
- "protein_panel": url_to_df(protein_data["panelLink"]["url"]),
868
- }
1227
+ return {
1228
+ "peptide_np": peptide_np,
1229
+ "peptide_panel": peptide_panel,
1230
+ }
869
1231
 
870
- if download_path:
871
- name = f"{download_path}/downloads/{analysis_id}"
872
- if not os.path.exists(name):
873
- os.makedirs(name)
1232
+ def list_analysis_result_files(self, analysis_id: str):
1233
+ """
1234
+ Given an analysis id, this function returns a list of files associated with the analysis.
874
1235
 
875
- links["peptide_np"].to_csv(f"{name}/peptide_np.csv", sep="\t")
876
- links["peptide_panel"].to_csv(
877
- f"{name}/peptide_panel.csv", sep="\t"
878
- )
879
- links["protein_np"].to_csv(f"{name}/protein_np.csv", sep="\t")
880
- links["protein_panel"].to_csv(
881
- f"{name}/protein_panel.csv", sep="\t"
882
- )
1236
+ Parameters
1237
+ ----------
1238
+ analysis_id : str
1239
+ ID of the analysis for which the data is to be fetched.
883
1240
 
884
- return {"status": "Download complete."}
1241
+ Returns
1242
+ -------
1243
+ files: list
1244
+ List of files associated with the analysis.
1245
+ """
1246
+ try:
1247
+ analysis_metadata = self.get_analysis(analysis_id)[0]
1248
+ except (IndexError, ServerError):
1249
+ raise ValueError("Invalid analysis ID.")
1250
+ except:
1251
+ raise ValueError("Could not fetch analysis metadata.")
885
1252
 
886
- return links
1253
+ if analysis_metadata.get("status") in ["Failed", None]:
1254
+ raise ValueError("Cannot find files for a failed analysis.")
1255
+ with self._get_auth_session() as s:
1256
+ response = s.get(
1257
+ f"{self._auth.url}api/v2/analysisResultFiles/{analysis_id}"
1258
+ )
1259
+ if response.status_code != 200:
1260
+ raise ServerError(
1261
+ "Could not fetch analysis result files. Please verify that your analysis completed."
1262
+ )
1263
+ response = response.json()
1264
+ files = []
1265
+ for row in response["data"]:
1266
+ files.append(row["filename"])
1267
+ return files
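# Illustrative usage sketch for list_analysis_result_files (assumes a completed analysis;
# the exact filenames returned depend on the analysis protocol):
#     >>> sdk.list_analysis_result_files("YOUR_ANALYSIS_ID_HERE")
#     ['protein_np.tsv', 'protein_panel.tsv', 'peptide_np.tsv', 'peptide_panel.tsv', 'report.tsv', ...]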
887
1268
 
888
- def analysis_complete(self, analysis_id: str):
1269
+ def get_analysis_result_file_url(self, analysis_id: str, filename: str):
889
1270
  """
890
- Returns the status of the analysis with the given id.
1271
+ Given an analysis id and an analysis result filename, this function returns the signed URL for the file.
891
1272
 
892
1273
  Parameters
893
1274
  ----------
894
1275
  analysis_id : str
895
- The analysis id.
1276
+ ID of the analysis for which the data is to be fetched.
896
1277
 
897
- Returns
898
- -------
899
- res : dict
900
- A dictionary containing the status of the analysis.
1278
+ filename : str
1279
+ Name of the file to be fetched.
901
1280
 
902
- Examples
1281
+ Returns
903
1282
  -------
904
- >>> from seer_pas_sdk import SeerSDK
905
- >>> seer_sdk = SeerSDK()
906
- >>> seer_sdk.analysis_complete("YOUR_ANALYSIS_ID_HERE")
907
- >>> {
908
- "status": "SUCCEEDED"
909
- }
1283
+ file_url: dict
1284
+ Response object containing the url for the file.
910
1285
  """
911
1286
 
912
- if not analysis_id:
913
- raise ValueError("Analysis id cannot be empty.")
914
-
915
- try:
916
- res = self.get_analysis(analysis_id)
917
- except ValueError:
918
- return ValueError("Analysis not found. Your ID could be incorrect")
1287
+ # Allow user to pass in filenames without an extension.
1288
+ analysis_result_files = self.list_analysis_result_files(analysis_id)
1289
+ analysis_result_files_prefix_mapper = {
1290
+ ".".join(x.split(".")[:-1]): x for x in analysis_result_files
1291
+ }
1292
+ if filename in analysis_result_files_prefix_mapper:
1293
+ filename = analysis_result_files_prefix_mapper[filename]
919
1294
 
920
- return {"status": res[0]["status"]}
1295
+ analysis_metadata = self.get_analysis(analysis_id)[0]
1296
+ if analysis_metadata.get("status") in ["Failed", None]:
1297
+ raise ValueError("Cannot generate links for failed analyses.")
1298
+ with self._get_auth_session() as s:
1299
+ file_url = s.post(
1300
+ f"{self._auth.url}api/v1/analysisResultFiles/getUrl",
1301
+ json={
1302
+ "analysisId": analysis_id,
1303
+ "projectId": analysis_metadata["project_id"],
1304
+ "filename": filename,
1305
+ },
1306
+ )
1307
+ response = file_url.json()
1308
+ if not response.get("url"):
1309
+ raise ValueError(f"File {filename} not found.")
1310
+ return response
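# Illustrative usage sketch for get_analysis_result_file_url (assumes "report.tsv" exists
# among the analysis result files; the extension may be omitted thanks to the prefix mapping above):
#     >>> sdk.get_analysis_result_file_url("YOUR_ANALYSIS_ID_HERE", "report")
#     {'url': 'https://...'}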
921
1311
 
922
- def list_ms_data_files(self, folder="", space=None):
1312
+ def get_analysis_result_files(
1313
+ self,
1314
+ analysis_id: str,
1315
+ filenames: _List[str],
1316
+ download_path: str = "",
1317
+ protein_all: bool = False,
1318
+ peptide_all: bool = False,
1319
+ ):
923
1320
  """
924
- Lists all the MS data files in the given folder as long as the folder path passed in the params is valid.
1321
+ Given an analysis id and a list of file names, this function returns the files in the form of downloadable content, if applicable.
925
1322
 
926
1323
  Parameters
927
1324
  ----------
928
- folder : str, optional
929
- Folder path to list the files from. Defaults to an empty string and displays all files for the user.
930
- space : str, optional
931
- ID of the user group to which the files belong, defaulted to None.
1325
+ analysis_id : str
1326
+ ID of the analysis for which the data is to be fetched.
1327
+
1328
+ filenames : list
1329
+ List of filenames to be fetched. Only csv and tsv files are supported.
1330
+
1331
+ download_path : str
1332
+ Path to the directory where the files should be downloaded. Can be relative or absolute as long as the path is valid. Defaults to an empty string (no download).
1333
+
1334
+ protein_all : bool
1335
+ Boolean flag denoting whether to include the default protein result files (protein_np.tsv and protein_panel.tsv). Defaults to False.
1336
+
1337
+ peptide_all : bool
1338
+ Boolean flag denoting whether to include the default peptide result files (peptide_np.tsv and peptide_panel.tsv). Defaults to False.
932
1339
 
933
1340
  Returns
934
1341
  -------
935
- list
936
- Contains the list of files in the folder.
1342
+ links: dict
1343
+ Contains dataframe objects for the requested files. If a filename is not found, it is skipped.
1344
+
1345
+
1346
+ Examples
1347
+ -------
1348
+ >>> from seer_pas_sdk import SeerSDK
1349
+ >>> seer_sdk = SeerSDK()
1350
+ >>> analysis_id = "YOUR_ANALYSIS_ID_HERE"
1351
+ >>> filenames = ["protein_np.tsv", "peptide_np.tsv"]
1352
+ >>> seer_sdk.get_analysis_result_files(analysis_id, filenames)
1353
+ {
1354
+ "protein_np.tsv": <protein_np dataframe object>,
1355
+ "peptide_np.tsv": <peptide_np dataframe object>
1356
+ }
1357
+ >>> seer_sdk.get_analysis_result_files(analysis_id, [], protein_all=True, peptide_all=True)
1358
+ {
1359
+ "protein_np.tsv": <protein_np dataframe object>,
1360
+ "protein_panel.tsv": <protein_panel dataframe object>,
1361
+ "peptide_np.tsv": <peptide_np dataframe object>,
1362
+ "peptide_panel.tsv": <peptide_panel dataframe object>
1363
+ }
1364
+ >>> seer_sdk.get_analysis_result_files(analysis_id, ["report.tsv"], download_path="/Users/Downloads")
1365
+ { "report.tsv": <report.tsv dataframe object> }
1366
+ """
1367
+
1368
+ if not analysis_id:
1369
+ raise ValueError("Analysis ID cannot be empty.")
1370
+
1371
+ if download_path and not os.path.exists(download_path):
1372
+ raise ValueError(
1373
+ "Please specify a valid folder path as download path."
1374
+ )
1375
+
1376
+ links = {}
1377
+ if protein_all:
1378
+ protein_data = self.get_analysis_result_protein_data(
1379
+ analysis_id, link=True
1380
+ )
1381
+ links["protein_np.tsv"] = protein_data["npLink"]["url"]
1382
+ links["protein_panel.tsv"] = protein_data["panelLink"]["url"]
1383
+ if peptide_all:
1384
+ peptide_data = self.get_analysis_result_peptide_data(
1385
+ analysis_id, link=True
1386
+ )
1387
+ links["peptide_np.tsv"] = peptide_data["npLink"]["url"]
1388
+ links["peptide_panel.tsv"] = peptide_data["panelLink"]["url"]
1389
+
1390
+ filenames = set(filenames)
1391
+ # Allow user to pass in filenames without an extension.
1392
+ analysis_result_files = self.list_analysis_result_files(analysis_id)
1393
+ analysis_result_files_prefix_mapper = {
1394
+ ".".join(x.split(".")[:-1]): x for x in analysis_result_files
1395
+ }
1396
+ for filename in filenames:
1397
+ if filename in analysis_result_files_prefix_mapper:
1398
+ filename = analysis_result_files_prefix_mapper[filename]
1399
+ if filename == "protein_np.tsv":
1400
+ if protein_all:
1401
+ continue
1402
+ protein_data = self.get_analysis_result_protein_data(
1403
+ analysis_id, link=True
1404
+ )
1405
+ links["protein_np.tsv"] = protein_data["npLink"]["url"]
1406
+ elif filename == "protein_panel.tsv":
1407
+ if protein_all:
1408
+ continue
1409
+ protein_data = self.get_analysis_result_protein_data(
1410
+ analysis_id, link=True
1411
+ )
1412
+ links["protein_panel.tsv"] = protein_data["panelLink"]["url"]
1413
+ elif filename == "peptide_np.tsv":
1414
+ if peptide_all:
1415
+ continue
1416
+ peptide_data = self.get_analysis_result_peptide_data(
1417
+ analysis_id, link=True
1418
+ )
1419
+ links["peptide_np.tsv"] = peptide_data["npLink"]["url"]
1420
+ elif filename == "peptide_panel.tsv":
1421
+ if peptide_all:
1422
+ continue
1423
+ peptide_data = self.get_analysis_result_peptide_data(
1424
+ analysis_id, link=True
1425
+ )
1426
+ links["peptide_panel.tsv"] = peptide_data["panelLink"]["url"]
1427
+ else:
1428
+ try:
1429
+ links[filename] = self.get_analysis_result_file_url(
1430
+ analysis_id, filename
1431
+ )["url"]
1432
+ except Exception as e:
1433
+ print(e)
1434
+ continue
1435
+
1436
+ links = {
1437
+ k: url_to_df(v, is_tsv=k.endswith(".tsv"))
1438
+ for k, v in links.items()
1439
+ }
1440
+ if download_path:
1441
+ name = f"{download_path}/downloads/{analysis_id}"
1442
+ print(f"Start download to path {name}")
1443
+ if not os.path.exists(name):
1444
+ os.makedirs(name)
1445
+ for filename, content in links.items():
1446
+ separator = ","
1447
+ if filename.endswith(".tsv"):
1448
+ separator = "\t"
1449
+ content.to_csv(f"{name}/{filename}", sep=separator)
1450
+ print("Download complete.")
1451
+
1452
+ return links
1453
+
1454
+ def get_analysis_result(
1455
+ self,
1456
+ analysis_id: str,
1457
+ download_path: str = "",
1458
+ diann_report: bool = False,
1459
+ ):
1460
+ """
1461
+ Given an `analysis_id`, this function returns all relevant analysis data files in the form of downloadable content, if applicable.
1462
+
1463
+ Parameters
1464
+ ----------
1465
+ analysis_id : str
1466
+ ID of the analysis for which the data is to be fetched.
1467
+
1468
+ download_path : str
1469
+ Path to the directory where the files should be downloaded. Can be relative or absolute as long as the path is valid. Defaults to an empty string (no download).
1470
+
1471
+ diann_report : bool
1472
+ Boolean flag denoting whether the user wants the DIANN report to be included in the response. Defaults to False.
1473
+
1474
+ Returns
1475
+ -------
1476
+ links: dict
1477
+ Contains dataframe objects for the `analysis_id`, provided that the analysis has completed.
1478
+
1479
+ Examples
1480
+ -------
1481
+ >>> from seer_pas_sdk import SeerSDK
1482
+ >>> seer_sdk = SeerSDK()
1483
+
1484
+ >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE")
1485
+ >>> {
1486
+ "peptide_np": <peptide_np dataframe object>,
1487
+ "peptide_panel": <peptide_panel dataframe object>,
1488
+ "protein_np": <protein_np dataframe object>,
1489
+ "protein_panel": <protein_panel dataframe object>
1490
+ }
1491
+
1492
+ >>> seer_sdk.get_analysis_result("YOUR_DIANN_ANALYSIS_ID_HERE")
1493
+ >>> {
1494
+ "peptide_np": <peptide_np dataframe object>,
1495
+ "peptide_panel": <peptide_panel dataframe object>,
1496
+ "protein_np": <protein_np dataframe object>,
1497
+ "protein_panel": <protein_panel dataframe object>,
1498
+ "diann_report": <report.tsv dataframe object>
1499
+ }
1500
+
1501
+ >>> seer_sdk.get_analysis_result("YOUR_ANALYSIS_ID_HERE", download_path="/Users/Downloads")
1502
+ >>> { "status": "Download complete." }
1503
+ """
1504
+
1505
+ if not analysis_id:
1506
+ raise ValueError("Analysis ID cannot be empty.")
1507
+
1508
+ if download_path and not os.path.exists(download_path):
1509
+ raise ValueError("The download path you entered is invalid.")
1510
+
1511
+ protein_data = self.get_analysis_result_protein_data(
1512
+ analysis_id, link=True
1513
+ )
1514
+ peptide_data = self.get_analysis_result_peptide_data(
1515
+ analysis_id, link=True
1516
+ )
1517
+ links = {
1518
+ "peptide_np": url_to_df(peptide_data["npLink"]["url"]),
1519
+ "peptide_panel": url_to_df(peptide_data["panelLink"]["url"]),
1520
+ "protein_np": url_to_df(protein_data["npLink"]["url"]),
1521
+ "protein_panel": url_to_df(protein_data["panelLink"]["url"]),
1522
+ }
1523
+
1524
+ if diann_report:
1525
+ diann_report_url = self.get_analysis_result_file_url(
1526
+ analysis_id, "report.tsv"
1527
+ )
1528
+ links["diann_report"] = url_to_df(diann_report_url["url"])
1529
+
1530
+ if download_path:
1531
+ name = f"{download_path}/downloads/{analysis_id}"
1532
+ if not os.path.exists(name):
1533
+ os.makedirs(name)
1534
+
1535
+ links["peptide_np"].to_csv(f"{name}/peptide_np.csv", sep="\t")
1536
+ links["peptide_panel"].to_csv(
1537
+ f"{name}/peptide_panel.csv", sep="\t"
1538
+ )
1539
+ links["protein_np"].to_csv(f"{name}/protein_np.csv", sep="\t")
1540
+ links["protein_panel"].to_csv(
1541
+ f"{name}/protein_panel.csv", sep="\t"
1542
+ )
1543
+
1544
+ if "diann_report" in links:
1545
+ links["diann_report"].to_csv(
1546
+ f"{name}/diann_report.csv", sep="\t"
1547
+ )
1548
+
1549
+ return {"status": "Download complete."}
1550
+
1551
+ return links
1552
+
1553
+ def analysis_complete(self, analysis_id: str):
1554
+ """
1555
+ Returns the status of the analysis with the given id.
1556
+
1557
+ Parameters
1558
+ ----------
1559
+ analysis_id : str
1560
+ The analysis id.
1561
+
1562
+ Returns
1563
+ -------
1564
+ res : dict
1565
+ A dictionary containing the status of the analysis.
1566
+
1567
+ Examples
1568
+ -------
1569
+ >>> from seer_pas_sdk import SeerSDK
1570
+ >>> seer_sdk = SeerSDK()
1571
+ >>> seer_sdk.analysis_complete("YOUR_ANALYSIS_ID_HERE")
1572
+ >>> {
1573
+ "status": "SUCCEEDED"
1574
+ }
1575
+ """
1576
+
1577
+ if not analysis_id:
1578
+ raise ValueError("Analysis id cannot be empty.")
1579
+
1580
+ try:
1581
+ res = self.get_analysis(analysis_id)
1582
+ except ValueError:
1583
+ return ValueError("Analysis not found. Your ID could be incorrect")
1584
+
1585
+ return {"status": res[0]["status"]}
1586
+
1587
+ def list_ms_data_files(self, folder="", space=None):
1588
+ """
1589
+ Lists all the MS data files in the given folder as long as the folder path passed in the params is valid.
1590
+
1591
+ Parameters
1592
+ ----------
1593
+ folder : str, optional
1594
+ Folder path to list the files from. Defaults to an empty string and displays all files for the user.
1595
+ space : str, optional
1596
+ ID of the user group to which the files belong, defaulted to None.
1597
+
1598
+ Returns
1599
+ -------
1600
+ list
1601
+ Contains the list of files in the folder.
937
1602
 
938
1603
  Examples
939
1604
  -------
@@ -1013,9 +1678,7 @@ class SeerSDK:
1013
1678
  print(f'Downloading files to "{name}"\n')
1014
1679
 
1015
1680
  URL = f"{self._auth.url}api/v1/msdataindex/download/getUrl"
1016
- tenant_id = jwt.decode(ID_TOKEN, options={"verify_signature": False})[
1017
- "custom:tenantId"
1018
- ]
1681
+ tenant_id = self._auth.active_tenant_id
1019
1682
 
1020
1683
  for path in paths:
1021
1684
  with self._get_auth_session() as s:
@@ -1033,7 +1696,6 @@ class SeerSDK:
1033
1696
  "Could not download file. Please check if the backend is running."
1034
1697
  )
1035
1698
  urls.append(download_url.text)
1036
-
1037
1699
  for i in range(len(urls)):
1038
1700
  filename = paths[i].split("/")[-1]
1039
1701
  url = urls[i]
@@ -1077,17 +1739,70 @@ class SeerSDK:
1077
1739
 
1078
1740
  return {"message": f"Files downloaded successfully to '{name}'"}
1079
1741
 
1080
- def group_analysis_results(self, analysis_id: str, box_plot: dict = None):
1742
+ def get_group_analysis(
1743
+ self, analysis_id, group_analysis_id=None, **kwargs
1744
+ ):
1745
+ """
1746
+ Returns the list of group analysis objects for the given analysis id, provided they exist.
1747
+
1748
+ Parameters
1749
+ ----------
1750
+ analysis_id : str
1751
+ The analysis id.
1752
+
1753
+ group_analysis_id : str, optional
1754
+ The group analysis id, defaulted to None. If provided, the function will return the group analysis object for the given group analysis id.
1755
+
1756
+ **kwargs : dict, optional
1757
+ Search keyword parameters to be passed in. Acceptable values are 'name' or 'description'.
1758
+
1759
+ """
1760
+ params = {"analysisid": analysis_id}
1761
+ if kwargs and not group_analysis_id:
1762
+ if len(kwargs.keys()) > 1:
1763
+ raise ValueError("Please include only one search parameter.")
1764
+ search_field = list(kwargs.keys())[0]
1765
+ if search_field not in ["name", "description"]:
1766
+ raise ValueError(
1767
+ "Invalid search field. Please choose between 'name' or 'description'."
1768
+ )
1769
+ search_item = kwargs[search_field]
1770
+
1771
+ if not search_item:
1772
+ raise ValueError(
1773
+ f"Please provide a non null value for {search_field}"
1774
+ )
1775
+ params["searchFields"] = search_field
1776
+ params["searchItem"] = search_item
1777
+
1778
+ URL = f"{self._auth.url}api/v1/groupanalysis/groupanalyses"
1779
+
1780
+ if group_analysis_id:
1781
+ URL = f"{URL}/{group_analysis_id}"
1782
+ params["id"] = group_analysis_id
1783
+
1784
+ with self._get_auth_session() as s:
1785
+ response = s.get(URL, params=params)
1786
+ if response.status_code != 200:
1787
+ raise ServerError(
1788
+ "Request failed. Please check your parameters."
1789
+ )
1790
+ response = response.json()
1791
+ return response
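# Illustrative usage sketch for get_group_analysis (assumes at least one saved group analysis
# exists for the analysis; search keywords follow the docstring above):
#     >>> sdk.get_group_analysis("YOUR_ANALYSIS_ID_HERE").get("data", [])
#     [{'id': 'YOUR_GROUP_ANALYSIS_ID_HERE', ...}, ...]
#     >>> sdk.get_group_analysis("YOUR_ANALYSIS_ID_HERE", name="YOUR_GROUP_ANALYSIS_NAME")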
1792
+
1793
+ def group_analysis_results(self, analysis_id: str, group_analysis_id=None):
1081
1794
  """
1082
1795
  Returns the group analysis data for the given analysis id, provided it exists.
1083
1796
 
1797
+ If no group analysis id is provided, the function will return the most recent group analysis data for the given analysis id.
1798
+
1084
1799
  Parameters
1085
1800
  ----------
1086
1801
  analysis_id : str
1087
1802
  The analysis id.
1088
1803
 
1089
- box_plot : dict, optional
1090
- The box plot configuration needed for the analysis, defaulted to None. Contains `feature_type` ("protein" or "peptide") and `feature_ids` (comma separated list of feature IDs) keys.
1804
+ group_analysis_id : str, optional
1805
+ The group analysis id, defaulted to None.
1091
1806
 
1092
1807
  Returns
1093
1808
  -------
@@ -1142,7 +1857,6 @@ class SeerSDK:
1142
1857
  "peptide_processed_long_form_file_url": "",
1143
1858
  },
1144
1859
  },
1145
- "box_plot": [],
1146
1860
  }
1147
1861
 
1148
1862
  # Pre-GA data call
@@ -1153,7 +1867,7 @@ class SeerSDK:
1153
1867
  json={"analysisId": analysis_id, "grouping": "condition"},
1154
1868
  )
1155
1869
  if protein_pre_data.status_code != 200:
1156
- raise ValueError(
1870
+ raise ServerError(
1157
1871
  "Invalid request. Could not fetch group analysis protein pre data. Please check your parameters."
1158
1872
  )
1159
1873
 
@@ -1161,15 +1875,15 @@ class SeerSDK:
1161
1875
 
1162
1876
  res["pre"]["protein"] = protein_pre_data
1163
1877
 
1164
- with requests.Session() as s:
1165
- s.headers.update(HEADERS)
1878
+ with self._get_auth_session() as s:
1166
1879
 
1167
1880
  peptide_pre_data = s.post(
1168
1881
  url=f"{URL}api/v2/groupanalysis/peptide",
1169
1882
  json={"analysisId": analysis_id, "grouping": "condition"},
1170
1883
  )
1884
+
1171
1885
  if peptide_pre_data.status_code != 200:
1172
- raise ValueError(
1886
+ raise ServerError(
1173
1887
  "Invalid request. Could not fetch group analysis peptide pre data. Please check your parameters."
1174
1888
  )
1175
1889
 
@@ -1177,18 +1891,21 @@ class SeerSDK:
1177
1891
  res["pre"]["peptide"] = peptide_pre_data
1178
1892
 
1179
1893
  # Post-GA data call
1180
- with requests.Session() as s:
1181
- s.headers.update(HEADERS)
1182
-
1183
- get_saved_result = s.get(
1184
- f"{URL}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
1185
- )
1186
-
1187
- if get_saved_result.status_code != 200:
1188
- raise ValueError(
1189
- "Invalid request. Could not fetch group analysis post data. Please check your parameters."
1894
+ with self._get_auth_session() as s:
1895
+ if group_analysis_id:
1896
+ get_saved_result = self.get_group_analysis(
1897
+ analysis_id=analysis_id,
1898
+ group_analysis_id=group_analysis_id,
1899
+ )
1900
+ else:
1901
+ get_saved_result = s.get(
1902
+ f"{URL}api/v1/groupanalysis/getSavedResults?analysisid={analysis_id}"
1190
1903
  )
1191
- get_saved_result = get_saved_result.json()
1904
+ if get_saved_result.status_code != 200:
1905
+ raise ServerError(
1906
+ "Could not fetch saved results. Please check your analysis id."
1907
+ )
1908
+ get_saved_result = get_saved_result.json()
1192
1909
 
1193
1910
  # Protein data
1194
1911
  if "pgResult" in get_saved_result:
@@ -1198,6 +1915,13 @@ class SeerSDK:
1198
1915
  if "peptideResult" in get_saved_result:
1199
1916
  res["post"]["peptide"] = get_saved_result["peptideResult"]
1200
1917
 
1918
+ # require that either protein or peptide data exists
1919
+ # Error handling is necessary for volcano plot calculations downstream
1920
+ if not (res["post"].get("protein") or res["post"].get("peptide")):
1921
+ raise ValueError(
1922
+ "No group analysis data returned from server."
1923
+ )
1924
+
1201
1925
  # Protein URLs
1202
1926
  if "pgProcessedFileUrl" in get_saved_result:
1203
1927
  res["post"]["protein_url"]["protein_processed_file_url"] = (
@@ -1219,32 +1943,690 @@ class SeerSDK:
1219
1943
  "peptide_processed_long_form_file_url"
1220
1944
  ] = get_saved_result["peptideProcessedLongFormFileUrl"]
1221
1945
 
1222
- # Box plot data call
1223
- if not box_plot:
1224
- del res["box_plot"]
1225
- return res
1946
+ return res
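# Illustrative usage sketch for group_analysis_results (assumes a saved group analysis exists;
# the result is split into the pre- and post-group-analysis sections built above):
#     >>> results = sdk.group_analysis_results("YOUR_ANALYSIS_ID_HERE")
#     >>> sorted(results.keys())
#     ['post', 'pre']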
1947
+
1948
+ def get_box_plot_data(
1949
+ self,
1950
+ analysis_id: str,
1951
+ group_analysis_id: str = None,
1952
+ feature_ids: _List[str] = [],
1953
+ show_significant_only: bool = False,
1954
+ as_df=False,
1955
+ volcano_plot=False,
1956
+ cached=False,
1957
+ ):
1958
+ """Get box plot data for given analyses and samples formatted in a DataFrame or a dictionary.
1959
+
1960
+ Args:
1961
+ analysis_id (str): ID of the analysis.
+ group_analysis_id (str, optional): ID of a specific group analysis whose filters are applied when fetching intensities. Defaults to None.
+ feature_ids (list[str], optional): Filter result object to a set of ids. Defaults to [].
1963
+ show_significant_only (bool, optional): Mark true if only significant results are to be returned. Defaults to False.
1964
+ as_df (bool, optional): Mark true if return object should be a pandas DataFrame. Defaults to False.
1965
+ volcano_plot (bool, optional): Mark true to include the volcano plot data in the return object. Defaults to False.
1966
+ cached (bool, optional): Mark true to return volcano plot data as a VolcanoPlotBuilder object. No effect if volcano_plot flag is marked false. Defaults to False.
1967
+
1968
+ Raises:
1969
+ ValueError: Invalid feature type. Must be either 'protein' or 'peptide'.
1970
+ ServerError: Could not fetch box plot data.
1971
+
1972
+ Returns:
1973
+ list[dict] | pd.DataFrame : A list of dictionaries or a dataframe with each row containing the following keys/columns:
1974
+ 'proteinId', 'intensity', 'sampleName', 'sampleId', 'condition', 'gene'
1975
+ """
1976
+
1977
+ with self._get_auth_session() as s:
1978
+
1979
+ # API call 1 - get volcano plot data for filtered results and gene mapping
1980
+ builder = self.get_volcano_plot_data(
1981
+ analysis_id, cached=True, group_analysis_id=group_analysis_id
1982
+ )
1983
+
1984
+ protein_peptide_gene_map = builder.protein_gene_map
1985
+
1986
+ # API call 2 - get analysis samples metadata to get condition
1987
+ samples_metadata = self.get_analysis_samples(analysis_id)
1988
+
1989
+ json = {"analysisId": analysis_id}
1990
+ if feature_ids:
1991
+ json["featureIds"] = ",".join(feature_ids)
1992
+ filters = ""
1993
+ # API call 3 - get group analysis data. This gives us the filters for the group analysis
1994
+ if group_analysis_id:
1995
+ ga = self.get_group_analysis(
1996
+ analysis_id, group_analysis_id=group_analysis_id
1997
+ )
1998
+ filters = ga["parameters"]["filters"]
1999
+ if filters:
2000
+ json["filters"] = filters
2001
+
2002
+ json["featureType"] = (
2003
+ builder.type if builder.type == "peptide" else "proteingroup"
2004
+ )
1226
2005
 
1227
- with requests.Session() as s:
1228
- s.headers.update(HEADERS)
1229
- box_plot["feature_type"] = box_plot["feature_type"].lower()
2006
+ # API call 4 - get intensities
1230
2007
  box_plot_data = s.post(
1231
- url=f"{URL}api/v1/groupanalysis/rawdata",
1232
- json={
1233
- "analysisId": analysis_id,
1234
- "featureIds": (
1235
- ",".join(box_plot["feature_ids"])
1236
- if len(box_plot["feature_ids"]) > 1
1237
- else box_plot["feature_ids"][0]
1238
- ),
1239
- "featureType": f"{box_plot['feature_type']}group",
1240
- },
2008
+ url=f"{self._auth.url}api/v1/groupanalysis/rawdata", json=json
1241
2009
  )
2010
+
1242
2011
  if box_plot_data.status_code != 200:
2012
+ raise ServerError("Could not fetch box plot data.")
2013
+
2014
+ box_plot_data = box_plot_data.json()
2015
+ feature_type_index = (
2016
+ "peptide" if builder.type == "peptide" else "proteinId"
2017
+ )
2018
+ box_plot_data = [
2019
+ x
2020
+ for x in box_plot_data
2021
+ if x[feature_type_index] in protein_peptide_gene_map
2022
+ ]
2023
+ sample_id_condition = {
2024
+ x["id"]: x["condition"] for x in samples_metadata[0]["samples"]
2025
+ }
2026
+
2027
+ if show_significant_only:
2028
+ significant_rows = set(builder.get_significant_rows())
2029
+ box_plot_data = [
2030
+ x
2031
+ for x in box_plot_data
2032
+ if x[feature_type_index] in significant_rows
2033
+ ]
2034
+
2035
+ for row in box_plot_data:
2036
+ row["condition"] = sample_id_condition.get(
2037
+ row["sampleId"], None
2038
+ )
2039
+ row["gene"] = builder.protein_gene_map[row[feature_type_index]]
2040
+
2041
+ if as_df:
2042
+ box_plot_data = pd.DataFrame(box_plot_data)
2043
+
2044
+ if volcano_plot:
2045
+ vplot = None
2046
+ if cached:
2047
+ vplot = builder
2048
+ elif as_df:
2049
+ vplot = pd.DataFrame(builder.volcano_plot)
2050
+ else:
2051
+ vplot = builder.volcano_plot
2052
+
2053
+ return {"box_plot": box_plot_data, "volcano_plot": vplot}
2054
+ return box_plot_data
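# Illustrative usage sketch for get_box_plot_data (assumes a saved group analysis; IDs are
# placeholders):
#     >>> df = sdk.get_box_plot_data("YOUR_ANALYSIS_ID_HERE", group_analysis_id="YOUR_GROUP_ANALYSIS_ID_HERE", as_df=True)
#     # each row carries proteinId/intensity/sampleName/sampleId/condition/gene as documented above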
2055
+
2056
+ def get_all_volcano_plot_data(self, analysis_id: str, box_plot=False):
2057
+ """
2058
+ Get all volcano plot data for a given analysis.
2059
+
2060
+ Args:
2061
+ analysis_id (str): ID of the analysis.
2062
+ box_plot (bool, optional): Mark true to include box plot data in the return object. Defaults to False.
2063
+
2064
+ Returns:
2065
+ dict: A dictionary containing the volcano plot and optionally box plot data for each group analysis.
2066
+ """
2067
+ group_analysis_ids = [
2068
+ x["id"]
2069
+ for x in self.get_group_analysis(analysis_id).get("data", [])
2070
+ if x.get("id")
2071
+ ]
2072
+ if not group_analysis_ids:
2073
+ return {}
2074
+ results = dict()
2075
+
2076
+ if box_plot:
2077
+ results = {
2078
+ ga_id: {
2079
+ k: v
2080
+ for k, v in self.get_box_plot_data(
2081
+ analysis_id, ga_id, as_df=True, volcano_plot=True
2082
+ ).items()
2083
+ }
2084
+ for ga_id in group_analysis_ids
2085
+ }
2086
+ else:
2087
+ results = {
2088
+ ga_id: {
2089
+ "volcano_plot": self.get_volcano_plot_data(
2090
+ analysis_id, group_analysis_id=ga_id, as_df=True
2091
+ )
2092
+ }
2093
+ for ga_id in group_analysis_ids
2094
+ }
2095
+
2096
+ return results
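# Illustrative usage sketch for get_all_volcano_plot_data (one entry per saved group analysis;
# IDs are placeholders):
#     >>> sdk.get_all_volcano_plot_data("YOUR_ANALYSIS_ID_HERE", box_plot=True)
#     {'YOUR_GROUP_ANALYSIS_ID_HERE': {'box_plot': <dataframe>, 'volcano_plot': <dataframe>}, ...}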
2097
+
2098
+ def _get_analysis_pca(
2099
+ self,
2100
+ analysis_ids: _List[str],
2101
+ sample_ids: _List[str],
2102
+ type: str,
2103
+ hide_control: bool = False,
2104
+ ):
2105
+ """
2106
+ ****************
2107
+ [UNEXPOSED METHOD CALL]
2108
+ ****************
2109
+ Get PCA data for given analyses and samples.
2110
+ Args:
2111
+ analysis_ids (list[str]): IDs of the analyses of interest.
2112
+ sample_ids (list[str]): IDs of the samples of interest.
2113
+ type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
2114
+ hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
2115
+ Raises:
2116
+ ValueError: No analysis IDs provided.
2117
+ ValueError: No sample IDs provided.
2118
+ ValueError: Invalid type provided.
2119
+ ServerError: Could not fetch PCA data.
2120
+ Returns:
2121
+ dict
2122
+ Pure response from the API.
2123
+ """
2124
+ if not analysis_ids:
2125
+ raise ValueError("Analysis IDs cannot be empty.")
2126
+ if type not in ["protein", "peptide"]:
2127
+ raise ValueError("Type must be either 'protein' or 'peptide'.")
2128
+
2129
+ URL = f"{self._auth.url}api/v1/analysisqcpca"
2130
+
2131
+ with self._get_auth_session() as s:
2132
+ json = {
2133
+ "analysisIds": ",".join(analysis_ids),
2134
+ "type": type,
2135
+ }
2136
+ if sample_ids:
2137
+ json["sampleIds"] = ",".join(sample_ids)
2138
+
2139
+ # specify hideControl as a string - unexpected behavior occurs if a boolean is passed
2140
+ if hide_control:
2141
+ json["hideControl"] = "true"
2142
+ else:
2143
+ json["hideControl"] = "false"
2144
+
2145
+ pca_data = s.post(URL, json=json)
2146
+
2147
+ if pca_data.status_code != 200:
2148
+ raise ServerError("Could not fetch PCA data.")
2149
+
2150
+ return pca_data.json()
2151
+
2152
+    def get_analysis_pca_data(
+        self,
+        analysis_ids: _List[str],
+        type: str,
+        sample_ids: _List[str] = [],
+        hide_control: bool = False,
+        as_df=False,
+    ):
+        """
+        Get PCA data for given analyses and samples, formatted as a DataFrame or a dictionary.
+
+        Args:
+            analysis_ids (list[str]): IDs of the analyses of interest.
+            type (str): Type of data to be fetched. Must be either 'protein' or 'peptide'.
+            sample_ids (list[str], optional): IDs of the samples of interest.
+            hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
+            as_df (bool, optional): Mark true if the data should be returned as a pandas DataFrame. Defaults to False.
+
+        Raises:
+            ValueError: No analysis IDs provided.
+            ValueError: Invalid type parameter provided.
+            ServerError: Could not fetch PCA data.
+
+        Returns:
+            A dictionary with the following keys:
+                - x_contribution_ratio (float): Proportion of variance explained by the x-axis.
+                - y_contribution_ratio (float): Proportion of variance explained by the y-axis.
+                - data (list[dict] | pd.DataFrame): A list of dictionaries or a dataframe with each row containing the following keys/columns:
+                    - sample_name (str): Name of the sample.
+                    - plate_name (str): Name of the plate.
+                    - sample_id (int): ID of the sample.
+                    - condition (str): Condition.
+                    - PC1 (float): X-value of the PCA point.
+                    - PC2 (float): Y-value of the PCA point.
+                    - custom_* (str): Custom fields. Included if meaningful, i.e. not null, in the data.
+
+        Examples
+        --------
+        >>> from seer_pas_sdk import *
+        >>> sdk = SeerSDK()
+        >>> sdk.get_analysis_pca_data(
+        ...     analysis_ids=["analysis_id"],
+        ...     sample_ids=["sample_id"],
+        ...     type="protein",
+        ...     hide_control=False
+        ... )
+        """
+        pca_data = self._get_analysis_pca(
+            analysis_ids, sample_ids, type, hide_control
+        )
+
+        # Common columns returned by the API.
+        generic_columns = [
+            "sample_name",
+            "plate_name",
+            "sample_id",
+            "condition",
+            "PC1",
+            "PC2",
+        ]
+
+        # Edge case: yContributionRatio is missing when zero points are returned.
+        if "yContributionRatio" not in pca_data:
+            y_contribution_ratio = None
+        else:
+            y_contribution_ratio = pca_data["yContributionRatio"]
+
+        x_contribution_ratio = pca_data["xContributionRatio"]
+        samples = pca_data["samples"]
+        points = pca_data["points"]
+
+        df = pd.DataFrame(
+            [
+                sample | {"PC1": point[0], "PC2": point[1]}
+                for sample, point in zip(samples, points)
+            ]
+        )
+
+        # Slice the df such that only custom columns are dropped in the absence of data.
+        df = pd.concat(
+            [
+                df.drop(columns=generic_columns).dropna(how="all", axis=1),
+                df[generic_columns],
+            ],
+            axis=1,
+        )
+
+        # Filter down to a minimal set of columns.
+        permitted_columns = [
+            x
+            for x in df.columns
+            if x in generic_columns or x.startswith("custom_")
+        ]
+
+        df = df.loc(axis=1)[permitted_columns]
+
+        # Convert to a list of records unless a DataFrame was requested.
+        if not as_df:
+            df = df.to_dict(orient="records")
+        result = dict(
+            x_contribution_ratio=x_contribution_ratio,
+            y_contribution_ratio=y_contribution_ratio,
+            data=df,
+        )
+        return result
+
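# --- Editor's note: illustrative usage sketch; not part of the package diff. ---
# A minimal example of consuming get_analysis_pca_data(), assuming valid credentials
# and a real analysis ID (both placeholders here). Keys follow the docstring above.
from seer_pas_sdk import SeerSDK

sdk = SeerSDK("username", "password")        # placeholder credentials
pca = sdk.get_analysis_pca_data(
    analysis_ids=["my-analysis-id"],         # placeholder analysis ID
    type="protein",
    hide_control=True,
    as_df=True,
)
print(pca["x_contribution_ratio"], pca["y_contribution_ratio"])
# With as_df=True, 'data' is a DataFrame holding PC1/PC2 plus sample metadata columns.
print(pca["data"][["sample_name", "condition", "PC1", "PC2"]].head())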
+    def get_analysis_hierarchical_clustering(
+        self,
+        analysis_ids: _List[str],
+        sample_ids: _List[str] = [],
+        hide_control: bool = False,
+    ):
+        """
+        Get hierarchical clustering data for given analyses and samples.
+
+        Args:
+            analysis_ids (list[str]): IDs of the analyses.
+            sample_ids (list[str], optional): IDs of the samples.
+            hide_control (bool, optional): Mark true if controls are to be excluded. Defaults to False.
+
+        Raises:
+            ValueError: No analysis IDs provided.
+            ValueError: Response status code is not 200.
+            ValueError: No sample data returned from the server.
+
+        Returns:
+            dict
+                Hierarchical clustering data returned by the API.
+        """
+        if not analysis_ids:
+            raise ValueError("Analysis IDs cannot be empty.")
+
+        URL = f"{self._auth.url}api/v1/analysishcluster"
+
+        with self._get_auth_session() as s:
+            json = {
+                "analysisIds": ",".join(analysis_ids),
+            }
+            if sample_ids:
+                json["sampleIds"] = ",".join(sample_ids)
+
+            # Specify hideControl as a string:
+            # Python bool values are not recognized by the API.
+            if hide_control:
+                json["hideControl"] = "true"
+            else:
+                json["hideControl"] = "false"
+
+            hc_data = s.post(URL, json=json)
+
+            if hc_data.status_code != 200:
                 raise ValueError(
-                    "Invalid request, could not fetch box plot data. Please verify your 'box_plot' parameters, including 'feature_ids' (comma-separated list of feature IDs) and 'feature_type' (needs to be a either 'protein' or 'peptide')."
+                    "Invalid request. Please check your parameters."
                 )
 
-            box_plot_data = box_plot_data.json()
-            res["box_plot"] = box_plot_data
+            data = hc_data.json()
 
-        return res
+        if "samples" not in data:
+            raise ValueError("No sample data returned from server.")
+
+        # Copy each sample record into a plain dictionary; no fields are dropped here.
+        data["samples"] = [
+            {k: v for k, v in sample.items()} for sample in data["samples"]
+        ]
+
+        return data
+
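# --- Editor's note: illustrative usage sketch; not part of the package diff. ---
# Fetches hierarchical clustering data, reusing the authenticated `sdk` instance and
# placeholder analysis ID from the PCA example above.
hc = sdk.get_analysis_hierarchical_clustering(
    analysis_ids=["my-analysis-id"],  # placeholder analysis ID
    hide_control=True,
)
# The method guarantees a "samples" key; the remaining keys mirror the raw API response.
for sample in hc["samples"][:5]:
    print(sample)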
+    def get_ppi_network_data(
+        self, significant_pgs: _List[str], species: str = None
+    ):
+        """
+        Get PPI network data for given significant protein groups.
+
+        Args:
+            significant_pgs (_List[str]): Significant protein groups.
+            species (str, optional): Species of interest. Defaults to None.
+
+        Raises:
+            ValueError: No significant protein groups provided.
+            ValueError: Response status code is not 200.
+
+        Returns:
+            dict
+                Response returned by the API.
+        """
+        if not significant_pgs:
+            raise ValueError("Significant protein groups cannot be empty.")
+
+        URL = f"{self._auth.url}api/v1/groupanalysis/stringdb"
+
+        with self._get_auth_session() as s:
+            json = {
+                "significantPGs": ",".join(significant_pgs),
+            }
+            if species:
+                json["species"] = species
+
+            ppi_data = s.post(URL, json=json)
+
+            if ppi_data.status_code != 200:
+                raise ValueError("Server error - bad response")
+
+            return ppi_data.json()
+
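# --- Editor's note: illustrative usage sketch; not part of the package diff. ---
# get_ppi_network_data() posts a comma-joined list of significant protein groups to the
# STRING-DB endpoint. The accessions and species value below are placeholders.
ppi = sdk.get_ppi_network_data(
    significant_pgs=["P02768", "P01857"],  # placeholder protein group accessions
    species="human",                       # placeholder; only sent when provided
)
print(list(ppi) if isinstance(ppi, dict) else ppi)  # raw JSON response from the API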
+    # Groups are user-defined by the sample description file.
+    def get_cluster_heatmap_data(
+        self,
+        analysis_id: str,
+        grouping: str,
+        groups: _List[str],
+        contrasts: _List[_Tuple[int, ...]],
+        stat_test: str,
+        feature_type: str,
+        significant_pgs: _List[str] = [],
+    ):
+        """Get cluster heatmap data for the given analysis.
+
+        Args:
+            analysis_id (str): ID of the analysis.
+            grouping (str): Category of sample groups.
+            groups (_List[str]): Sample groups.
+            contrasts (_List[_Tuple[int, ...]]): Indicate which groups are compared against each other, e.g. [(0, 1, -1, 0), (1, 0, 0, -1)].
+            stat_test (str): Statistical test to be used.
+            feature_type (str): Type of feature to be used, either 'proteingroup' or 'peptide'.
+            significant_pgs (_List[str], optional): Significant protein group IDs. Defaults to [].
+
+        Raises:
+            ValueError: Feature type must be either 'proteingroup' or 'peptide'.
+            ValueError: Stat test must be either 'ttest' or 'wilcoxon'.
+            ValueError: Invalid contrast value.
+            ValueError: Server error.
+
+        Returns:
+            dict: the response object
+                clusterProtein: List of protein clusters
+                    clusters:
+                        indexes: list[int], List of indexes
+                        height: int, Height of the cluster
+                        children: list[dict] | None, Children of the cluster
+                clusterSample: List of sample clusters
+                    clusters:
+                        indexes: list[int], List of indexes
+                        height: int, Height of the cluster
+                        children: list[dict] | None, Children of the cluster
+                data: List of data
+        """
+        if feature_type not in ["proteingroup", "peptide"]:
+            raise ValueError(
+                "Feature type must be either 'proteingroup' or 'peptide'."
+            )
+
+        if stat_test not in ["ttest", "wilcoxon"]:
+            raise ValueError("Stat test must be either 'ttest' or 'wilcoxon'.")
+
+        for contrast in contrasts:
+            validate_contrast(contrast, len(groups))
+
+        formatted_contrasts = ";".join(
+            [",".join(map(str, x)) for x in contrasts]
+        )
+
+        payload = dict(
+            analysisId=analysis_id,
+            grouping=grouping,
+            groups=",".join(groups),
+            contrasts=formatted_contrasts,
+            statTest=stat_test,
+            featureType=feature_type,
+            significantPGs=",".join(significant_pgs),
+        )
+
+        with self._get_auth_session() as s:
+            URL = f"{self._auth.url}api/v2/clusterheatmap"
+            response = s.post(URL, json=payload)
+            if response.status_code != 200:
+                raise ValueError("Server error. Bad response.")
+            return response.json()
+
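# --- Editor's note: illustrative usage sketch; not part of the package diff. ---
# Each contrast carries one weight per group; the method serializes contrasts as
# comma-joined tuples separated by semicolons (here "1,-1,0;0,1,-1"). Grouping and
# group names are placeholders taken from a hypothetical sample description file.
heatmap = sdk.get_cluster_heatmap_data(
    analysis_id="my-analysis-id",        # placeholder analysis ID
    grouping="Condition",                # placeholder grouping column
    groups=["healthy", "disease", "QC"],
    contrasts=[(1, -1, 0), (0, 1, -1)],  # healthy vs disease, disease vs QC
    stat_test="ttest",
    feature_type="proteingroup",
)
print(list(heatmap))  # e.g. clusterProtein, clusterSample, data (see docstring above)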
+    def get_enrichment_plot(
+        self,
+        analysis_id: str,
+        significant_pgs: _List[str],
+        summarize_output: bool = False,
+        exclude_singleton: bool = False,
+        cutoff: float = None,
+        species: str = None,
+    ):
+        """
+        Get enrichment plot data for a given analysis ID.
+
+        Args:
+            analysis_id (str): ID of the analysis.
+            significant_pgs (_List[str]): List of significant protein/peptide groups.
+            summarize_output (bool, optional): Summarize the output. Defaults to False.
+            exclude_singleton (bool, optional): Exclude singleton values. Defaults to False.
+            cutoff (float, optional): Cutoff value for the p-value to determine significance. Defaults to None.
+            species (str, optional): Species to filter the data by. Defaults to None.
+
+        Raises:
+            ValueError: No significant protein groups provided.
+            ValueError: Could not fetch enrichment plot data.
+
+        Returns:
+            dict: A dictionary containing the enrichment plot data.
+        """
+
+        URL = f"{self._auth.url}api/v1/groupanalysis/enrichmentgo"
+
+        if not significant_pgs:
+            raise ValueError("Significant pgs cannot be empty.")
+
+        with self._get_auth_session() as s:
+            json = {
+                "analysisId": analysis_id,
+                "significantPGs": significant_pgs,
+                "summarizeOutput": summarize_output,
+                "excludeSingleton": exclude_singleton,
+            }
+            if cutoff:
+                json["cutoff"] = cutoff
+            if species:
+                json["species"] = species
+
+            enrichment_data = s.post(URL, json=json)
+
+            if enrichment_data.status_code != 200:
+                raise ValueError("Could not fetch enrichment plot data.")
+
+            return enrichment_data.json()
+
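# --- Editor's note: illustrative usage sketch; not part of the package diff. ---
# The significant protein groups would normally come from a volcano plot; here they are
# placeholder accessions. cutoff and species are only included in the request when set.
enrichment = sdk.get_enrichment_plot(
    analysis_id="my-analysis-id",          # placeholder analysis ID
    significant_pgs=["P02768", "P01857"],  # placeholder protein groups
    summarize_output=True,
    cutoff=0.05,
)
print(type(enrichment))  # raw enrichment plot JSON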
+    def get_volcano_plot_data(
+        self,
+        analysis_id,
+        group_analysis_id=None,
+        significance_threshold=0.05,
+        fold_change_threshold=1,
+        label_by="fold_change",
+        cached=False,
+        as_df=False,
+    ):
+        """Get volcano plot data for a given analysis ID.
+
+        Args:
+            analysis_id (str): ID of the analysis.
+            group_analysis_id (str, optional): ID of the group analysis whose results should be used. Defaults to None.
+            significance_threshold (float, optional): Cutoff value for the p-value to determine significance. Defaults to 0.05.
+            fold_change_threshold (float, optional): Cutoff value for the fold change to determine significance. Defaults to 1.
+            label_by (str, optional): Metric to sort result data. Defaults to "fold_change".
+            cached (bool, optional): Return a VolcanoPlotBuilder object for calculation reuse. Defaults to False.
+            as_df (bool, optional): Return data as a pandas DataFrame. Defaults to False.
+
+        Raises:
+            ServerError: Could not fetch group analysis results.
+
+        Returns:
+            list[dict] | pd.DataFrame | VolcanoPlotBuilder: A list of dictionaries, a DataFrame, or a VolcanoPlotBuilder object containing the volcano plot data.
+                The object contains the following columns: 'logFD', 'negativeLog10P', 'dataIndex', 'rowID', 'gene', 'protein',
+                'group', 'significant', 'euclideanDistance'.
+        """
+        try:
+            response = self.group_analysis_results(
+                analysis_id, group_analysis_id=group_analysis_id
+            )
+        except Exception:
+            raise ServerError(
+                f"Could not fetch group analysis results. Please check that group analysis has completed for analysis {analysis_id}."
+            )
+
+        obj = VolcanoPlotBuilder(
+            response, significance_threshold, fold_change_threshold, label_by
+        )
+
+        if cached:
+            return obj
+        elif as_df:
+            return pd.DataFrame(obj.volcano_plot)
+        else:
+            return obj.volcano_plot
+
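# --- Editor's note: illustrative usage sketch; not part of the package diff. ---
# With as_df=True the method returns a DataFrame; with cached=True it returns the
# VolcanoPlotBuilder so the thresholding work can be reused. The filter below assumes
# the 'significant' column is a boolean flag, as suggested by the docstring above.
volcano_df = sdk.get_volcano_plot_data(
    analysis_id="my-analysis-id",  # placeholder analysis ID
    significance_threshold=0.05,
    fold_change_threshold=1,
    as_df=True,
)
significant = volcano_df[volcano_df["significant"]]
print(significant[["protein", "gene", "logFD", "negativeLog10P"]].head())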
+    def get_analysis_samples(self, analysis_id: str):
+        """
+        Get the samples associated with a given analysis ID.
+
+        Args:
+            analysis_id (str): The analysis ID.
+
+        Raises:
+            ValueError: No analysis ID provided.
+            ServerError: Could not retrieve samples for analysis.
+
+        Returns:
+            dict: A dictionary containing the samples associated with the analysis.
+        """
+        if not analysis_id:
+            raise ValueError("Analysis ID cannot be empty.")
+
+        URL = f"{self._auth.url}api/v1/analyses/samples/{analysis_id}"
+        with self._get_auth_session() as s:
+            samples = s.get(URL)
+
+            if samples.status_code != 200:
+                raise ServerError("Could not retrieve samples for analysis.")
+
+            return samples.json()
+
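# --- Editor's note: illustrative usage sketch; not part of the package diff. ---
# Simple lookup of the samples behind an analysis, using the placeholder ID from above.
samples = sdk.get_analysis_samples("my-analysis-id")
print(samples)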
+    def get_analysis_protocol_fasta(self, analysis_id, download_path=None):
+        """
+        Download the FASTA file(s) referenced by the analysis protocol of a given analysis.
+
+        Args:
+            analysis_id (str): ID of the analysis.
+            download_path (str, optional): Local directory for the downloaded file(s). Defaults to the current working directory.
+
+        Raises:
+            ValueError: No analysis ID provided, the server response could not be parsed, or the analysis engine is not supported for fasta download.
+            ServerError: A request failed or no fasta file name was returned.
+        """
+        if not analysis_id:
+            raise ValueError("Analysis ID cannot be empty.")
+
+        if not download_path:
+            download_path = os.getcwd()
+
+        try:
+            analysis_protocol_id = self.get_analysis(analysis_id)[0][
+                "analysis_protocol_id"
+            ]
+        except (IndexError, KeyError):
+            raise ValueError("Could not parse server response.")
+
+        try:
+            analysis_protocol_engine = self.get_analysis_protocols(
+                analysis_protocol_id=analysis_protocol_id
+            )[0]["analysis_engine"]
+        except (IndexError, KeyError):
+            raise ValueError("Could not parse server response.")
+
+        analysis_protocol_engine = analysis_protocol_engine.lower()
+        if analysis_protocol_engine == "diann":
+            URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/diann/{analysis_protocol_id}"
+        elif analysis_protocol_engine == "encyclopedia":
+            URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/dia/{analysis_protocol_id}"
+        elif analysis_protocol_engine == "msfragger":
+            URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/msfragger/{analysis_protocol_id}"
+        elif analysis_protocol_engine == "proteogenomics":
+            URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/proteogenomics/{analysis_protocol_id}"
+        else:
+            # Change needed on the backend to get the s3 file path for MaxQuant:
+            # URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/{analysis_protocol_id}"
+            raise ValueError(
+                f"Analysis protocol engine {analysis_protocol_engine} not supported for fasta download."
+            )
+
+        with self._get_auth_session() as s:
+            response = s.get(URL)
+            if response.status_code != 200:
+                raise ServerError("Request failed.")
+            response = response.json()
+            if isinstance(response, dict):
+                response = response["editableParameters"]
+            fasta_filenames = [
+                x["Value"]
+                for x in response
+                if x["Key"] in ["fasta", "fastaFilePath", "referencegenome"]
+            ]
+            if not fasta_filenames:
+                raise ServerError("No fasta file name returned from server.")
+
+        URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
+        for file in fasta_filenames:
+            with self._get_auth_session() as s:
+                response = s.post(URL, json={"filepath": file})
+                if response.status_code != 200:
+                    raise ServerError("Request failed.")
+                url = response.json()["url"]
+            filename = os.path.basename(file)
+            print(f"Downloading {filename}")
+            # Two attempts: if the first download fails (e.g. the destination folder
+            # does not exist yet), create the directory and retry once.
+            for _ in range(2):
+                try:
+                    with tqdm(
+                        unit="B",
+                        unit_scale=True,
+                        unit_divisor=1024,
+                        miniters=1,
+                        desc="Progress",
+                    ) as t:
+                        ssl._create_default_https_context = (
+                            ssl._create_unverified_context
+                        )
+                        urllib.request.urlretrieve(
+                            url,
+                            f"{download_path}/{filename}",
+                            reporthook=download_hook(t),
+                            data=None,
+                        )
+                    break
+                except Exception:
+                    if not os.path.isdir(f"{download_path}"):
+                        os.makedirs(f"{download_path}")
+
+            print(f"Downloaded file to {download_path}/{filename}")