seer-pas-sdk 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,19 +2,12 @@
  seer_pas_sdk.core.unsupported -- in development
  """
 
- from tqdm import tqdm
-
  import os
- import jwt
- import requests
- import urllib.request
- import ssl
  import shutil
 
  from typing import List as _List
 
  from ..common import *
- from ..auth import Auth
  from ..objects import PlateMap
 
  from .sdk import SeerSDK as _SeerSDK
@@ -71,7 +64,7 @@ class _UnsupportedSDK(_SeerSDK):
 
  URL = f"{self._auth.url}api/v1/samples"
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("addsample") as s:
 
  response = s.post(URL, json=sample_entry)
 
@@ -112,7 +105,7 @@ class _UnsupportedSDK(_SeerSDK):
 
  URL = f"{self._auth.url}api/v1/samples/batch"
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("addsamples") as s:
  response = s.post(URL, json={"samples": sample_info})
 
  if response.status_code != 200:
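Every `self._get_auth_session()` call site in this release now passes a short, endpoint-specific label such as "addsample" or "addsamples". The signature of `_get_auth_session` itself is not part of this diff, so the following is only a hedged sketch of what a labelled session helper could look like, assuming the label is an optional tag attached to the authenticated session; the parameter name `action` and the `X-Action` header are illustrative, not confirmed by the source.

    from typing import Optional
    import requests

    def _get_auth_session(self, action: Optional[str] = None) -> requests.Session:
        # Hedged sketch only: open an authenticated session and, when a label is
        # given, attach it so individual requests can be attributed server-side.
        s = requests.Session()
        s.headers["Authorization"] = f"Bearer {self._auth.get_token()[0]}"
        if action:
            # Assumed header; the real mechanism is not shown in the diff.
            s.headers["X-Action"] = action
        return s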
@@ -164,9 +157,7 @@ class _UnsupportedSDK(_SeerSDK):
  if not project_name:
  raise ValueError("Project name cannot be empty.")
 
- all_plate_ids = set(
- [plate["id"] for plate in self.get_plate_metadata()]
- )
+ all_plate_ids = set([plate["id"] for plate in self.find_plates()])
 
  for plate_id in plate_ids:
  if plate_id not in all_plate_ids:
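Judging from the call sites changed throughout this diff, several 1.0.0 lookup helpers are replaced by `find_*` equivalents in 1.1.1. A hedged migration sketch, assuming an authenticated SDK instance `sdk` (whether the old names remain as aliases is not shown here):

    plates = sdk.find_plates()                    # was: sdk.get_plate_metadata()
    samples = sdk.find_samples(plate_id="...")    # was: sdk.get_samples(plate_id="...")
    projects = sdk.find_projects()                # was: sdk.get_project_metadata()
    protocols = sdk.find_analysis_protocols()     # was: sdk.get_analysis_protocols()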
@@ -176,7 +167,7 @@ class _UnsupportedSDK(_SeerSDK):
 
  URL = f"{self._auth.url}api/v1/projects"
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("addproject") as s:
 
  project = s.post(
  URL,
@@ -233,7 +224,7 @@ class _UnsupportedSDK(_SeerSDK):
 
  URL = f"{self._auth.url}api/v1/addSamplesToProject/{project_id}"
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("addprojectsamples") as s:
 
  response = s.put(
  URL,
@@ -277,7 +268,7 @@ class _UnsupportedSDK(_SeerSDK):
  samples = (
  x["id"]
  for plate_id in plates
- for x in self.get_samples(plate_id=plate_id)
+ for x in self.find_samples(plate_id=plate_id)
  )
 
  return self.add_samples_to_project(
@@ -375,7 +366,7 @@ class _UnsupportedSDK(_SeerSDK):
  validate_plate_map(plate_map_data, local_file_names)
 
  # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
- with self._get_auth_session() as s:
+ with self._get_auth_session("getplateids") as s:
  plate_response = s.get(f"{self._auth.url}api/v1/plateids")
 
  if plate_response.status_code != 200:
@@ -392,7 +383,7 @@ class _UnsupportedSDK(_SeerSDK):
 
  # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("addplate") as s:
  plate_response = s.post(
  f"{self._auth.url}api/v1/plates",
  json={
@@ -415,7 +406,7 @@ class _UnsupportedSDK(_SeerSDK):
  )
 
  # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
- with self._get_auth_session() as s:
+ with self._get_auth_session("getawsuploadconfig") as s:
  config_response = s.post(
  f"{self._auth.url}api/v1/msdatas/getuploadconfig",
  json={"plateId": id_uuid},
@@ -441,7 +432,7 @@ class _UnsupportedSDK(_SeerSDK):
  s3_bucket = config_response.json()["s3Bucket"]
  s3_upload_path = config_response.json()["s3UploadPath"]
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("getawsuploadcredentials") as s:
  config_response = s.get(
  f"{self._auth.url}auth/getawscredential",
  )
@@ -487,7 +478,7 @@ class _UnsupportedSDK(_SeerSDK):
  "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
  )
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("uploadplatemapfile") as s:
  plate_map_response = s.post(
  f"{self._auth.url}api/v1/msdataindex/file",
  json={
@@ -540,7 +531,7 @@ class _UnsupportedSDK(_SeerSDK):
  "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
  )
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("uploadsampledescriptionfile") as s:
  sdf_response = s.post(
  f"{self._auth.url}api/v1/msdataindex/file",
  json={
@@ -573,7 +564,7 @@ class _UnsupportedSDK(_SeerSDK):
  )
 
  # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
- with self._get_auth_session() as s:
+ with self._get_auth_session("addmsdatas") as s:
  ms_data_response = s.post(
  f"{self._auth.url}api/v1/msdatas/batch",
  json={"msdatas": plate_map_data},
@@ -584,7 +575,7 @@ class _UnsupportedSDK(_SeerSDK):
  )
 
  # Step 9: Upload each msdata file to the S3 bucket.
- with self._get_auth_session() as s:
+ with self._get_auth_session("getawsuploadcredentials") as s:
  config_response = s.get(
  f"{self._auth.url}auth/getawscredential",
  )
@@ -629,7 +620,7 @@ class _UnsupportedSDK(_SeerSDK):
  )
 
  # Step 10: Make a call to `api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
- with self._get_auth_session() as s:
+ with self._get_auth_session("addmsdataindex") as s:
  file_response = s.post(
  f"{self._auth.url}api/v1/msdataindex/file",
  json={"files": files},
@@ -671,16 +662,16 @@ class _UnsupportedSDK(_SeerSDK):
  Name of the analysis.
 
  project_id : str
- ID of the project to which the analysis belongs. Can be fetched using the get_project_metadata() function.
+ ID of the project to which the analysis belongs. Can be fetched using the find_projects() function.
 
  sample_ids: list[str], optional
  List of sample IDs to be used for the analysis. Should be omitted if analysis is to be run with all samples.
 
  analysis_protocol_name : str, optional
- Name of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.
+ Name of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.
 
  analysis_protocol_id : str, optional
- ID of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.
+ ID of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.
 
  notes : str, optional
  Notes for the analysis, defaulted to an empty string.
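The renamed helpers are used the same way the old ones were in the body of this method (see the call sites in the hunks below). As a hedged illustration, resolving a protocol ID from its name before starting an analysis could look like this, assuming an authenticated instance `sdk` and a placeholder protocol name:

    matches = sdk.find_analysis_protocols(
        analysis_protocol_name="<protocol name>"
    )
    if not matches:
        raise ValueError("No analysis protocol with that name was found.")
    # The "id" key is the same field the SDK itself reads below.
    analysis_protocol_id = matches[0]["id"]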
@@ -714,7 +705,7 @@ class _UnsupportedSDK(_SeerSDK):
  raise ValueError("Project ID cannot be empty.")
 
  if not analysis_protocol_id and analysis_protocol_name:
- valid_analysis_protocol = self.get_analysis_protocols(
+ valid_analysis_protocol = self.find_analysis_protocols(
  analysis_protocol_name=analysis_protocol_name
  )
 
@@ -726,7 +717,7 @@ class _UnsupportedSDK(_SeerSDK):
  analysis_protocol_id = valid_analysis_protocol[0]["id"]
 
  if analysis_protocol_id and not analysis_protocol_name:
- valid_analysis_protocol = self.get_analysis_protocols(
+ valid_analysis_protocol = self.find_analysis_protocols(
  analysis_protocol_id=analysis_protocol_id
  )
 
@@ -743,7 +734,7 @@ class _UnsupportedSDK(_SeerSDK):
  if sample_ids:
  valid_ids = [
  entry["id"]
- for entry in self.get_samples(project_id=project_id)
+ for entry in self.find_samples(project_id=project_id)
  ]
 
  for sample_id in sample_ids:
@@ -758,7 +749,7 @@ class _UnsupportedSDK(_SeerSDK):
 
  URL = f"{self._auth.url}api/v1/analyze"
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("startanalysis") as s:
  req_payload = {
  "analysisName": name,
  "analysisProtocolId": analysis_protocol_id,
@@ -854,7 +845,7 @@ class _UnsupportedSDK(_SeerSDK):
  tenant_id = self.get_active_tenant_id()
 
  # Step 3: Fetch the S3 bucket name by making a call to `/api/v1/auth/getawscredential`
- with self._get_auth_session() as s:
+ with self._get_auth_session("getawsuploadcredentials") as s:
  config_response = s.get(
  f"{self._auth.url}auth/getawscredential",
  )
@@ -905,20 +896,21 @@ class _UnsupportedSDK(_SeerSDK):
 
  # Step 5: Make a call to `/api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
  result_files = None
- with self._get_auth_session() as s:
+ with self._get_auth_session("addmsdataindex") as s:
  file_response = s.post(
  f"{self._auth.url}api/v1/msdataindex/file",
  json={"files": files},
  )
 
- if (
- file_response.status_code != 200
- or not file_response.json()
- or "created" not in file_response.json()
- ):
+ if file_response.status_code != 200:
  raise ServerError("Could not upload MS Files to PAS.")
- result_files = file_response.json()["files"]
-
+ result_files = file_response.json()
+ try:
+ result_files = result_files["files"]
+ except Exception:
+ raise ServerError(
+ "Unexpected response from PAS server. Please ensure you are using the latest version of SeerSDK."
+ )
  # omit tenant_id from return file path
  for result in result_files:
  result["filePath"] = "/".join(result["filePath"].split("/")[1:])
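The stricter up-front check is replaced here by a two-step pattern: the status code is validated first, then the expected "files" key is read inside a try/except so an unexpected payload surfaces as a ServerError rather than a bare KeyError. The same pattern in isolation, with the exception types narrowed (a sketch, not SDK code):

    payload = file_response.json()
    try:
        result_files = payload["files"]
    except (KeyError, TypeError) as exc:
        # Translate an unexpected response shape into a descriptive error.
        raise ServerError("Unexpected response from PAS server.") from exc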
@@ -1003,7 +995,7 @@ class _UnsupportedSDK(_SeerSDK):
  target_folder_path = f"{tenant_id}/{target_folder_paths[0]}"
  # Retrieve msdatafileindex metadata to determine source space
  base_space = None
- with self._get_auth_session() as s:
+ with self._get_auth_session("getmsdataindex") as s:
  URL = self._auth.url + "api/v1/msdataindex/getmetadata"
  params = {"folderKey": folder_path}
  r = s.get(URL, params=params)
@@ -1046,7 +1038,7 @@ class _UnsupportedSDK(_SeerSDK):
  if target_space_id and base_space != target_space_id:
  json["targetUserGroupId"] = target_space_id
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("movemsdataindex") as s:
  URL = self._auth.url + "api/v1/msdataindex/move"
  json = json
  r = s.post(URL, json=json)
@@ -1098,167 +1090,6 @@ class _UnsupportedSDK(_SeerSDK):
  source_ms_data_files, target_ms_data_files
  )
 
- def download_analysis_files(
- self, analysis_id: str, download_path: str = "", file_name: str = ""
- ):
- """
- Download a specific analysis file from the backend given an `analysis_id` to the specified `download_path`.
-
- If no `download_path` is specified, the file will be downloaded to the current working directory.
-
- If no `file_name` is specified, all files for the analysis will be downloaded.
-
- Parameters
- ----------
- analysis_id : str
- ID of the analysis to download.
- download_path : str, optional
- Path to download the analysis file to, defaulted to current working directory.
- file_name : str, optional
- Name of the analysis file to download, defaulted to None.
-
- Returns
- -------
- dict
- Message containing whether the file was downloaded or not.
-
- Examples
- -------
- >>> from core import SeerSDK
- >>> sdk = SeerSDK()
- >>> sdk.download_analysis_files("analysis_id", "/path/to/download")
- >>> Downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
- Finished downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
-
- Downloading EXP20004_2020ms0007X11_A.mzML.quant
- Finished downloading EXP20004_2020ms0007X11_A.mzML.quant
-
- Downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
- Finished downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
-
- Downloading EXP20004_2020ms0007X11_A/diann.log
- Finished downloading EXP20004_2020ms0007X11_A/diann.log
- >>> { "message": "File downloaded successfully." }
- """
-
- def get_url(analysis_id, file_name, project_id):
- URL = f"{self._auth.url}api/v1/analysisResultFiles/getUrl"
-
- with self._get_auth_session() as s:
-
- download_url = s.post(
- URL,
- json={
- "analysisId": analysis_id,
- "filename": file_name,
- "projectId": project_id,
- },
- )
-
- if download_url.status_code != 200:
- raise ValueError(
- "Could not download file. Please check if the analysis ID is valid or the backend is running."
- )
-
- return download_url.json()["url"]
-
- if not analysis_id:
- raise ValueError("Analysis ID cannot be empty.")
-
- try:
- valid_analysis = self.get_analyses(analysis_id)[0]
- except:
- raise ValueError(
- "Invalid analysis ID. Please check if the analysis ID is valid or the backend is running."
- )
-
- project_id = valid_analysis["project_id"]
-
- if not download_path:
- download_path = os.getcwd()
- print(f"\nDownload path not specified.\n")
-
- if not os.path.isdir(download_path):
- print(
- f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
- )
- download_path = os.getcwd()
-
- name = f"{download_path}/downloads/{analysis_id}"
-
- if not os.path.exists(name):
- os.makedirs(name)
-
- URL = f"{self._auth.url}api/v1/analysisResultFiles"
-
- with self._get_auth_session() as s:
-
- analysis_files = s.get(f"{URL}/{analysis_id}")
-
- if analysis_files.status_code != 200:
- raise ValueError(
- "Invalid request. Please check if the analysis ID is valid or the backend is running."
- )
-
- res = analysis_files.json()
-
- if file_name:
- filenames = set([file["filename"] for file in res])
-
- if file_name not in filenames:
- raise ValueError(
- "Invalid file name. Please check if the file name is correct."
- )
-
- res = [file for file in res if file["filename"] == file_name]
-
- print(f'Downloading files to "{name}"\n')
-
- for file in res:
- filename = file["filename"]
- url = get_url(analysis_id, filename, project_id)
-
- print(f"Downloading {filename}")
-
- for _ in range(2):
- try:
- with tqdm(
- unit="B",
- unit_scale=True,
- unit_divisor=1024,
- miniters=1,
- desc=f"Progress",
- ) as t:
- ssl._create_default_https_context = (
- ssl._create_unverified_context
- )
- urllib.request.urlretrieve(
- url,
- f"{name}/{filename}",
- reporthook=download_hook(t),
- data=None,
- )
- break
- except:
- filename = filename.split("/")
- name += "/" + "/".join(
- [filename[i] for i in range(len(filename) - 1)]
- )
- filename = filename[-1]
- if not os.path.isdir(f"{name}/{filename}"):
- os.makedirs(f"{name}/")
-
- else:
- raise ValueError(
- "Your download failed. Please check if the backend is still running."
- )
-
- print(f"Finished downloading {filename}\n")
-
- return {
- "message": f"Files downloaded successfully to '{download_path}/downloads/{analysis_id}'"
- }
-
  def link_plate(
  self,
  ms_data_files: _List[str],
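The removed download_analysis_files helper fetched presigned URLs with urllib and globally disabled TLS certificate verification (ssl._create_unverified_context) before downloading. If a caller needs to fetch an analysis result file from a presigned URL after upgrading, a plain streamed download keeps verification enabled; this is a generic sketch, not an SDK API, and the URL and destination path are placeholders:

    import requests
    from tqdm import tqdm

    def download_presigned(url: str, dest: str) -> None:
        # Stream the file to disk with TLS verification left at its default (enabled).
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            total = int(r.headers.get("content-length", 0))
            with open(dest, "wb") as f, tqdm(total=total, unit="B", unit_scale=True) as bar:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
                    bar.update(len(chunk))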
@@ -1301,17 +1132,12 @@ class _UnsupportedSDK(_SeerSDK):
 
  plate_ids = (
  set()
- ) # contains all the plate_ids fetched from self.get_plate_metadata()
- files = [] # to be uploaded to sync frontend
+ ) # contains all the plate_ids fetched from self.find_plates()
  samples = [] # list of all the sample responses from the backend
  id_uuid = "" # uuid for the plate id
  raw_file_paths = {} # list of all the AWS raw file paths
  s3_upload_path = None
  s3_bucket = ""
- ms_data_file_names = []
- dir_exists = (
- True # flag to check if the generated_files directory exists
- )
 
  # Step 0: Check if the file paths exist in the S3 bucket.
  for file in ms_data_files:
@@ -1344,7 +1170,7 @@ class _UnsupportedSDK(_SeerSDK):
  validate_plate_map(plate_map_data, ms_data_files)
 
  # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
- with self._get_auth_session() as s:
+ with self._get_auth_session("getplateids") as s:
  plate_response = s.get(f"{self._auth.url}api/v1/plateids")
 
  if plate_response.status_code != 200:
@@ -1361,7 +1187,7 @@ class _UnsupportedSDK(_SeerSDK):
 
  # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("addplate") as s:
  plate_response = s.post(
  f"{self._auth.url}api/v1/plates",
  json={
@@ -1384,7 +1210,7 @@ class _UnsupportedSDK(_SeerSDK):
  )
 
  # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
- with self._get_auth_session() as s:
+ with self._get_auth_session("getawsuploadconfig") as s:
  config_response = s.post(
  f"{self._auth.url}api/v1/msdatas/getuploadconfig",
  json={"plateId": id_uuid},
@@ -1410,7 +1236,7 @@ class _UnsupportedSDK(_SeerSDK):
  s3_bucket = config_response.json()["s3Bucket"]
  s3_upload_path = config_response.json()["s3UploadPath"]
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("getawsuploadcredentials") as s:
  config_response = s.get(
  f"{self._auth.url}auth/getawscredential",
  )
@@ -1456,7 +1282,7 @@ class _UnsupportedSDK(_SeerSDK):
  "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
  )
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("uploadplatemap") as s:
  plate_map_response = s.post(
  f"{self._auth.url}api/v1/msdataindex/file",
  json={
@@ -1507,7 +1333,7 @@ class _UnsupportedSDK(_SeerSDK):
  "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
  )
 
- with self._get_auth_session() as s:
+ with self._get_auth_session("uploadsampledescription") as s:
  sdf_response = s.post(
  f"{self._auth.url}api/v1/msdataindex/file",
  json={
@@ -1542,7 +1368,7 @@ class _UnsupportedSDK(_SeerSDK):
  )
 
  # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
- with self._get_auth_session() as s:
+ with self._get_auth_session("addmsdatas") as s:
  ms_data_response = s.post(
  f"{self._auth.url}api/v1/msdatas/batch",
  json={"msdatas": plate_map_data},
@@ -1555,7 +1381,7 @@ class _UnsupportedSDK(_SeerSDK):
  print(f"Plate generated with id: '{id_uuid}'")
  return id_uuid
 
- def _get_msdataindex_metadata(self, folder=""):
+ def _get_msdataindex(self, folder=""):
  """
  Get metadata for a given file path.
 
@@ -1566,20 +1392,15 @@ class _UnsupportedSDK(_SeerSDK):
  dict: A dictionary containing the metadata for the file.
  """
  URL = f"{self._auth.url}api/v2/msdataindex/getmetadata"
- with self._get_auth_session() as s:
+ with self._get_auth_session("getmsdataindex") as s:
  params = {"all": "true"}
  if folder:
- tenant_id = jwt.decode(
- self._auth.get_token()[0],
- options={"verify_signature": False},
- )["custom:tenantId"]
+ tenant_id = self.get_active_tenant_id()
  params["folderKey"] = f"{tenant_id}/{folder}"
- print(params["folderKey"])
 
  metadata = s.get(URL, params=params)
 
  if metadata.status_code != 200:
- print(metadata.text)
  raise ServerError("Could not fetch metadata for file.")
 
  return metadata.json()
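Both metadata helpers in this area now obtain the tenant through self.get_active_tenant_id() instead of decoding the access token inline, which is why the top-level jwt import could be dropped. For reference, the removed inline pattern read the custom:tenantId claim without verifying the signature, roughly as in this sketch (PyJWT; the standalone function form is illustrative, not SDK code):

    import jwt  # PyJWT

    def tenant_id_from_token(access_token: str) -> str:
        # Only the claim value is needed here, so the signature is not verified.
        claims = jwt.decode(access_token, options={"verify_signature": False})
        return claims["custom:tenantId"]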
@@ -1595,9 +1416,7 @@ class _UnsupportedSDK(_SeerSDK):
  dict: A dictionary mapping the display path to the raw file path.
  """
 
- tenant_id = jwt.decode(
- self._auth.get_token()[0], options={"verify_signature": False}
- )["custom:tenantId"]
+ tenant_id = self.get_active_tenant_id()
  result = {}
  # partition by folder_path
  folder_partitions = {os.path.dirname(x): [] for x in display_path}
@@ -1611,9 +1430,7 @@ class _UnsupportedSDK(_SeerSDK):
  try:
  metadata = {
  x["key"]: x["rawFilePath"]
- for x in self._get_msdataindex_metadata(
- folder=folder_path
- )["data"]
+ for x in self._get_msdataindex(folder=folder_path)["data"]
  }
  except:
  # If the metadata fetch fails, skip the folder
@@ -1632,3 +1449,379 @@ class _UnsupportedSDK(_SeerSDK):
  )
 
  return result
+
+ def get_search_data(
+ self,
+ analysis_id: str,
+ analyte_type: str,
+ rollup: str,
+ norm_method: str = "pepcal",
+ ):
+ """
+ Get analyte intensities data for a given PAS analysis.
+ Args:
+ analysis_id (str): ID of the analysis.
+ analyte_type (str): Type of the analyte. Must be either 'protein', 'peptide', or 'precursor'.
+ rollup (str): Intensities rollup method. Must be either 'np' or 'panel'.
+ norm_method (str): Normalization method. Supported methods are: raw, engine, median, median80, pepcal. Default is 'pepcal'.
+
+ Returns:
+ pd.DataFrame: A dataframe with each row containing the analyte intensity measurement:
+ 'msrun_id', 'sample_id', 'nanoparticle' (if rollup is 'np'), 'protein_group', 'peptide' (for 'peptide' and 'precursor' analyte types), 'charge' (for 'precursor' analyte type),
+ 'intensity_log10', 'protein_group_q_value', 'q_value' (for 'precursor' analyte type), 'rt' and 'irt' (for 'peptide' and 'precursor' analyte types)
+ """
+ # 1. Get msrun data for analysis
+ samples = self.find_samples(analysis_id=analysis_id)
+ sample_name_to_id = {s["sample_name"]: s["id"] for s in samples}
+ # for np rollup, a row represents an msrun
+ msruns = self.find_msruns(sample_ids=sample_name_to_id.values())
+ file_to_msrun = {
+ os.path.basename(msrun["raw_file_path"]).split(".")[0]: msrun
+ for msrun in msruns
+ }
+ sample_to_msrun = {msrun["sample_id"]: msrun for msrun in msruns}
+
+ # for panel rollup, a row represents a sample
+
+ # 2. Get search results
+ # pull the np/panel file, or report.tsv for precursor mode
+ search_results = self.get_search_result(
+ analysis_id=analysis_id,
+ analyte_type=analyte_type,
+ rollup=rollup,
+ )
+ if analyte_type in ["protein", "peptide"]:
+ intensity_column = None
+ if norm_method == "raw":
+ intensity_column = (
+ "Intensities Log10"
+ if "Intensities Log10" in search_results.columns
+ else "Intensity (Log10)"
+ )
+ elif norm_method == "engine":
+ intensity_column = (
+ "DIA-NN Normalized Intensities Log10"
+ if "DIA-NN Normalized Intensities Log10"
+ in search_results.columns
+ else "Normalized Intensity (Log10)"
+ )
+ if intensity_column not in search_results.columns:
+ raise ValueError(
+ "Engine normalized intensities not found in search results. This is only supported for DIA-NN currently."
+ )
+ elif norm_method == "median":
+ if (
+ not "Median Normalized Intensities Log10"
+ in search_results.columns
+ ):
+ raise ValueError(
+ "Median normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
+ )
+ intensity_column = "Median Normalized Intensities Log10"
+ elif norm_method == "median80":
+ if (
+ not "Median80 Normalized Intensities Log10"
+ in search_results.columns
+ ):
+ raise ValueError(
+ "Median80 normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
+ )
+ intensity_column = "Median80 Normalized Intensities Log10"
+ elif norm_method == "pepcal":
+ if not ("PepCal Intensities Log10" in search_results.columns):
+ raise ValueError(
+ "Pepcal normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later with the Seer Peptide Calibrant option enabled. \n Please retry using different norm_method, such as 'median'"
+ )
+
+ intensity_column = "PepCal Intensities Log10"
+
+ else:
+ raise ValueError(
+ f"norm_method = {norm_method} is not supported. Supported normalization methods are: raw, pepcal, engine, median, median80."
+ )
+ if rollup == "panel":
+ search_results.fillna({"Sample Name": ""}, inplace=True)
+ search_results["File Name"] = search_results[
+ "Sample Name"
+ ].apply(
+ lambda x: (
+ os.path.basename(
+ sample_to_msrun[sample_name_to_id[x]][
+ "raw_file_path"
+ ]
+ ).split(".")[0]
+ if x
+ else None
+ )
+ )
+ search_results["File Name"] = search_results["File Name"].apply(
+ lambda x: os.path.basename(x).split(".")[0] if x else None
+ )
+
+ search_results["Intensity Log10"] = search_results[
+ intensity_column
+ ]
+
+ # 3. Merge report to search results to get Q value and other properties
+ report = self.get_search_result(
+ analysis_id=analysis_id,
+ analyte_type="precursor",
+ rollup="np",
+ )
+ report["File Name"] = report["Run"]
+ report["Protein Group"] = report["Protein.Group"]
+
+ if analyte_type == "protein":
+ report["Protein Q Value"] = report["Protein.Q.Value"]
+
+ report = report[
+ ["File Name", "Protein Group", "Protein Q Value"]
+ ]
+ report.drop_duplicates(
+ subset=["File Name", "Protein Group"], inplace=True
+ )
+ df = pd.merge(
+ search_results,
+ report,
+ on=["File Name", "Protein Group"],
+ how="left",
+ )
+ included_columns = [
+ "MsRun ID",
+ "Sample ID",
+ "Protein Group",
+ "Intensity Log10",
+ "Protein Q Value",
+ ]
+
+ else:
+ report["Peptide"] = report["Stripped.Sequence"]
+ # If analyte_type is peptide, attach retention time (RT, iRT)
+ report = report[["File Name", "Peptide", "RT", "iRT"]]
+ report.drop_duplicates(
+ subset=["File Name", "Peptide"], inplace=True
+ )
+ df = pd.merge(
+ search_results,
+ report,
+ on=["File Name", "Peptide"],
+ how="left",
+ )
+ included_columns = [
+ "MsRun ID",
+ "Sample ID",
+ "Peptide",
+ "Protein Group",
+ "Intensity Log10",
+ "RT",
+ "iRT",
+ ]
+ # endif
+
+ if rollup == "np":
+ included_columns.insert(
+ included_columns.index("Sample ID") + 1, "Nanoparticle"
+ )
+
+ df["MsRun ID"] = df["File Name"].apply(
+ lambda x: (
+ file_to_msrun[x]["id"] if x in file_to_msrun else None
+ )
+ )
+ df["Sample ID"] = df["File Name"].apply(
+ lambda x: (
+ file_to_msrun[x]["sample_id"]
+ if x in file_to_msrun
+ else None
+ )
+ )
+ df = df[included_columns]
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
+ return df
+ else:
+ # precursor
+ # working only in report.tsv
+ search_results["Intensity"] = search_results["Precursor.Quantity"]
+ search_results["MsRun ID"] = search_results["Run"].apply(
+ lambda x: (
+ file_to_msrun[x]["id"] if x in file_to_msrun else None
+ )
+ )
+ search_results["Sample ID"] = search_results["Run"].apply(
+ lambda x: (
+ file_to_msrun[x]["sample_id"]
+ if x in file_to_msrun
+ else None
+ )
+ )
+ search_results["Protein Group"] = search_results["Protein.Group"]
+ search_results["Peptide"] = search_results["Stripped.Sequence"]
+ search_results["Charge"] = search_results["Precursor.Charge"]
+ search_results["Precursor Id"] = search_results["Precursor.Id"]
+ search_results["Precursor Q Value"] = search_results["Q.Value"]
+ search_results["Protein Q Value"] = search_results[
+ "Protein.Q.Value"
+ ]
+
+ included_columns = [
+ "MsRun ID",
+ "Sample ID",
+ "Protein Group",
+ "Protein Q Value",
+ "Peptide",
+ "Precursor Id",
+ "Intensity",
+ "Precursor Q Value",
+ "Charge",
+ "RT",
+ "iRT",
+ "IM",
+ "iIM",
+ ]
+ df = search_results[included_columns]
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
+
+ return df
+
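A hedged usage sketch for the new method, assuming an authenticated instance `sdk` that exposes this surface and using a placeholder analysis ID; the selected columns come from the docstring above:

    intensities = sdk.get_search_data(
        analysis_id="<analysis-id>",
        analyte_type="protein",   # 'protein', 'peptide', or 'precursor'
        rollup="np",              # 'np' or 'panel'
        norm_method="median",     # raw, engine, median, median80, or pepcal
    )
    print(intensities[["sample_id", "protein_group", "intensity_log10"]].head())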
+ def get_search_data_analytes(self, analysis_id: str, analyte_type: str):
+ if analyte_type not in ["protein", "peptide", "precursor"]:
+ raise ValueError(
+ f"Unknown analyte_type = {analyte_type}. Supported analytes are 'protein', 'peptide', or 'precursor'."
+ )
+
+ # include
+ # protein group, (peptide sequence), protein names, gene names, biological process, molecular function, cellular component, global q value, library q value
+
+ # 1. for all modes, fetch protein np file to extract protein groups, protein names, gene names, biological process, molecular function, cellular component
+ search_results = self.get_search_result(
+ analysis_id=analysis_id, analyte_type="protein", rollup="np"
+ )
+
+ report_results = self.get_search_result(
+ analysis_id=analysis_id, analyte_type="precursor", rollup="np"
+ )
+
+ search_results = search_results[
+ [
+ "Protein Group",
+ "Protein Names",
+ "Gene Names",
+ "Biological Process",
+ "Molecular Function",
+ "Cellular Component",
+ ]
+ ]
+ search_results.drop_duplicates(subset=["Protein Group"], inplace=True)
+ report_results["Protein Group"] = report_results["Protein.Group"]
+ report_results["Peptide"] = report_results["Stripped.Sequence"]
+
+ if analyte_type == "protein":
+ report_results = report_results[
+ [
+ "Protein Group",
+ "Protein.Ids",
+ "Global.PG.Q.Value",
+ "Lib.PG.Q.Value",
+ ]
+ ]
+ report_results.drop_duplicates(
+ subset=["Protein Group"], inplace=True
+ )
+ df = pd.merge(
+ search_results,
+ report_results,
+ on=["Protein Group"],
+ how="left",
+ )
+ elif analyte_type == "peptide":
+ peptide_results = self.get_search_result(
+ analysis_id=analysis_id, analyte_type="peptide", rollup="np"
+ )
+ peptide_results = peptide_results[["Peptide", "Protein Group"]]
+ search_results = pd.merge(
+ peptide_results,
+ search_results,
+ on=["Protein Group"],
+ how="left",
+ )
+
+ report_results = report_results[
+ ["Peptide", "Protein.Ids", "Protein.Group"]
+ ]
+ report_results.drop_duplicates(subset=["Peptide"], inplace=True)
+ df = pd.merge(
+ search_results,
+ report_results,
+ on=["Peptide"],
+ how="left",
+ )
+ else:
+ # precursor
+ search_results = search_results[
+ [
+ "Protein Group",
+ "Protein Names",
+ "Gene Names",
+ "Biological Process",
+ "Molecular Function",
+ "Cellular Component",
+ ]
+ ]
+ search_results.drop_duplicates(
+ subset=["Protein Group"], inplace=True
+ )
+ report_results = report_results[
+ [
+ "Precursor.Id",
+ "Precursor.Charge",
+ "Peptide",
+ "Protein Group",
+ "Protein.Ids",
+ "Protein.Names",
+ "Genes",
+ "First.Protein.Description",
+ "Modified.Sequence",
+ "Proteotypic",
+ "Global.Q.Value",
+ "Global.PG.Q.Value",
+ "Lib.Q.Value",
+ "Lib.PG.Q.Value",
+ ]
+ ]
+ report_results.drop_duplicates(
+ subset=["Protein Group"], inplace=True
+ )
+ df = pd.merge(
+ report_results,
+ search_results,
+ on=["Protein Group"],
+ how="left",
+ )
+ df = df[
+ [
+ "Precursor.Id",
+ "Precursor.Charge",
+ "Peptide",
+ "Protein Group",
+ "Protein.Ids",
+ "Protein.Names",
+ "Genes",
+ "First.Protein.Description",
+ "Modified.Sequence",
+ "Proteotypic",
+ "Global.Q.Value",
+ "Global.PG.Q.Value",
+ "Lib.Q.Value",
+ "Lib.PG.Q.Value",
+ "Gene Names",
+ "Biological Process",
+ "Molecular Function",
+ "Cellular Component",
+ ]
+ ]
+ df.rename(
+ columns={"Modified.Sequence": "Modified.Peptide"}, inplace=True
+ )
+ # endif
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
+ return df
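And a matching sketch for get_search_data_analytes, again with an assumed authenticated `sdk` and a placeholder analysis ID; the returned column names are rewritten by title_case_to_snake_case, so they are printed rather than hard-coded here:

    analytes = sdk.get_search_data_analytes(
        analysis_id="<analysis-id>",
        analyte_type="protein",
    )
    print(analytes.columns.tolist())
    print(analytes.head())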