seer-pas-sdk 0.3.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
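
The most visible change across this diff is the rename of the SDK's read helpers from get_* to find_*: get_plate_metadata becomes find_plates, get_samples becomes find_samples, get_project_metadata becomes find_projects, and get_analysis_protocols becomes find_analysis_protocols (the old download_analysis_files helper is removed outright). A minimal before/after sketch, assuming an already-authenticated SDK instance named sdk and placeholder arguments; the full signatures are not shown in this diff:

    # 0.3.4-style calls referenced in the removed lines below
    plates = sdk.get_plate_metadata()
    samples = sdk.get_samples(plate_id=plate_id)
    protocols = sdk.get_analysis_protocols(analysis_protocol_name=name)

    # 1.1.0 replacements referenced in the added lines below
    plates = sdk.find_plates()
    samples = sdk.find_samples(plate_id=plate_id)
    protocols = sdk.find_analysis_protocols(analysis_protocol_name=name)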
@@ -2,19 +2,12 @@
2
2
  seer_pas_sdk.core.unsupported -- in development
3
3
  """
4
4
 
5
- from tqdm import tqdm
6
-
7
5
  import os
8
- import jwt
9
- import requests
10
- import urllib.request
11
- import ssl
12
6
  import shutil
13
7
 
14
8
  from typing import List as _List
15
9
 
16
10
  from ..common import *
17
- from ..auth import Auth
18
11
  from ..objects import PlateMap
19
12
 
20
13
  from .sdk import SeerSDK as _SeerSDK
@@ -71,7 +64,7 @@ class _UnsupportedSDK(_SeerSDK):
71
64
 
72
65
  URL = f"{self._auth.url}api/v1/samples"
73
66
 
74
- with self._get_auth_session() as s:
67
+ with self._get_auth_session("addsample") as s:
75
68
 
76
69
  response = s.post(URL, json=sample_entry)
77
70
 
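
Another recurring change, starting with the hunk above, is that every internal call to self._get_auth_session() now passes a short action label such as "addsample", "addplate", or "getawsuploadcredentials". The wrapper itself is not part of this diff, so the snippet below is only a sketch of how such a labeled session helper might look, assuming it yields a requests.Session and uses the label purely for logging or auditing; the real implementation in seer_pas_sdk may differ:

    import contextlib
    import logging

    import requests

    @contextlib.contextmanager
    def _get_auth_session(self, action: str = ""):
        # Sketch of a method on the SDK class, not the real implementation:
        # open an authenticated session and tag it with the calling action so
        # the request can be attributed in logs or audits.
        session = requests.Session()
        token = self._auth.get_token()[0]
        session.headers.update({"Authorization": f"Bearer {token}"})
        if action:
            logging.getLogger(__name__).debug("auth session opened for %r", action)
        try:
            yield session
        finally:
            session.close()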
@@ -112,7 +105,7 @@ class _UnsupportedSDK(_SeerSDK):
112
105
 
113
106
  URL = f"{self._auth.url}api/v1/samples/batch"
114
107
 
115
- with self._get_auth_session() as s:
108
+ with self._get_auth_session("addsamples") as s:
116
109
  response = s.post(URL, json={"samples": sample_info})
117
110
 
118
111
  if response.status_code != 200:
@@ -164,9 +157,7 @@ class _UnsupportedSDK(_SeerSDK):
164
157
  if not project_name:
165
158
  raise ValueError("Project name cannot be empty.")
166
159
 
167
- all_plate_ids = set(
168
- [plate["id"] for plate in self.get_plate_metadata()]
169
- )
160
+ all_plate_ids = set([plate["id"] for plate in self.find_plates()])
170
161
 
171
162
  for plate_id in plate_ids:
172
163
  if plate_id not in all_plate_ids:
@@ -176,7 +167,7 @@ class _UnsupportedSDK(_SeerSDK):
176
167
 
177
168
  URL = f"{self._auth.url}api/v1/projects"
178
169
 
179
- with self._get_auth_session() as s:
170
+ with self._get_auth_session("addproject") as s:
180
171
 
181
172
  project = s.post(
182
173
  URL,
@@ -233,7 +224,7 @@ class _UnsupportedSDK(_SeerSDK):
233
224
 
234
225
  URL = f"{self._auth.url}api/v1/addSamplesToProject/{project_id}"
235
226
 
236
- with self._get_auth_session() as s:
227
+ with self._get_auth_session("addprojectsamples") as s:
237
228
 
238
229
  response = s.put(
239
230
  URL,
@@ -277,7 +268,7 @@ class _UnsupportedSDK(_SeerSDK):
277
268
  samples = (
278
269
  x["id"]
279
270
  for plate_id in plates
280
- for x in self.get_samples(plate_id=plate_id)
271
+ for x in self.find_samples(plate_id=plate_id)
281
272
  )
282
273
 
283
274
  return self.add_samples_to_project(
@@ -375,7 +366,7 @@ class _UnsupportedSDK(_SeerSDK):
375
366
  validate_plate_map(plate_map_data, local_file_names)
376
367
 
377
368
  # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
378
- with self._get_auth_session() as s:
369
+ with self._get_auth_session("getplateids") as s:
379
370
  plate_response = s.get(f"{self._auth.url}api/v1/plateids")
380
371
 
381
372
  if plate_response.status_code != 200:
@@ -392,7 +383,7 @@ class _UnsupportedSDK(_SeerSDK):
392
383
 
393
384
  # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
394
385
 
395
- with self._get_auth_session() as s:
386
+ with self._get_auth_session("addplate") as s:
396
387
  plate_response = s.post(
397
388
  f"{self._auth.url}api/v1/plates",
398
389
  json={
@@ -415,7 +406,7 @@ class _UnsupportedSDK(_SeerSDK):
415
406
  )
416
407
 
417
408
  # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
418
- with self._get_auth_session() as s:
409
+ with self._get_auth_session("getawsuploadconfig") as s:
419
410
  config_response = s.post(
420
411
  f"{self._auth.url}api/v1/msdatas/getuploadconfig",
421
412
  json={"plateId": id_uuid},
@@ -441,7 +432,7 @@ class _UnsupportedSDK(_SeerSDK):
441
432
  s3_bucket = config_response.json()["s3Bucket"]
442
433
  s3_upload_path = config_response.json()["s3UploadPath"]
443
434
 
444
- with self._get_auth_session() as s:
435
+ with self._get_auth_session("getawsuploadcredentials") as s:
445
436
  config_response = s.get(
446
437
  f"{self._auth.url}auth/getawscredential",
447
438
  )
@@ -487,7 +478,7 @@ class _UnsupportedSDK(_SeerSDK):
487
478
  "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
488
479
  )
489
480
 
490
- with self._get_auth_session() as s:
481
+ with self._get_auth_session("uploadplatemapfile") as s:
491
482
  plate_map_response = s.post(
492
483
  f"{self._auth.url}api/v1/msdataindex/file",
493
484
  json={
@@ -540,7 +531,7 @@ class _UnsupportedSDK(_SeerSDK):
540
531
  "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
541
532
  )
542
533
 
543
- with self._get_auth_session() as s:
534
+ with self._get_auth_session("uploadsampledescriptionfile") as s:
544
535
  sdf_response = s.post(
545
536
  f"{self._auth.url}api/v1/msdataindex/file",
546
537
  json={
@@ -573,7 +564,7 @@ class _UnsupportedSDK(_SeerSDK):
573
564
  )
574
565
 
575
566
  # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
576
- with self._get_auth_session() as s:
567
+ with self._get_auth_session("addmsdatas") as s:
577
568
  ms_data_response = s.post(
578
569
  f"{self._auth.url}api/v1/msdatas/batch",
579
570
  json={"msdatas": plate_map_data},
@@ -584,7 +575,7 @@ class _UnsupportedSDK(_SeerSDK):
584
575
  )
585
576
 
586
577
  # Step 9: Upload each msdata file to the S3 bucket.
587
- with self._get_auth_session() as s:
578
+ with self._get_auth_session("getawsuploadcredentials") as s:
588
579
  config_response = s.get(
589
580
  f"{self._auth.url}auth/getawscredential",
590
581
  )
@@ -629,7 +620,7 @@ class _UnsupportedSDK(_SeerSDK):
629
620
  )
630
621
 
631
622
  # Step 10: Make a call to `api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
632
- with self._get_auth_session() as s:
623
+ with self._get_auth_session("addmsdataindex") as s:
633
624
  file_response = s.post(
634
625
  f"{self._auth.url}api/v1/msdataindex/file",
635
626
  json={"files": files},
@@ -671,16 +662,16 @@ class _UnsupportedSDK(_SeerSDK):
671
662
  Name of the analysis.
672
663
 
673
664
  project_id : str
674
- ID of the project to which the analysis belongs. Can be fetched using the get_project_metadata() function.
665
+ ID of the project to which the analysis belongs. Can be fetched using the find_projects() function.
675
666
 
676
667
  sample_ids: list[str], optional
677
668
  List of sample IDs to be used for the analysis. Should be omitted if analysis is to be run with all samples.
678
669
 
679
670
  analysis_protocol_name : str, optional
680
- Name of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.
671
+ Name of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.
681
672
 
682
673
  analysis_protocol_id : str, optional
683
- ID of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.
674
+ ID of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.
684
675
 
685
676
  notes : str, optional
686
677
  Notes for the analysis, defaulted to an empty string.
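
The docstring in the hunk above belongs to the SDK's analysis-start helper; its def line sits outside the changed hunks, so the method name below is an assumption (the auth-session label later in this diff is "startanalysis"). A hedged usage sketch, with placeholder names and the renamed find_* lookups used to resolve IDs:

    # Hypothetical usage sketch: `start_analysis` is an assumed method name and
    # the project lookup is illustrative only.
    project_id = sdk.find_projects()[0]["id"]
    protocol = sdk.find_analysis_protocols(
        analysis_protocol_name="my protocol name"
    )[0]

    sdk.start_analysis(
        name="My analysis",
        project_id=project_id,
        analysis_protocol_id=protocol["id"],  # or pass analysis_protocol_name, not both
        sample_ids=None,                      # omit to analyze all samples in the project
        notes="",
    )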
@@ -714,7 +705,7 @@ class _UnsupportedSDK(_SeerSDK):
714
705
  raise ValueError("Project ID cannot be empty.")
715
706
 
716
707
  if not analysis_protocol_id and analysis_protocol_name:
717
- valid_analysis_protocol = self.get_analysis_protocols(
708
+ valid_analysis_protocol = self.find_analysis_protocols(
718
709
  analysis_protocol_name=analysis_protocol_name
719
710
  )
720
711
 
@@ -726,7 +717,7 @@ class _UnsupportedSDK(_SeerSDK):
726
717
  analysis_protocol_id = valid_analysis_protocol[0]["id"]
727
718
 
728
719
  if analysis_protocol_id and not analysis_protocol_name:
729
- valid_analysis_protocol = self.get_analysis_protocols(
720
+ valid_analysis_protocol = self.find_analysis_protocols(
730
721
  analysis_protocol_id=analysis_protocol_id
731
722
  )
732
723
 
@@ -743,7 +734,7 @@ class _UnsupportedSDK(_SeerSDK):
743
734
  if sample_ids:
744
735
  valid_ids = [
745
736
  entry["id"]
746
- for entry in self.get_samples(project_id=project_id)
737
+ for entry in self.find_samples(project_id=project_id)
747
738
  ]
748
739
 
749
740
  for sample_id in sample_ids:
@@ -758,7 +749,7 @@ class _UnsupportedSDK(_SeerSDK):
758
749
 
759
750
  URL = f"{self._auth.url}api/v1/analyze"
760
751
 
761
- with self._get_auth_session() as s:
752
+ with self._get_auth_session("startanalysis") as s:
762
753
  req_payload = {
763
754
  "analysisName": name,
764
755
  "analysisProtocolId": analysis_protocol_id,
@@ -854,7 +845,7 @@ class _UnsupportedSDK(_SeerSDK):
854
845
  tenant_id = self.get_active_tenant_id()
855
846
 
856
847
  # Step 3: Fetch the S3 bucket name by making a call to `/api/v1/auth/getawscredential`
857
- with self._get_auth_session() as s:
848
+ with self._get_auth_session("getawsuploadcredentials") as s:
858
849
  config_response = s.get(
859
850
  f"{self._auth.url}auth/getawscredential",
860
851
  )
@@ -905,7 +896,7 @@ class _UnsupportedSDK(_SeerSDK):
905
896
 
906
897
  # Step 5: Make a call to `/api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
907
898
  result_files = None
908
- with self._get_auth_session() as s:
899
+ with self._get_auth_session("addmsdataindex") as s:
909
900
  file_response = s.post(
910
901
  f"{self._auth.url}api/v1/msdataindex/file",
911
902
  json={"files": files},
@@ -1003,7 +994,7 @@ class _UnsupportedSDK(_SeerSDK):
1003
994
  target_folder_path = f"{tenant_id}/{target_folder_paths[0]}"
1004
995
  # Retrieve msdatafileindex metadata to determine source space
1005
996
  base_space = None
1006
- with self._get_auth_session() as s:
997
+ with self._get_auth_session("getmsdataindex") as s:
1007
998
  URL = self._auth.url + "api/v1/msdataindex/getmetadata"
1008
999
  params = {"folderKey": folder_path}
1009
1000
  r = s.get(URL, params=params)
@@ -1046,7 +1037,7 @@ class _UnsupportedSDK(_SeerSDK):
1046
1037
  if target_space_id and base_space != target_space_id:
1047
1038
  json["targetUserGroupId"] = target_space_id
1048
1039
 
1049
- with self._get_auth_session() as s:
1040
+ with self._get_auth_session("movemsdataindex") as s:
1050
1041
  URL = self._auth.url + "api/v1/msdataindex/move"
1051
1042
  json = json
1052
1043
  r = s.post(URL, json=json)
@@ -1098,167 +1089,6 @@ class _UnsupportedSDK(_SeerSDK):
1098
1089
  source_ms_data_files, target_ms_data_files
1099
1090
  )
1100
1091
 
1101
- def download_analysis_files(
1102
- self, analysis_id: str, download_path: str = "", file_name: str = ""
1103
- ):
1104
- """
1105
- Download a specific analysis file from the backend given an `analysis_id` to the specified `download_path`.
1106
-
1107
- If no `download_path` is specified, the file will be downloaded to the current working directory.
1108
-
1109
- If no `file_name` is specified, all files for the analysis will be downloaded.
1110
-
1111
- Parameters
1112
- ----------
1113
- analysis_id : str
1114
- ID of the analysis to download.
1115
- download_path : str, optional
1116
- Path to download the analysis file to, defaulted to current working directory.
1117
- file_name : str, optional
1118
- Name of the analysis file to download, defaulted to None.
1119
-
1120
- Returns
1121
- -------
1122
- dict
1123
- Message containing whether the file was downloaded or not.
1124
-
1125
- Examples
1126
- -------
1127
- >>> from core import SeerSDK
1128
- >>> sdk = SeerSDK()
1129
- >>> sdk.download_analysis_files("analysis_id", "/path/to/download")
1130
- >>> Downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
1131
- Finished downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
1132
-
1133
- Downloading EXP20004_2020ms0007X11_A.mzML.quant
1134
- Finished downloading EXP20004_2020ms0007X11_A.mzML.quant
1135
-
1136
- Downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
1137
- Finished downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
1138
-
1139
- Downloading EXP20004_2020ms0007X11_A/diann.log
1140
- Finished downloading EXP20004_2020ms0007X11_A/diann.log
1141
- >>> { "message": "File downloaded successfully." }
1142
- """
1143
-
1144
- def get_url(analysis_id, file_name, project_id):
1145
- URL = f"{self._auth.url}api/v1/analysisResultFiles/getUrl"
1146
-
1147
- with self._get_auth_session() as s:
1148
-
1149
- download_url = s.post(
1150
- URL,
1151
- json={
1152
- "analysisId": analysis_id,
1153
- "filename": file_name,
1154
- "projectId": project_id,
1155
- },
1156
- )
1157
-
1158
- if download_url.status_code != 200:
1159
- raise ValueError(
1160
- "Could not download file. Please check if the analysis ID is valid or the backend is running."
1161
- )
1162
-
1163
- return download_url.json()["url"]
1164
-
1165
- if not analysis_id:
1166
- raise ValueError("Analysis ID cannot be empty.")
1167
-
1168
- try:
1169
- valid_analysis = self.get_analyses(analysis_id)[0]
1170
- except:
1171
- raise ValueError(
1172
- "Invalid analysis ID. Please check if the analysis ID is valid or the backend is running."
1173
- )
1174
-
1175
- project_id = valid_analysis["project_id"]
1176
-
1177
- if not download_path:
1178
- download_path = os.getcwd()
1179
- print(f"\nDownload path not specified.\n")
1180
-
1181
- if not os.path.isdir(download_path):
1182
- print(
1183
- f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
1184
- )
1185
- download_path = os.getcwd()
1186
-
1187
- name = f"{download_path}/downloads/{analysis_id}"
1188
-
1189
- if not os.path.exists(name):
1190
- os.makedirs(name)
1191
-
1192
- URL = f"{self._auth.url}api/v1/analysisResultFiles"
1193
-
1194
- with self._get_auth_session() as s:
1195
-
1196
- analysis_files = s.get(f"{URL}/{analysis_id}")
1197
-
1198
- if analysis_files.status_code != 200:
1199
- raise ValueError(
1200
- "Invalid request. Please check if the analysis ID is valid or the backend is running."
1201
- )
1202
-
1203
- res = analysis_files.json()
1204
-
1205
- if file_name:
1206
- filenames = set([file["filename"] for file in res])
1207
-
1208
- if file_name not in filenames:
1209
- raise ValueError(
1210
- "Invalid file name. Please check if the file name is correct."
1211
- )
1212
-
1213
- res = [file for file in res if file["filename"] == file_name]
1214
-
1215
- print(f'Downloading files to "{name}"\n')
1216
-
1217
- for file in res:
1218
- filename = file["filename"]
1219
- url = get_url(analysis_id, filename, project_id)
1220
-
1221
- print(f"Downloading {filename}")
1222
-
1223
- for _ in range(2):
1224
- try:
1225
- with tqdm(
1226
- unit="B",
1227
- unit_scale=True,
1228
- unit_divisor=1024,
1229
- miniters=1,
1230
- desc=f"Progress",
1231
- ) as t:
1232
- ssl._create_default_https_context = (
1233
- ssl._create_unverified_context
1234
- )
1235
- urllib.request.urlretrieve(
1236
- url,
1237
- f"{name}/{filename}",
1238
- reporthook=download_hook(t),
1239
- data=None,
1240
- )
1241
- break
1242
- except:
1243
- filename = filename.split("/")
1244
- name += "/" + "/".join(
1245
- [filename[i] for i in range(len(filename) - 1)]
1246
- )
1247
- filename = filename[-1]
1248
- if not os.path.isdir(f"{name}/{filename}"):
1249
- os.makedirs(f"{name}/")
1250
-
1251
- else:
1252
- raise ValueError(
1253
- "Your download failed. Please check if the backend is still running."
1254
- )
1255
-
1256
- print(f"Finished downloading {filename}\n")
1257
-
1258
- return {
1259
- "message": f"Files downloaded successfully to '{download_path}/downloads/{analysis_id}'"
1260
- }
1261
-
1262
1092
  def link_plate(
1263
1093
  self,
1264
1094
  ms_data_files: _List[str],
@@ -1301,17 +1131,12 @@ class _UnsupportedSDK(_SeerSDK):
1301
1131
 
1302
1132
  plate_ids = (
1303
1133
  set()
1304
- ) # contains all the plate_ids fetched from self.get_plate_metadata()
1305
- files = [] # to be uploaded to sync frontend
1134
+ ) # contains all the plate_ids fetched from self.find_plates()
1306
1135
  samples = [] # list of all the sample responses from the backend
1307
1136
  id_uuid = "" # uuid for the plate id
1308
1137
  raw_file_paths = {} # list of all the AWS raw file paths
1309
1138
  s3_upload_path = None
1310
1139
  s3_bucket = ""
1311
- ms_data_file_names = []
1312
- dir_exists = (
1313
- True # flag to check if the generated_files directory exists
1314
- )
1315
1140
 
1316
1141
  # Step 0: Check if the file paths exist in the S3 bucket.
1317
1142
  for file in ms_data_files:
@@ -1344,7 +1169,7 @@ class _UnsupportedSDK(_SeerSDK):
1344
1169
  validate_plate_map(plate_map_data, ms_data_files)
1345
1170
 
1346
1171
  # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
1347
- with self._get_auth_session() as s:
1172
+ with self._get_auth_session("getplateids") as s:
1348
1173
  plate_response = s.get(f"{self._auth.url}api/v1/plateids")
1349
1174
 
1350
1175
  if plate_response.status_code != 200:
@@ -1361,7 +1186,7 @@ class _UnsupportedSDK(_SeerSDK):
1361
1186
 
1362
1187
  # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
1363
1188
 
1364
- with self._get_auth_session() as s:
1189
+ with self._get_auth_session("addplate") as s:
1365
1190
  plate_response = s.post(
1366
1191
  f"{self._auth.url}api/v1/plates",
1367
1192
  json={
@@ -1384,7 +1209,7 @@ class _UnsupportedSDK(_SeerSDK):
1384
1209
  )
1385
1210
 
1386
1211
  # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
1387
- with self._get_auth_session() as s:
1212
+ with self._get_auth_session("getawsuploadconfig") as s:
1388
1213
  config_response = s.post(
1389
1214
  f"{self._auth.url}api/v1/msdatas/getuploadconfig",
1390
1215
  json={"plateId": id_uuid},
@@ -1410,7 +1235,7 @@ class _UnsupportedSDK(_SeerSDK):
1410
1235
  s3_bucket = config_response.json()["s3Bucket"]
1411
1236
  s3_upload_path = config_response.json()["s3UploadPath"]
1412
1237
 
1413
- with self._get_auth_session() as s:
1238
+ with self._get_auth_session("getawsuploadcredentials") as s:
1414
1239
  config_response = s.get(
1415
1240
  f"{self._auth.url}auth/getawscredential",
1416
1241
  )
@@ -1456,7 +1281,7 @@ class _UnsupportedSDK(_SeerSDK):
1456
1281
  "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
1457
1282
  )
1458
1283
 
1459
- with self._get_auth_session() as s:
1284
+ with self._get_auth_session("uploadplatemap") as s:
1460
1285
  plate_map_response = s.post(
1461
1286
  f"{self._auth.url}api/v1/msdataindex/file",
1462
1287
  json={
@@ -1507,7 +1332,7 @@ class _UnsupportedSDK(_SeerSDK):
1507
1332
  "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
1508
1333
  )
1509
1334
 
1510
- with self._get_auth_session() as s:
1335
+ with self._get_auth_session("uploadsampledescription") as s:
1511
1336
  sdf_response = s.post(
1512
1337
  f"{self._auth.url}api/v1/msdataindex/file",
1513
1338
  json={
@@ -1542,7 +1367,7 @@ class _UnsupportedSDK(_SeerSDK):
1542
1367
  )
1543
1368
 
1544
1369
  # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
1545
- with self._get_auth_session() as s:
1370
+ with self._get_auth_session("addmsdatas") as s:
1546
1371
  ms_data_response = s.post(
1547
1372
  f"{self._auth.url}api/v1/msdatas/batch",
1548
1373
  json={"msdatas": plate_map_data},
@@ -1555,7 +1380,7 @@ class _UnsupportedSDK(_SeerSDK):
1555
1380
  print(f"Plate generated with id: '{id_uuid}'")
1556
1381
  return id_uuid
1557
1382
 
1558
- def _get_msdataindex_metadata(self, folder=""):
1383
+ def _get_msdataindex(self, folder=""):
1559
1384
  """
1560
1385
  Get metadata for a given file path.
1561
1386
 
@@ -1566,20 +1391,15 @@ class _UnsupportedSDK(_SeerSDK):
1566
1391
  dict: A dictionary containing the metadata for the file.
1567
1392
  """
1568
1393
  URL = f"{self._auth.url}api/v2/msdataindex/getmetadata"
1569
- with self._get_auth_session() as s:
1394
+ with self._get_auth_session("getmsdataindex") as s:
1570
1395
  params = {"all": "true"}
1571
1396
  if folder:
1572
- tenant_id = jwt.decode(
1573
- self._auth.get_token()[0],
1574
- options={"verify_signature": False},
1575
- )["custom:tenantId"]
1397
+ tenant_id = self.get_active_tenant_id()
1576
1398
  params["folderKey"] = f"{tenant_id}/{folder}"
1577
- print(params["folderKey"])
1578
1399
 
1579
1400
  metadata = s.get(URL, params=params)
1580
1401
 
1581
1402
  if metadata.status_code != 200:
1582
- print(metadata.text)
1583
1403
  raise ServerError("Could not fetch metadata for file.")
1584
1404
 
1585
1405
  return metadata.json()
@@ -1595,9 +1415,7 @@ class _UnsupportedSDK(_SeerSDK):
1595
1415
  dict: A dictionary mapping the display path to the raw file path.
1596
1416
  """
1597
1417
 
1598
- tenant_id = jwt.decode(
1599
- self._auth.get_token()[0], options={"verify_signature": False}
1600
- )["custom:tenantId"]
1418
+ tenant_id = self.get_active_tenant_id()
1601
1419
  result = {}
1602
1420
  # partition by folder_path
1603
1421
  folder_partitions = {os.path.dirname(x): [] for x in display_path}
@@ -1611,9 +1429,7 @@ class _UnsupportedSDK(_SeerSDK):
1611
1429
  try:
1612
1430
  metadata = {
1613
1431
  x["key"]: x["rawFilePath"]
1614
- for x in self._get_msdataindex_metadata(
1615
- folder=folder_path
1616
- )["data"]
1432
+ for x in self._get_msdataindex(folder=folder_path)["data"]
1617
1433
  }
1618
1434
  except:
1619
1435
  # If the metadata fetch fails, skip the folder
@@ -1632,3 +1448,379 @@ class _UnsupportedSDK(_SeerSDK):
1632
1448
  )
1633
1449
 
1634
1450
  return result
1451
+
1452
+ def get_search_data(
1453
+ self,
1454
+ analysis_id: str,
1455
+ analyte_type: str,
1456
+ rollup: str,
1457
+ norm_method: str = "pepcal",
1458
+ ):
1459
+ """
1460
+ Get analyte intensities data for a given PAS analysis.
1461
+ Args:
1462
+ analysis_id (str): ID of the analysis.
1463
+ analyte_type (str): Type of the analyte. Must be either 'protein', 'peptide', precursor.
1464
+ rollup (str): Intensities rollup method. Must be either 'np' or 'panel'.
1465
+ norm_method (str): Search engine. Supported engines are: raw, engine, median, median80, pepcal. Default is 'pepcal'.
1466
+
1467
+ Returns:
1468
+ pd.DataFrame: A dataframe with each row containing the analyte intensity measurement:
1469
+ 'msrun_id', 'sample_id', 'nanoparticle' (if rollup is 'np'), 'protein_group', 'peptide' (for 'peptide' and 'precursor' analyte types), 'charge' (for 'precursor' analyte type),
1470
+ 'intensity_log10', 'protein_group_q_value', 'q_value' (for 'precursor' analyte type), 'rt' and 'irt' (for 'peptide' and 'precursor' analyte types)
1471
+ """
1472
+ # 1. Get msrun data for analysis
1473
+ samples = self.find_samples(analysis_id=analysis_id)
1474
+ sample_name_to_id = {s["sample_name"]: s["id"] for s in samples}
1475
+ # for np rollup, a row represents an msrun
1476
+ msruns = self.find_msruns(sample_ids=sample_name_to_id.values())
1477
+ file_to_msrun = {
1478
+ os.path.basename(msrun["raw_file_path"]).split(".")[0]: msrun
1479
+ for msrun in msruns
1480
+ }
1481
+ sample_to_msrun = {msrun["sample_id"]: msrun for msrun in msruns}
1482
+
1483
+ # for panel rollup, a row represents a sample
1484
+
1485
+ # 2. Get search results
1486
+ # pull the np/panel file, or report.tsv for precursor mode
1487
+ search_results = self.get_search_result(
1488
+ analysis_id=analysis_id,
1489
+ analyte_type=analyte_type,
1490
+ rollup=rollup,
1491
+ )
1492
+ if analyte_type in ["protein", "peptide"]:
1493
+ intensity_column = None
1494
+ if norm_method == "raw":
1495
+ intensity_column = (
1496
+ "Intensities Log10"
1497
+ if "Intensities Log10" in search_results.columns
1498
+ else "Intensity (Log10)"
1499
+ )
1500
+ elif norm_method == "engine":
1501
+ intensity_column = (
1502
+ "DIA-NN Normalized Intensities Log10"
1503
+ if "DIA-NN Normalized Intensities Log10"
1504
+ in search_results.columns
1505
+ else "Normalized Intensity (Log10)"
1506
+ )
1507
+ if intensity_column not in search_results.columns:
1508
+ raise ValueError(
1509
+ "Engine normalized intensities not found in search results. This is only supported for DIA-NN currently."
1510
+ )
1511
+ elif norm_method == "median":
1512
+ if (
1513
+ not "Median Normalized Intensities Log10"
1514
+ in search_results.columns
1515
+ ):
1516
+ raise ValueError(
1517
+ "Median normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
1518
+ )
1519
+ intensity_column = "Median Normalized Intensities Log10"
1520
+ elif norm_method == "median80":
1521
+ if (
1522
+ not "Median80 Normalized Intensities Log10"
1523
+ in search_results.columns
1524
+ ):
1525
+ raise ValueError(
1526
+ "Median80 normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
1527
+ )
1528
+ intensity_column = "Median80 Normalized Intensities Log10"
1529
+ elif norm_method == "pepcal":
1530
+ if not ("PepCal Intensities Log10" in search_results.columns):
1531
+ raise ValueError(
1532
+ "Pepcal normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later with the Seer Peptide Calibrant option enabled. \n Please retry using different norm_method, such as 'median'"
1533
+ )
1534
+
1535
+ intensity_column = "PepCal Intensities Log10"
1536
+
1537
+ else:
1538
+ raise ValueError(
1539
+ f"norm_method = {norm_method} is not supported. Supported normalization methods are: raw, pepcal, engine, median, median80."
1540
+ )
1541
+ if rollup == "panel":
1542
+ search_results.fillna({"Sample Name": ""}, inplace=True)
1543
+ search_results["File Name"] = search_results[
1544
+ "Sample Name"
1545
+ ].apply(
1546
+ lambda x: (
1547
+ os.path.basename(
1548
+ sample_to_msrun[sample_name_to_id[x]][
1549
+ "raw_file_path"
1550
+ ]
1551
+ ).split(".")[0]
1552
+ if x
1553
+ else None
1554
+ )
1555
+ )
1556
+ search_results["File Name"] = search_results["File Name"].apply(
1557
+ lambda x: os.path.basename(x).split(".")[0] if x else None
1558
+ )
1559
+
1560
+ search_results["Intensity Log10"] = search_results[
1561
+ intensity_column
1562
+ ]
1563
+
1564
+ # 3. Merge report to search results to get Q value and other properties
1565
+ report = self.get_search_result(
1566
+ analysis_id=analysis_id,
1567
+ analyte_type="precursor",
1568
+ rollup="np",
1569
+ )
1570
+ report["File Name"] = report["Run"]
1571
+ report["Protein Group"] = report["Protein.Group"]
1572
+
1573
+ if analyte_type == "protein":
1574
+ report["Protein Q Value"] = report["Protein.Q.Value"]
1575
+
1576
+ report = report[
1577
+ ["File Name", "Protein Group", "Protein Q Value"]
1578
+ ]
1579
+ report.drop_duplicates(
1580
+ subset=["File Name", "Protein Group"], inplace=True
1581
+ )
1582
+ df = pd.merge(
1583
+ search_results,
1584
+ report,
1585
+ on=["File Name", "Protein Group"],
1586
+ how="left",
1587
+ )
1588
+ included_columns = [
1589
+ "MsRun ID",
1590
+ "Sample ID",
1591
+ "Protein Group",
1592
+ "Intensity Log10",
1593
+ "Protein Q Value",
1594
+ ]
1595
+
1596
+ else:
1597
+ report["Peptide"] = report["Stripped.Sequence"]
1598
+ # If analyte_type is peptide, attach retention time (RT, iRT)
1599
+ report = report[["File Name", "Peptide", "RT", "iRT"]]
1600
+ report.drop_duplicates(
1601
+ subset=["File Name", "Peptide"], inplace=True
1602
+ )
1603
+ df = pd.merge(
1604
+ search_results,
1605
+ report,
1606
+ on=["File Name", "Peptide"],
1607
+ how="left",
1608
+ )
1609
+ included_columns = [
1610
+ "MsRun ID",
1611
+ "Sample ID",
1612
+ "Peptide",
1613
+ "Protein Group",
1614
+ "Intensity Log10",
1615
+ "RT",
1616
+ "iRT",
1617
+ ]
1618
+ # endif
1619
+
1620
+ if rollup == "np":
1621
+ included_columns.insert(
1622
+ included_columns.index("Sample ID") + 1, "Nanoparticle"
1623
+ )
1624
+
1625
+ df["MsRun ID"] = df["File Name"].apply(
1626
+ lambda x: (
1627
+ file_to_msrun[x]["id"] if x in file_to_msrun else None
1628
+ )
1629
+ )
1630
+ df["Sample ID"] = df["File Name"].apply(
1631
+ lambda x: (
1632
+ file_to_msrun[x]["sample_id"]
1633
+ if x in file_to_msrun
1634
+ else None
1635
+ )
1636
+ )
1637
+ df = df[included_columns]
1638
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
1639
+ return df
1640
+ else:
1641
+ # precursor
1642
+ # working only in report.tsv
1643
+ search_results["Intensity"] = search_results["Precursor.Quantity"]
1644
+ search_results["MsRun ID"] = search_results["Run"].apply(
1645
+ lambda x: (
1646
+ file_to_msrun[x]["id"] if x in file_to_msrun else None
1647
+ )
1648
+ )
1649
+ search_results["Sample ID"] = search_results["Run"].apply(
1650
+ lambda x: (
1651
+ file_to_msrun[x]["sample_id"]
1652
+ if x in file_to_msrun
1653
+ else None
1654
+ )
1655
+ )
1656
+ search_results["Protein Group"] = search_results["Protein.Group"]
1657
+ search_results["Peptide"] = search_results["Stripped.Sequence"]
1658
+ search_results["Charge"] = search_results["Precursor.Charge"]
1659
+ search_results["Precursor Id"] = search_results["Precursor.Id"]
1660
+ search_results["Precursor Q Value"] = search_results["Q.Value"]
1661
+ search_results["Protein Q Value"] = search_results[
1662
+ "Protein.Q.Value"
1663
+ ]
1664
+
1665
+ included_columns = [
1666
+ "MsRun ID",
1667
+ "Sample ID",
1668
+ "Protein Group",
1669
+ "Protein Q Value",
1670
+ "Peptide",
1671
+ "Precursor Id",
1672
+ "Intensity",
1673
+ "Precursor Q Value",
1674
+ "Charge",
1675
+ "RT",
1676
+ "iRT",
1677
+ "IM",
1678
+ "iIM",
1679
+ ]
1680
+ df = search_results[included_columns]
1681
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
1682
+
1683
+ return df
1684
+
1685
+ def get_search_data_analytes(self, analysis_id: str, analyte_type: str):
1686
+ if analyte_type not in ["protein", "peptide", "precursor"]:
1687
+ raise ValueError(
1688
+ f"Unknown analyte_type = {analyte_type}. Supported analytes are 'protein', 'peptide', or 'precursor'."
1689
+ )
1690
+
1691
+ # include
1692
+ # protein group, (peptide sequence), protein names, gene names, biological process, molecular function, cellular component, global q value, library q value
1693
+
1694
+ # 1. for all modes, fetch protein np file to extract protein groups, protein names, gene names, biological process, molecular function, cellular component
1695
+ search_results = self.get_search_result(
1696
+ analysis_id=analysis_id, analyte_type="protein", rollup="np"
1697
+ )
1698
+
1699
+ report_results = self.get_search_result(
1700
+ analysis_id=analysis_id, analyte_type="precursor", rollup="np"
1701
+ )
1702
+
1703
+ search_results = search_results[
1704
+ [
1705
+ "Protein Group",
1706
+ "Protein Names",
1707
+ "Gene Names",
1708
+ "Biological Process",
1709
+ "Molecular Function",
1710
+ "Cellular Component",
1711
+ ]
1712
+ ]
1713
+ search_results.drop_duplicates(subset=["Protein Group"], inplace=True)
1714
+ report_results["Protein Group"] = report_results["Protein.Group"]
1715
+ report_results["Peptide"] = report_results["Stripped.Sequence"]
1716
+
1717
+ if analyte_type == "protein":
1718
+ report_results = report_results[
1719
+ [
1720
+ "Protein Group",
1721
+ "Protein.Ids",
1722
+ "Global.PG.Q.Value",
1723
+ "Lib.PG.Q.Value",
1724
+ ]
1725
+ ]
1726
+ report_results.drop_duplicates(
1727
+ subset=["Protein Group"], inplace=True
1728
+ )
1729
+ df = pd.merge(
1730
+ search_results,
1731
+ report_results,
1732
+ on=["Protein Group"],
1733
+ how="left",
1734
+ )
1735
+ elif analyte_type == "peptide":
1736
+ peptide_results = self.get_search_result(
1737
+ analysis_id=analysis_id, analyte_type="peptide", rollup="np"
1738
+ )
1739
+ peptide_results = peptide_results[["Peptide", "Protein Group"]]
1740
+ search_results = pd.merge(
1741
+ peptide_results,
1742
+ search_results,
1743
+ on=["Protein Group"],
1744
+ how="left",
1745
+ )
1746
+
1747
+ report_results = report_results[
1748
+ ["Peptide", "Protein.Ids", "Protein.Group"]
1749
+ ]
1750
+ report_results.drop_duplicates(subset=["Peptide"], inplace=True)
1751
+ df = pd.merge(
1752
+ search_results,
1753
+ report_results,
1754
+ on=["Peptide"],
1755
+ how="left",
1756
+ )
1757
+ else:
1758
+ # precursor
1759
+ search_results = search_results[
1760
+ [
1761
+ "Protein Group",
1762
+ "Protein Names",
1763
+ "Gene Names",
1764
+ "Biological Process",
1765
+ "Molecular Function",
1766
+ "Cellular Component",
1767
+ ]
1768
+ ]
1769
+ search_results.drop_duplicates(
1770
+ subset=["Protein Group"], inplace=True
1771
+ )
1772
+ report_results = report_results[
1773
+ [
1774
+ "Precursor.Id",
1775
+ "Precursor.Charge",
1776
+ "Peptide",
1777
+ "Protein Group",
1778
+ "Protein.Ids",
1779
+ "Protein.Names",
1780
+ "Genes",
1781
+ "First.Protein.Description",
1782
+ "Modified.Sequence",
1783
+ "Proteotypic",
1784
+ "Global.Q.Value",
1785
+ "Global.PG.Q.Value",
1786
+ "Lib.Q.Value",
1787
+ "Lib.PG.Q.Value",
1788
+ ]
1789
+ ]
1790
+ report_results.drop_duplicates(
1791
+ subset=["Protein Group"], inplace=True
1792
+ )
1793
+ df = pd.merge(
1794
+ report_results,
1795
+ search_results,
1796
+ on=["Protein Group"],
1797
+ how="left",
1798
+ )
1799
+ df = df[
1800
+ [
1801
+ "Precursor.Id",
1802
+ "Precursor.Charge",
1803
+ "Peptide",
1804
+ "Protein Group",
1805
+ "Protein.Ids",
1806
+ "Protein.Names",
1807
+ "Genes",
1808
+ "First.Protein.Description",
1809
+ "Modified.Sequence",
1810
+ "Proteotypic",
1811
+ "Global.Q.Value",
1812
+ "Global.PG.Q.Value",
1813
+ "Lib.Q.Value",
1814
+ "Lib.PG.Q.Value",
1815
+ "Gene Names",
1816
+ "Biological Process",
1817
+ "Molecular Function",
1818
+ "Cellular Component",
1819
+ ]
1820
+ ]
1821
+ df.rename(
1822
+ columns={"Modified.Sequence": "Modified.Peptide"}, inplace=True
1823
+ )
1824
+ # endif
1825
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
1826
+ return df
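
The two largest additions in 1.1.0 are the get_search_data and get_search_data_analytes helpers shown above, both defined on _UnsupportedSDK (a subclass of SeerSDK in the in-development seer_pas_sdk.core.unsupported module). A hedged usage sketch, assuming an authenticated instance of that class and a placeholder analysis ID:

    # Illustrative only: `sdk` stands in for an authenticated _UnsupportedSDK /
    # SeerSDK instance and "ANALYSIS_ID" is a placeholder.
    intensities = sdk.get_search_data(
        analysis_id="ANALYSIS_ID",
        analyte_type="protein",   # 'protein', 'peptide', or 'precursor'
        rollup="np",              # 'np' (per nanoparticle) or 'panel'
        norm_method="pepcal",     # raw | engine | median | median80 | pepcal (default)
    )

    annotations = sdk.get_search_data_analytes(
        analysis_id="ANALYSIS_ID",
        analyte_type="protein",
    )

    # Both return pandas DataFrames with snake_case column names, e.g.
    # 'msrun_id', 'sample_id', 'protein_group', 'intensity_log10', ...
    print(intensities.columns.tolist())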