seer-pas-sdk 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seer_pas_sdk/auth/auth.py +174 -15
- seer_pas_sdk/common/__init__.py +46 -5
- seer_pas_sdk/core/sdk.py +1474 -183
- seer_pas_sdk/core/unsupported.py +423 -230
- seer_pas_sdk/objects/__init__.py +1 -0
- seer_pas_sdk/objects/headers.py +144 -0
- seer_pas_sdk/objects/volcanoplot.py +3 -2
- {seer_pas_sdk-1.0.0.dist-info → seer_pas_sdk-1.1.1.dist-info}/METADATA +1 -2
- seer_pas_sdk-1.1.1.dist-info/RECORD +19 -0
- seer_pas_sdk-1.0.0.dist-info/RECORD +0 -18
- {seer_pas_sdk-1.0.0.dist-info → seer_pas_sdk-1.1.1.dist-info}/WHEEL +0 -0
- {seer_pas_sdk-1.0.0.dist-info → seer_pas_sdk-1.1.1.dist-info}/licenses/LICENSE.txt +0 -0
- {seer_pas_sdk-1.0.0.dist-info → seer_pas_sdk-1.1.1.dist-info}/top_level.txt +0 -0
seer_pas_sdk/core/unsupported.py
CHANGED
@@ -2,19 +2,12 @@
 seer_pas_sdk.core.unsupported -- in development
 """

-from tqdm import tqdm
-
 import os
-import jwt
-import requests
-import urllib.request
-import ssl
 import shutil

 from typing import List as _List

 from ..common import *
-from ..auth import Auth
 from ..objects import PlateMap

 from .sdk import SeerSDK as _SeerSDK
@@ -71,7 +64,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/samples"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addsample") as s:

             response = s.post(URL, json=sample_entry)

@@ -112,7 +105,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/samples/batch"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addsamples") as s:
             response = s.post(URL, json={"samples": sample_info})

             if response.status_code != 200:
@@ -164,9 +157,7 @@ class _UnsupportedSDK(_SeerSDK):
         if not project_name:
             raise ValueError("Project name cannot be empty.")

-        all_plate_ids = set(
-            [plate["id"] for plate in self.get_plate_metadata()]
-        )
+        all_plate_ids = set([plate["id"] for plate in self.find_plates()])

         for plate_id in plate_ids:
             if plate_id not in all_plate_ids:
@@ -176,7 +167,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/projects"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addproject") as s:

             project = s.post(
                 URL,
@@ -233,7 +224,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/addSamplesToProject/{project_id}"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addprojectsamples") as s:

             response = s.put(
                 URL,
@@ -277,7 +268,7 @@ class _UnsupportedSDK(_SeerSDK):
         samples = (
             x["id"]
             for plate_id in plates
-            for x in self.
+            for x in self.find_samples(plate_id=plate_id)
         )

         return self.add_samples_to_project(
@@ -375,7 +366,7 @@ class _UnsupportedSDK(_SeerSDK):
         validate_plate_map(plate_map_data, local_file_names)

         # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getplateids") as s:
             plate_response = s.get(f"{self._auth.url}api/v1/plateids")

             if plate_response.status_code != 200:
@@ -392,7 +383,7 @@ class _UnsupportedSDK(_SeerSDK):

         # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addplate") as s:
             plate_response = s.post(
                 f"{self._auth.url}api/v1/plates",
                 json={
@@ -415,7 +406,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadconfig") as s:
             config_response = s.post(
                 f"{self._auth.url}api/v1/msdatas/getuploadconfig",
                 json={"plateId": id_uuid},
@@ -441,7 +432,7 @@ class _UnsupportedSDK(_SeerSDK):
             s3_bucket = config_response.json()["s3Bucket"]
             s3_upload_path = config_response.json()["s3UploadPath"]

-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadcredentials") as s:
             config_response = s.get(
                 f"{self._auth.url}auth/getawscredential",
             )
@@ -487,7 +478,7 @@ class _UnsupportedSDK(_SeerSDK):
                     "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
                 )

-        with self._get_auth_session() as s:
+        with self._get_auth_session("uploadplatemapfile") as s:
             plate_map_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={
@@ -540,7 +531,7 @@ class _UnsupportedSDK(_SeerSDK):
                     "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
                 )

-        with self._get_auth_session() as s:
+        with self._get_auth_session("uploadsampledescriptionfile") as s:
             sdf_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={
@@ -573,7 +564,7 @@ class _UnsupportedSDK(_SeerSDK):
         )

         # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("addmsdatas") as s:
             ms_data_response = s.post(
                 f"{self._auth.url}api/v1/msdatas/batch",
                 json={"msdatas": plate_map_data},
@@ -584,7 +575,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 9: Upload each msdata file to the S3 bucket.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadcredentials") as s:
             config_response = s.get(
                 f"{self._auth.url}auth/getawscredential",
             )
@@ -629,7 +620,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 10: Make a call to `api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("addmsdataindex") as s:
             file_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={"files": files},
@@ -671,16 +662,16 @@ class _UnsupportedSDK(_SeerSDK):
             Name of the analysis.

         project_id : str
-            ID of the project to which the analysis belongs. Can be fetched using the
+            ID of the project to which the analysis belongs. Can be fetched using the find_projects() function.

         sample_ids: list[str], optional
             List of sample IDs to be used for the analysis. Should be omitted if analysis is to be run with all samples.

         analysis_protocol_name : str, optional
-            Name of the analysis protocol to be used for the analysis. Can be fetched using the
+            Name of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.

         analysis_protocol_id : str, optional
-            ID of the analysis protocol to be used for the analysis. Can be fetched using the
+            ID of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.

         notes : str, optional
             Notes for the analysis, defaulted to an empty string.
@@ -714,7 +705,7 @@ class _UnsupportedSDK(_SeerSDK):
             raise ValueError("Project ID cannot be empty.")

         if not analysis_protocol_id and analysis_protocol_name:
-            valid_analysis_protocol = self.
+            valid_analysis_protocol = self.find_analysis_protocols(
                 analysis_protocol_name=analysis_protocol_name
             )

@@ -726,7 +717,7 @@ class _UnsupportedSDK(_SeerSDK):
             analysis_protocol_id = valid_analysis_protocol[0]["id"]

         if analysis_protocol_id and not analysis_protocol_name:
-            valid_analysis_protocol = self.
+            valid_analysis_protocol = self.find_analysis_protocols(
                 analysis_protocol_id=analysis_protocol_id
             )

@@ -743,7 +734,7 @@ class _UnsupportedSDK(_SeerSDK):
         if sample_ids:
             valid_ids = [
                 entry["id"]
-                for entry in self.
+                for entry in self.find_samples(project_id=project_id)
             ]

             for sample_id in sample_ids:
@@ -758,7 +749,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/analyze"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("startanalysis") as s:
             req_payload = {
                 "analysisName": name,
                 "analysisProtocolId": analysis_protocol_id,
@@ -854,7 +845,7 @@ class _UnsupportedSDK(_SeerSDK):
         tenant_id = self.get_active_tenant_id()

         # Step 3: Fetch the S3 bucket name by making a call to `/api/v1/auth/getawscredential`
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadcredentials") as s:
             config_response = s.get(
                 f"{self._auth.url}auth/getawscredential",
             )
@@ -905,20 +896,21 @@ class _UnsupportedSDK(_SeerSDK):

         # Step 5: Make a call to `/api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
         result_files = None
-        with self._get_auth_session() as s:
+        with self._get_auth_session("addmsdataindex") as s:
             file_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={"files": files},
             )

-            if (
-                file_response.status_code != 200
-                or not file_response.json()
-                or "created" not in file_response.json()
-            ):
+            if file_response.status_code != 200:
                 raise ServerError("Could not upload MS Files to PAS.")
-            result_files = file_response.json()
-
+            result_files = file_response.json()
+            try:
+                result_files = result_files["files"]
+            except Exception:
+                raise ServerError(
+                    "Unexpected response from PAS server. Please ensure you are using the latest version of SeerSDK."
+                )
         # omit tenant_id from return file path
         for result in result_files:
             result["filePath"] = "/".join(result["filePath"].split("/")[1:])
@@ -1003,7 +995,7 @@ class _UnsupportedSDK(_SeerSDK):
         target_folder_path = f"{tenant_id}/{target_folder_paths[0]}"
         # Retrieve msdatafileindex metadata to determine source space
         base_space = None
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getmsdataindex") as s:
             URL = self._auth.url + "api/v1/msdataindex/getmetadata"
             params = {"folderKey": folder_path}
             r = s.get(URL, params=params)
@@ -1046,7 +1038,7 @@ class _UnsupportedSDK(_SeerSDK):
         if target_space_id and base_space != target_space_id:
             json["targetUserGroupId"] = target_space_id

-        with self._get_auth_session() as s:
+        with self._get_auth_session("movemsdataindex") as s:
             URL = self._auth.url + "api/v1/msdataindex/move"
             json = json
             r = s.post(URL, json=json)
@@ -1098,167 +1090,6 @@ class _UnsupportedSDK(_SeerSDK):
             source_ms_data_files, target_ms_data_files
         )

-    def download_analysis_files(
-        self, analysis_id: str, download_path: str = "", file_name: str = ""
-    ):
-        """
-        Download a specific analysis file from the backend given an `analysis_id` to the specified `download_path`.
-
-        If no `download_path` is specified, the file will be downloaded to the current working directory.
-
-        If no `file_name` is specified, all files for the analysis will be downloaded.
-
-        Parameters
-        ----------
-        analysis_id : str
-            ID of the analysis to download.
-        download_path : str, optional
-            Path to download the analysis file to, defaulted to current working directory.
-        file_name : str, optional
-            Name of the analysis file to download, defaulted to None.
-
-        Returns
-        -------
-        dict
-            Message containing whether the file was downloaded or not.
-
-        Examples
-        -------
-        >>> from core import SeerSDK
-        >>> sdk = SeerSDK()
-        >>> sdk.download_analysis_files("analysis_id", "/path/to/download")
-        >>> Downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
-            Finished downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
-
-            Downloading EXP20004_2020ms0007X11_A.mzML.quant
-            Finished downloading EXP20004_2020ms0007X11_A.mzML.quant
-
-            Downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
-            Finished downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
-
-            Downloading EXP20004_2020ms0007X11_A/diann.log
-            Finished downloading EXP20004_2020ms0007X11_A/diann.log
-        >>> { "message": "File downloaded successfully." }
-        """
-
-        def get_url(analysis_id, file_name, project_id):
-            URL = f"{self._auth.url}api/v1/analysisResultFiles/getUrl"
-
-            with self._get_auth_session() as s:
-
-                download_url = s.post(
-                    URL,
-                    json={
-                        "analysisId": analysis_id,
-                        "filename": file_name,
-                        "projectId": project_id,
-                    },
-                )
-
-                if download_url.status_code != 200:
-                    raise ValueError(
-                        "Could not download file. Please check if the analysis ID is valid or the backend is running."
-                    )
-
-            return download_url.json()["url"]
-
-        if not analysis_id:
-            raise ValueError("Analysis ID cannot be empty.")
-
-        try:
-            valid_analysis = self.get_analyses(analysis_id)[0]
-        except:
-            raise ValueError(
-                "Invalid analysis ID. Please check if the analysis ID is valid or the backend is running."
-            )
-
-        project_id = valid_analysis["project_id"]
-
-        if not download_path:
-            download_path = os.getcwd()
-            print(f"\nDownload path not specified.\n")
-
-        if not os.path.isdir(download_path):
-            print(
-                f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
-            )
-            download_path = os.getcwd()
-
-        name = f"{download_path}/downloads/{analysis_id}"
-
-        if not os.path.exists(name):
-            os.makedirs(name)
-
-        URL = f"{self._auth.url}api/v1/analysisResultFiles"
-
-        with self._get_auth_session() as s:
-
-            analysis_files = s.get(f"{URL}/{analysis_id}")
-
-            if analysis_files.status_code != 200:
-                raise ValueError(
-                    "Invalid request. Please check if the analysis ID is valid or the backend is running."
-                )
-
-            res = analysis_files.json()
-
-            if file_name:
-                filenames = set([file["filename"] for file in res])
-
-                if file_name not in filenames:
-                    raise ValueError(
-                        "Invalid file name. Please check if the file name is correct."
-                    )
-
-                res = [file for file in res if file["filename"] == file_name]
-
-            print(f'Downloading files to "{name}"\n')
-
-            for file in res:
-                filename = file["filename"]
-                url = get_url(analysis_id, filename, project_id)
-
-                print(f"Downloading {filename}")
-
-                for _ in range(2):
-                    try:
-                        with tqdm(
-                            unit="B",
-                            unit_scale=True,
-                            unit_divisor=1024,
-                            miniters=1,
-                            desc=f"Progress",
-                        ) as t:
-                            ssl._create_default_https_context = (
-                                ssl._create_unverified_context
-                            )
-                            urllib.request.urlretrieve(
-                                url,
-                                f"{name}/{filename}",
-                                reporthook=download_hook(t),
-                                data=None,
-                            )
-                        break
-                    except:
-                        filename = filename.split("/")
-                        name += "/" + "/".join(
-                            [filename[i] for i in range(len(filename) - 1)]
-                        )
-                        filename = filename[-1]
-                        if not os.path.isdir(f"{name}/{filename}"):
-                            os.makedirs(f"{name}/")
-
-                else:
-                    raise ValueError(
-                        "Your download failed. Please check if the backend is still running."
-                    )
-
-                print(f"Finished downloading {filename}\n")
-
-            return {
-                "message": f"Files downloaded successfully to '{download_path}/downloads/{analysis_id}'"
-            }
-
     def link_plate(
         self,
         ms_data_files: _List[str],
@@ -1301,17 +1132,12 @@ class _UnsupportedSDK(_SeerSDK):

         plate_ids = (
             set()
-        ) # contains all the plate_ids fetched from self.
-        files = [] # to be uploaded to sync frontend
+        ) # contains all the plate_ids fetched from self.find_plates()
         samples = [] # list of all the sample responses from the backend
         id_uuid = "" # uuid for the plate id
         raw_file_paths = {} # list of all the AWS raw file paths
         s3_upload_path = None
         s3_bucket = ""
-        ms_data_file_names = []
-        dir_exists = (
-            True # flag to check if the generated_files directory exists
-        )

         # Step 0: Check if the file paths exist in the S3 bucket.
         for file in ms_data_files:
@@ -1344,7 +1170,7 @@ class _UnsupportedSDK(_SeerSDK):
         validate_plate_map(plate_map_data, ms_data_files)

         # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getplateids") as s:
             plate_response = s.get(f"{self._auth.url}api/v1/plateids")

             if plate_response.status_code != 200:
@@ -1361,7 +1187,7 @@ class _UnsupportedSDK(_SeerSDK):

         # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addplate") as s:
             plate_response = s.post(
                 f"{self._auth.url}api/v1/plates",
                 json={
@@ -1384,7 +1210,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadconfig") as s:
             config_response = s.post(
                 f"{self._auth.url}api/v1/msdatas/getuploadconfig",
                 json={"plateId": id_uuid},
@@ -1410,7 +1236,7 @@ class _UnsupportedSDK(_SeerSDK):
             s3_bucket = config_response.json()["s3Bucket"]
             s3_upload_path = config_response.json()["s3UploadPath"]

-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadcredentials") as s:
             config_response = s.get(
                 f"{self._auth.url}auth/getawscredential",
             )
@@ -1456,7 +1282,7 @@ class _UnsupportedSDK(_SeerSDK):
                     "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
                 )

-        with self._get_auth_session() as s:
+        with self._get_auth_session("uploadplatemap") as s:
             plate_map_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={
@@ -1507,7 +1333,7 @@ class _UnsupportedSDK(_SeerSDK):
                     "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
                 )

-        with self._get_auth_session() as s:
+        with self._get_auth_session("uploadsampledescription") as s:
             sdf_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={
@@ -1542,7 +1368,7 @@ class _UnsupportedSDK(_SeerSDK):
         )

         # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("addmsdatas") as s:
             ms_data_response = s.post(
                 f"{self._auth.url}api/v1/msdatas/batch",
                 json={"msdatas": plate_map_data},
@@ -1555,7 +1381,7 @@ class _UnsupportedSDK(_SeerSDK):
         print(f"Plate generated with id: '{id_uuid}'")
         return id_uuid

-    def
+    def _get_msdataindex(self, folder=""):
         """
         Get metadata for a given file path.

@@ -1566,20 +1392,15 @@ class _UnsupportedSDK(_SeerSDK):
            dict: A dictionary containing the metadata for the file.
         """
         URL = f"{self._auth.url}api/v2/msdataindex/getmetadata"
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getmsdataindex") as s:
             params = {"all": "true"}
             if folder:
-                tenant_id =
-                    self._auth.get_token()[0],
-                    options={"verify_signature": False},
-                )["custom:tenantId"]
+                tenant_id = self.get_active_tenant_id()
                 params["folderKey"] = f"{tenant_id}/{folder}"
-                print(params["folderKey"])

             metadata = s.get(URL, params=params)

             if metadata.status_code != 200:
-                print(metadata.text)
                 raise ServerError("Could not fetch metadata for file.")

         return metadata.json()
@@ -1595,9 +1416,7 @@ class _UnsupportedSDK(_SeerSDK):
            dict: A dictionary mapping the display path to the raw file path.
         """

-        tenant_id =
-            self._auth.get_token()[0], options={"verify_signature": False}
-        )["custom:tenantId"]
+        tenant_id = self.get_active_tenant_id()
         result = {}
         # partition by folder_path
         folder_partitions = {os.path.dirname(x): [] for x in display_path}
@@ -1611,9 +1430,7 @@ class _UnsupportedSDK(_SeerSDK):
             try:
                 metadata = {
                     x["key"]: x["rawFilePath"]
-                    for x in self.
-                        folder=folder_path
-                    )["data"]
+                    for x in self._get_msdataindex(folder=folder_path)["data"]
                 }
             except:
                 # If the metadata fetch fails, skip the folder
@@ -1632,3 +1449,379 @@ class _UnsupportedSDK(_SeerSDK):
             )

         return result
+
+    def get_search_data(
+        self,
+        analysis_id: str,
+        analyte_type: str,
+        rollup: str,
+        norm_method: str = "pepcal",
+    ):
+        """
+        Get analyte intensities data for a given PAS analysis.
+        Args:
+            analysis_id (str): ID of the analysis.
+            analyte_type (str): Type of the analyte. Must be either 'protein', 'peptide', precursor.
+            rollup (str): Intensities rollup method. Must be either 'np' or 'panel'.
+            norm_method (str): Search engine. Supported engines are: raw, engine, median, median80, pepcal. Default is 'pepcal'.
+
+        Returns:
+            pd.DataFrame: A dataframe with each row containing the analyte intensity measurement:
+            'msrun_id', 'sample_id', 'nanoparticle' (if rollup is 'np'), 'protein_group', 'peptide' (for 'peptide' and 'precursor' analyte types), 'charge' (for 'precursor' analyte type),
+            'intensity_log10', 'protein_group_q_value', 'q_value' (for 'precursor' analyte type), 'rt' and 'irt' (for 'peptide' and 'precursor' analyte types)
+        """
+        # 1. Get msrun data for analysis
+        samples = self.find_samples(analysis_id=analysis_id)
+        sample_name_to_id = {s["sample_name"]: s["id"] for s in samples}
+        # for np rollup, a row represents an msrun
+        msruns = self.find_msruns(sample_ids=sample_name_to_id.values())
+        file_to_msrun = {
+            os.path.basename(msrun["raw_file_path"]).split(".")[0]: msrun
+            for msrun in msruns
+        }
+        sample_to_msrun = {msrun["sample_id"]: msrun for msrun in msruns}
+
+        # for panel rollup, a row represents a sample
+
+        # 2. Get search results
+        # pull the np/panel file, or report.tsv for precursor mode
+        search_results = self.get_search_result(
+            analysis_id=analysis_id,
+            analyte_type=analyte_type,
+            rollup=rollup,
+        )
+        if analyte_type in ["protein", "peptide"]:
+            intensity_column = None
+            if norm_method == "raw":
+                intensity_column = (
+                    "Intensities Log10"
+                    if "Intensities Log10" in search_results.columns
+                    else "Intensity (Log10)"
+                )
+            elif norm_method == "engine":
+                intensity_column = (
+                    "DIA-NN Normalized Intensities Log10"
+                    if "DIA-NN Normalized Intensities Log10"
+                    in search_results.columns
+                    else "Normalized Intensity (Log10)"
+                )
+                if intensity_column not in search_results.columns:
+                    raise ValueError(
+                        "Engine normalized intensities not found in search results. This is only supported for DIA-NN currently."
+                    )
+            elif norm_method == "median":
+                if (
+                    not "Median Normalized Intensities Log10"
+                    in search_results.columns
+                ):
+                    raise ValueError(
+                        "Median normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
+                    )
+                intensity_column = "Median Normalized Intensities Log10"
+            elif norm_method == "median80":
+                if (
+                    not "Median80 Normalized Intensities Log10"
+                    in search_results.columns
+                ):
+                    raise ValueError(
+                        "Median80 normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
+                    )
+                intensity_column = "Median80 Normalized Intensities Log10"
+            elif norm_method == "pepcal":
+                if not ("PepCal Intensities Log10" in search_results.columns):
+                    raise ValueError(
+                        "Pepcal normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later with the Seer Peptide Calibrant option enabled. \n Please retry using different norm_method, such as 'median'"
+                    )
+
+                intensity_column = "PepCal Intensities Log10"
+
+            else:
+                raise ValueError(
+                    f"norm_method = {norm_method} is not supported. Supported normalization methods are: raw, pepcal, engine, median, median80."
+                )
+            if rollup == "panel":
+                search_results.fillna({"Sample Name": ""}, inplace=True)
+                search_results["File Name"] = search_results[
+                    "Sample Name"
+                ].apply(
+                    lambda x: (
+                        os.path.basename(
+                            sample_to_msrun[sample_name_to_id[x]][
+                                "raw_file_path"
+                            ]
+                        ).split(".")[0]
+                        if x
+                        else None
+                    )
+                )
+            search_results["File Name"] = search_results["File Name"].apply(
+                lambda x: os.path.basename(x).split(".")[0] if x else None
+            )
+
+            search_results["Intensity Log10"] = search_results[
+                intensity_column
+            ]
+
+            # 3. Merge report to search results to get Q value and other properties
+            report = self.get_search_result(
+                analysis_id=analysis_id,
+                analyte_type="precursor",
+                rollup="np",
+            )
+            report["File Name"] = report["Run"]
+            report["Protein Group"] = report["Protein.Group"]
+
+            if analyte_type == "protein":
+                report["Protein Q Value"] = report["Protein.Q.Value"]
+
+                report = report[
+                    ["File Name", "Protein Group", "Protein Q Value"]
+                ]
+                report.drop_duplicates(
+                    subset=["File Name", "Protein Group"], inplace=True
+                )
+                df = pd.merge(
+                    search_results,
+                    report,
+                    on=["File Name", "Protein Group"],
+                    how="left",
+                )
+                included_columns = [
+                    "MsRun ID",
+                    "Sample ID",
+                    "Protein Group",
+                    "Intensity Log10",
+                    "Protein Q Value",
+                ]
+
+            else:
+                report["Peptide"] = report["Stripped.Sequence"]
+                # If analyte_type is peptide, attach retention time (RT, iRT)
+                report = report[["File Name", "Peptide", "RT", "iRT"]]
+                report.drop_duplicates(
+                    subset=["File Name", "Peptide"], inplace=True
+                )
+                df = pd.merge(
+                    search_results,
+                    report,
+                    on=["File Name", "Peptide"],
+                    how="left",
+                )
+                included_columns = [
+                    "MsRun ID",
+                    "Sample ID",
+                    "Peptide",
+                    "Protein Group",
+                    "Intensity Log10",
+                    "RT",
+                    "iRT",
+                ]
+            # endif
+
+            if rollup == "np":
+                included_columns.insert(
+                    included_columns.index("Sample ID") + 1, "Nanoparticle"
+                )
+
+            df["MsRun ID"] = df["File Name"].apply(
+                lambda x: (
+                    file_to_msrun[x]["id"] if x in file_to_msrun else None
+                )
+            )
+            df["Sample ID"] = df["File Name"].apply(
+                lambda x: (
+                    file_to_msrun[x]["sample_id"]
+                    if x in file_to_msrun
+                    else None
+                )
+            )
+            df = df[included_columns]
+            df.columns = [title_case_to_snake_case(x) for x in df.columns]
+            return df
+        else:
+            # precursor
+            # working only in report.tsv
+            search_results["Intensity"] = search_results["Precursor.Quantity"]
+            search_results["MsRun ID"] = search_results["Run"].apply(
+                lambda x: (
+                    file_to_msrun[x]["id"] if x in file_to_msrun else None
+                )
+            )
+            search_results["Sample ID"] = search_results["Run"].apply(
+                lambda x: (
+                    file_to_msrun[x]["sample_id"]
+                    if x in file_to_msrun
+                    else None
+                )
+            )
+            search_results["Protein Group"] = search_results["Protein.Group"]
+            search_results["Peptide"] = search_results["Stripped.Sequence"]
+            search_results["Charge"] = search_results["Precursor.Charge"]
+            search_results["Precursor Id"] = search_results["Precursor.Id"]
+            search_results["Precursor Q Value"] = search_results["Q.Value"]
+            search_results["Protein Q Value"] = search_results[
+                "Protein.Q.Value"
+            ]
+
+            included_columns = [
+                "MsRun ID",
+                "Sample ID",
+                "Protein Group",
+                "Protein Q Value",
+                "Peptide",
+                "Precursor Id",
+                "Intensity",
+                "Precursor Q Value",
+                "Charge",
+                "RT",
+                "iRT",
+                "IM",
+                "iIM",
+            ]
+            df = search_results[included_columns]
+            df.columns = [title_case_to_snake_case(x) for x in df.columns]
+
+            return df
+
+    def get_search_data_analytes(self, analysis_id: str, analyte_type: str):
+        if analyte_type not in ["protein", "peptide", "precursor"]:
+            raise ValueError(
+                f"Unknown analyte_type = {analyte_type}. Supported analytes are 'protein', 'peptide', or 'precursor'."
+            )
+
+        # include
+        # protein group, (peptide sequence), protein names, gene names, biological process, molecular function, cellular component, global q value, library q value
+
+        # 1. for all modes, fetch protein np file to extract protein groups, protein names, gene names, biological process, molecular function, cellular component
+        search_results = self.get_search_result(
+            analysis_id=analysis_id, analyte_type="protein", rollup="np"
+        )
+
+        report_results = self.get_search_result(
+            analysis_id=analysis_id, analyte_type="precursor", rollup="np"
+        )
+
+        search_results = search_results[
+            [
+                "Protein Group",
+                "Protein Names",
+                "Gene Names",
+                "Biological Process",
+                "Molecular Function",
+                "Cellular Component",
+            ]
+        ]
+        search_results.drop_duplicates(subset=["Protein Group"], inplace=True)
+        report_results["Protein Group"] = report_results["Protein.Group"]
+        report_results["Peptide"] = report_results["Stripped.Sequence"]
+
+        if analyte_type == "protein":
+            report_results = report_results[
+                [
+                    "Protein Group",
+                    "Protein.Ids",
+                    "Global.PG.Q.Value",
+                    "Lib.PG.Q.Value",
+                ]
+            ]
+            report_results.drop_duplicates(
+                subset=["Protein Group"], inplace=True
+            )
+            df = pd.merge(
+                search_results,
+                report_results,
+                on=["Protein Group"],
+                how="left",
+            )
+        elif analyte_type == "peptide":
+            peptide_results = self.get_search_result(
+                analysis_id=analysis_id, analyte_type="peptide", rollup="np"
+            )
+            peptide_results = peptide_results[["Peptide", "Protein Group"]]
+            search_results = pd.merge(
+                peptide_results,
+                search_results,
+                on=["Protein Group"],
+                how="left",
+            )
+
+            report_results = report_results[
+                ["Peptide", "Protein.Ids", "Protein.Group"]
+            ]
+            report_results.drop_duplicates(subset=["Peptide"], inplace=True)
+            df = pd.merge(
+                search_results,
+                report_results,
+                on=["Peptide"],
+                how="left",
+            )
+        else:
+            # precursor
+            search_results = search_results[
+                [
+                    "Protein Group",
+                    "Protein Names",
+                    "Gene Names",
+                    "Biological Process",
+                    "Molecular Function",
+                    "Cellular Component",
+                ]
+            ]
+            search_results.drop_duplicates(
+                subset=["Protein Group"], inplace=True
+            )
+            report_results = report_results[
+                [
+                    "Precursor.Id",
+                    "Precursor.Charge",
+                    "Peptide",
+                    "Protein Group",
+                    "Protein.Ids",
+                    "Protein.Names",
+                    "Genes",
+                    "First.Protein.Description",
+                    "Modified.Sequence",
+                    "Proteotypic",
+                    "Global.Q.Value",
+                    "Global.PG.Q.Value",
+                    "Lib.Q.Value",
+                    "Lib.PG.Q.Value",
+                ]
+            ]
+            report_results.drop_duplicates(
+                subset=["Protein Group"], inplace=True
+            )
+            df = pd.merge(
+                report_results,
+                search_results,
+                on=["Protein Group"],
+                how="left",
+            )
+            df = df[
+                [
+                    "Precursor.Id",
+                    "Precursor.Charge",
+                    "Peptide",
+                    "Protein Group",
+                    "Protein.Ids",
+                    "Protein.Names",
+                    "Genes",
+                    "First.Protein.Description",
+                    "Modified.Sequence",
+                    "Proteotypic",
+                    "Global.Q.Value",
+                    "Global.PG.Q.Value",
+                    "Lib.Q.Value",
+                    "Lib.PG.Q.Value",
+                    "Gene Names",
+                    "Biological Process",
+                    "Molecular Function",
+                    "Cellular Component",
+                ]
+            ]
+            df.rename(
+                columns={"Modified.Sequence": "Modified.Peptide"}, inplace=True
+            )
+        # endif
+        df.columns = [title_case_to_snake_case(x) for x in df.columns]
+        return df