seer-pas-sdk 0.3.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seer_pas_sdk/auth/auth.py +174 -15
- seer_pas_sdk/common/__init__.py +46 -5
- seer_pas_sdk/core/sdk.py +1474 -183
- seer_pas_sdk/core/unsupported.py +415 -223
- seer_pas_sdk/objects/__init__.py +1 -0
- seer_pas_sdk/objects/headers.py +144 -0
- seer_pas_sdk/objects/volcanoplot.py +3 -2
- {seer_pas_sdk-0.3.4.dist-info → seer_pas_sdk-1.1.0.dist-info}/METADATA +1 -2
- seer_pas_sdk-1.1.0.dist-info/RECORD +19 -0
- seer_pas_sdk-0.3.4.dist-info/RECORD +0 -18
- {seer_pas_sdk-0.3.4.dist-info → seer_pas_sdk-1.1.0.dist-info}/WHEEL +0 -0
- {seer_pas_sdk-0.3.4.dist-info → seer_pas_sdk-1.1.0.dist-info}/licenses/LICENSE.txt +0 -0
- {seer_pas_sdk-0.3.4.dist-info → seer_pas_sdk-1.1.0.dist-info}/top_level.txt +0 -0
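The headline additions in this release land in seer_pas_sdk/core/unsupported.py: a get_search_data() helper that returns per-analyte intensity measurements as a pandas DataFrame, and a get_search_data_analytes() helper that returns analyte-level annotations; both appear in the diff below. For orientation, here is a minimal usage sketch. It is illustrative only and not taken from the package documentation: the import path, the way the SDK is constructed and authenticated, and the analysis ID are assumptions, and both helpers live in the in-development "unsupported" module, so their surface may change.

# Hypothetical usage sketch (assumptions noted above; not official documentation).
from seer_pas_sdk.core import SeerSDK  # import path is an assumption

sdk = SeerSDK()  # assumes credentials/tenant are already configured for PAS

analysis_id = "YOUR-ANALYSIS-ID"  # placeholder; look up real IDs via find_projects()/find_samples()

# One row per analyte intensity measurement (msrun_id, sample_id, protein_group,
# intensity_log10, ...), per the get_search_data docstring in the diff below.
intensities = sdk.get_search_data(
    analysis_id=analysis_id,
    analyte_type="protein",  # 'protein', 'peptide', or 'precursor'
    rollup="np",             # 'np' or 'panel'
    norm_method="median",    # raw, engine, median, median80, or pepcal (default)
)

# Analyte-level annotations (protein/gene names, GO terms, q-values).
annotations = sdk.get_search_data_analytes(
    analysis_id=analysis_id, analyte_type="protein"
)

print(intensities.head())
print(annotations.head())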
seer_pas_sdk/core/unsupported.py
CHANGED
@@ -2,19 +2,12 @@
 seer_pas_sdk.core.unsupported -- in development
 """

-from tqdm import tqdm
-
 import os
-import jwt
-import requests
-import urllib.request
-import ssl
 import shutil

 from typing import List as _List

 from ..common import *
-from ..auth import Auth
 from ..objects import PlateMap

 from .sdk import SeerSDK as _SeerSDK
@@ -71,7 +64,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/samples"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addsample") as s:

             response = s.post(URL, json=sample_entry)

@@ -112,7 +105,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/samples/batch"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addsamples") as s:
             response = s.post(URL, json={"samples": sample_info})

             if response.status_code != 200:
@@ -164,9 +157,7 @@ class _UnsupportedSDK(_SeerSDK):
         if not project_name:
             raise ValueError("Project name cannot be empty.")

-        all_plate_ids = set(
-            [plate["id"] for plate in self.get_plate_metadata()]
-        )
+        all_plate_ids = set([plate["id"] for plate in self.find_plates()])

         for plate_id in plate_ids:
             if plate_id not in all_plate_ids:
@@ -176,7 +167,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/projects"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addproject") as s:

             project = s.post(
                 URL,
@@ -233,7 +224,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/addSamplesToProject/{project_id}"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addprojectsamples") as s:

             response = s.put(
                 URL,
@@ -277,7 +268,7 @@ class _UnsupportedSDK(_SeerSDK):
         samples = (
             x["id"]
             for plate_id in plates
-            for x in self.
+            for x in self.find_samples(plate_id=plate_id)
         )

         return self.add_samples_to_project(
@@ -375,7 +366,7 @@ class _UnsupportedSDK(_SeerSDK):
         validate_plate_map(plate_map_data, local_file_names)

         # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getplateids") as s:
             plate_response = s.get(f"{self._auth.url}api/v1/plateids")

             if plate_response.status_code != 200:
@@ -392,7 +383,7 @@ class _UnsupportedSDK(_SeerSDK):

         # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addplate") as s:
             plate_response = s.post(
                 f"{self._auth.url}api/v1/plates",
                 json={
@@ -415,7 +406,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadconfig") as s:
             config_response = s.post(
                 f"{self._auth.url}api/v1/msdatas/getuploadconfig",
                 json={"plateId": id_uuid},
@@ -441,7 +432,7 @@ class _UnsupportedSDK(_SeerSDK):
             s3_bucket = config_response.json()["s3Bucket"]
             s3_upload_path = config_response.json()["s3UploadPath"]

-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadcredentials") as s:
             config_response = s.get(
                 f"{self._auth.url}auth/getawscredential",
             )
@@ -487,7 +478,7 @@ class _UnsupportedSDK(_SeerSDK):
                 "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
             )

-        with self._get_auth_session() as s:
+        with self._get_auth_session("uploadplatemapfile") as s:
             plate_map_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={
@@ -540,7 +531,7 @@ class _UnsupportedSDK(_SeerSDK):
                 "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
             )

-        with self._get_auth_session() as s:
+        with self._get_auth_session("uploadsampledescriptionfile") as s:
             sdf_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={
@@ -573,7 +564,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("addmsdatas") as s:
             ms_data_response = s.post(
                 f"{self._auth.url}api/v1/msdatas/batch",
                 json={"msdatas": plate_map_data},
@@ -584,7 +575,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 9: Upload each msdata file to the S3 bucket.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadcredentials") as s:
             config_response = s.get(
                 f"{self._auth.url}auth/getawscredential",
             )
@@ -629,7 +620,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 10: Make a call to `api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("addmsdataindex") as s:
             file_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={"files": files},
@@ -671,16 +662,16 @@ class _UnsupportedSDK(_SeerSDK):
             Name of the analysis.

         project_id : str
-            ID of the project to which the analysis belongs. Can be fetched using the
+            ID of the project to which the analysis belongs. Can be fetched using the find_projects() function.

         sample_ids: list[str], optional
             List of sample IDs to be used for the analysis. Should be omitted if analysis is to be run with all samples.

         analysis_protocol_name : str, optional
-            Name of the analysis protocol to be used for the analysis. Can be fetched using the
+            Name of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.

         analysis_protocol_id : str, optional
-            ID of the analysis protocol to be used for the analysis. Can be fetched using the
+            ID of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.

         notes : str, optional
             Notes for the analysis, defaulted to an empty string.
@@ -714,7 +705,7 @@ class _UnsupportedSDK(_SeerSDK):
             raise ValueError("Project ID cannot be empty.")

         if not analysis_protocol_id and analysis_protocol_name:
-            valid_analysis_protocol = self.
+            valid_analysis_protocol = self.find_analysis_protocols(
                 analysis_protocol_name=analysis_protocol_name
             )

@@ -726,7 +717,7 @@ class _UnsupportedSDK(_SeerSDK):
             analysis_protocol_id = valid_analysis_protocol[0]["id"]

         if analysis_protocol_id and not analysis_protocol_name:
-            valid_analysis_protocol = self.
+            valid_analysis_protocol = self.find_analysis_protocols(
                 analysis_protocol_id=analysis_protocol_id
             )

@@ -743,7 +734,7 @@ class _UnsupportedSDK(_SeerSDK):
         if sample_ids:
             valid_ids = [
                 entry["id"]
-                for entry in self.
+                for entry in self.find_samples(project_id=project_id)
             ]

             for sample_id in sample_ids:
@@ -758,7 +749,7 @@ class _UnsupportedSDK(_SeerSDK):

         URL = f"{self._auth.url}api/v1/analyze"

-        with self._get_auth_session() as s:
+        with self._get_auth_session("startanalysis") as s:
             req_payload = {
                 "analysisName": name,
                 "analysisProtocolId": analysis_protocol_id,
@@ -854,7 +845,7 @@ class _UnsupportedSDK(_SeerSDK):
         tenant_id = self.get_active_tenant_id()

         # Step 3: Fetch the S3 bucket name by making a call to `/api/v1/auth/getawscredential`
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadcredentials") as s:
             config_response = s.get(
                 f"{self._auth.url}auth/getawscredential",
             )
@@ -905,7 +896,7 @@ class _UnsupportedSDK(_SeerSDK):

         # Step 5: Make a call to `/api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
         result_files = None
-        with self._get_auth_session() as s:
+        with self._get_auth_session("addmsdataindex") as s:
             file_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={"files": files},
@@ -1003,7 +994,7 @@ class _UnsupportedSDK(_SeerSDK):
         target_folder_path = f"{tenant_id}/{target_folder_paths[0]}"
         # Retrieve msdatafileindex metadata to determine source space
         base_space = None
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getmsdataindex") as s:
             URL = self._auth.url + "api/v1/msdataindex/getmetadata"
             params = {"folderKey": folder_path}
             r = s.get(URL, params=params)
@@ -1046,7 +1037,7 @@ class _UnsupportedSDK(_SeerSDK):
         if target_space_id and base_space != target_space_id:
             json["targetUserGroupId"] = target_space_id

-        with self._get_auth_session() as s:
+        with self._get_auth_session("movemsdataindex") as s:
             URL = self._auth.url + "api/v1/msdataindex/move"
             json = json
             r = s.post(URL, json=json)
@@ -1098,167 +1089,6 @@ class _UnsupportedSDK(_SeerSDK):
             source_ms_data_files, target_ms_data_files
         )

-    def download_analysis_files(
-        self, analysis_id: str, download_path: str = "", file_name: str = ""
-    ):
-        """
-        Download a specific analysis file from the backend given an `analysis_id` to the specified `download_path`.
-
-        If no `download_path` is specified, the file will be downloaded to the current working directory.
-
-        If no `file_name` is specified, all files for the analysis will be downloaded.
-
-        Parameters
-        ----------
-        analysis_id : str
-            ID of the analysis to download.
-        download_path : str, optional
-            Path to download the analysis file to, defaulted to current working directory.
-        file_name : str, optional
-            Name of the analysis file to download, defaulted to None.
-
-        Returns
-        -------
-        dict
-            Message containing whether the file was downloaded or not.
-
-        Examples
-        -------
-        >>> from core import SeerSDK
-        >>> sdk = SeerSDK()
-        >>> sdk.download_analysis_files("analysis_id", "/path/to/download")
-        >>> Downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
-            Finished downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
-
-            Downloading EXP20004_2020ms0007X11_A.mzML.quant
-            Finished downloading EXP20004_2020ms0007X11_A.mzML.quant
-
-            Downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
-            Finished downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
-
-            Downloading EXP20004_2020ms0007X11_A/diann.log
-            Finished downloading EXP20004_2020ms0007X11_A/diann.log
-        >>> { "message": "File downloaded successfully." }
-        """
-
-        def get_url(analysis_id, file_name, project_id):
-            URL = f"{self._auth.url}api/v1/analysisResultFiles/getUrl"
-
-            with self._get_auth_session() as s:
-
-                download_url = s.post(
-                    URL,
-                    json={
-                        "analysisId": analysis_id,
-                        "filename": file_name,
-                        "projectId": project_id,
-                    },
-                )
-
-                if download_url.status_code != 200:
-                    raise ValueError(
-                        "Could not download file. Please check if the analysis ID is valid or the backend is running."
-                    )
-
-                return download_url.json()["url"]
-
-        if not analysis_id:
-            raise ValueError("Analysis ID cannot be empty.")
-
-        try:
-            valid_analysis = self.get_analyses(analysis_id)[0]
-        except:
-            raise ValueError(
-                "Invalid analysis ID. Please check if the analysis ID is valid or the backend is running."
-            )
-
-        project_id = valid_analysis["project_id"]
-
-        if not download_path:
-            download_path = os.getcwd()
-            print(f"\nDownload path not specified.\n")
-
-        if not os.path.isdir(download_path):
-            print(
-                f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
-            )
-            download_path = os.getcwd()
-
-        name = f"{download_path}/downloads/{analysis_id}"
-
-        if not os.path.exists(name):
-            os.makedirs(name)
-
-        URL = f"{self._auth.url}api/v1/analysisResultFiles"
-
-        with self._get_auth_session() as s:
-
-            analysis_files = s.get(f"{URL}/{analysis_id}")
-
-            if analysis_files.status_code != 200:
-                raise ValueError(
-                    "Invalid request. Please check if the analysis ID is valid or the backend is running."
-                )
-
-            res = analysis_files.json()
-
-            if file_name:
-                filenames = set([file["filename"] for file in res])
-
-                if file_name not in filenames:
-                    raise ValueError(
-                        "Invalid file name. Please check if the file name is correct."
-                    )
-
-                res = [file for file in res if file["filename"] == file_name]
-
-            print(f'Downloading files to "{name}"\n')
-
-            for file in res:
-                filename = file["filename"]
-                url = get_url(analysis_id, filename, project_id)
-
-                print(f"Downloading {filename}")
-
-                for _ in range(2):
-                    try:
-                        with tqdm(
-                            unit="B",
-                            unit_scale=True,
-                            unit_divisor=1024,
-                            miniters=1,
-                            desc=f"Progress",
-                        ) as t:
-                            ssl._create_default_https_context = (
-                                ssl._create_unverified_context
-                            )
-                            urllib.request.urlretrieve(
-                                url,
-                                f"{name}/{filename}",
-                                reporthook=download_hook(t),
-                                data=None,
-                            )
-                        break
-                    except:
-                        filename = filename.split("/")
-                        name += "/" + "/".join(
-                            [filename[i] for i in range(len(filename) - 1)]
-                        )
-                        filename = filename[-1]
-                        if not os.path.isdir(f"{name}/{filename}"):
-                            os.makedirs(f"{name}/")
-
-                else:
-                    raise ValueError(
-                        "Your download failed. Please check if the backend is still running."
-                    )
-
-                print(f"Finished downloading {filename}\n")
-
-            return {
-                "message": f"Files downloaded successfully to '{download_path}/downloads/{analysis_id}'"
-            }
-
     def link_plate(
         self,
         ms_data_files: _List[str],
@@ -1301,17 +1131,12 @@ class _UnsupportedSDK(_SeerSDK):

         plate_ids = (
             set()
-        )  # contains all the plate_ids fetched from self.
-        files = []  # to be uploaded to sync frontend
+        )  # contains all the plate_ids fetched from self.find_plates()
         samples = []  # list of all the sample responses from the backend
         id_uuid = ""  # uuid for the plate id
         raw_file_paths = {}  # list of all the AWS raw file paths
         s3_upload_path = None
         s3_bucket = ""
-        ms_data_file_names = []
-        dir_exists = (
-            True  # flag to check if the generated_files directory exists
-        )

         # Step 0: Check if the file paths exist in the S3 bucket.
         for file in ms_data_files:
@@ -1344,7 +1169,7 @@ class _UnsupportedSDK(_SeerSDK):
         validate_plate_map(plate_map_data, ms_data_files)

         # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getplateids") as s:
             plate_response = s.get(f"{self._auth.url}api/v1/plateids")

             if plate_response.status_code != 200:
@@ -1361,7 +1186,7 @@ class _UnsupportedSDK(_SeerSDK):

         # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.

-        with self._get_auth_session() as s:
+        with self._get_auth_session("addplate") as s:
             plate_response = s.post(
                 f"{self._auth.url}api/v1/plates",
                 json={
@@ -1384,7 +1209,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadconfig") as s:
             config_response = s.post(
                 f"{self._auth.url}api/v1/msdatas/getuploadconfig",
                 json={"plateId": id_uuid},
@@ -1410,7 +1235,7 @@ class _UnsupportedSDK(_SeerSDK):
             s3_bucket = config_response.json()["s3Bucket"]
             s3_upload_path = config_response.json()["s3UploadPath"]

-        with self._get_auth_session() as s:
+        with self._get_auth_session("getawsuploadcredentials") as s:
             config_response = s.get(
                 f"{self._auth.url}auth/getawscredential",
             )
@@ -1456,7 +1281,7 @@ class _UnsupportedSDK(_SeerSDK):
                 "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
             )

-        with self._get_auth_session() as s:
+        with self._get_auth_session("uploadplatemap") as s:
             plate_map_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={
@@ -1507,7 +1332,7 @@ class _UnsupportedSDK(_SeerSDK):
                 "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
             )

-        with self._get_auth_session() as s:
+        with self._get_auth_session("uploadsampledescription") as s:
             sdf_response = s.post(
                 f"{self._auth.url}api/v1/msdataindex/file",
                 json={
@@ -1542,7 +1367,7 @@ class _UnsupportedSDK(_SeerSDK):
             )

         # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
-        with self._get_auth_session() as s:
+        with self._get_auth_session("addmsdatas") as s:
             ms_data_response = s.post(
                 f"{self._auth.url}api/v1/msdatas/batch",
                 json={"msdatas": plate_map_data},
@@ -1555,7 +1380,7 @@ class _UnsupportedSDK(_SeerSDK):
         print(f"Plate generated with id: '{id_uuid}'")
         return id_uuid

-    def
+    def _get_msdataindex(self, folder=""):
         """
         Get metadata for a given file path.

@@ -1566,20 +1391,15 @@ class _UnsupportedSDK(_SeerSDK):
             dict: A dictionary containing the metadata for the file.
         """
         URL = f"{self._auth.url}api/v2/msdataindex/getmetadata"
-        with self._get_auth_session() as s:
+        with self._get_auth_session("getmsdataindex") as s:
             params = {"all": "true"}
             if folder:
-                tenant_id =
-                    self._auth.get_token()[0],
-                    options={"verify_signature": False},
-                )["custom:tenantId"]
+                tenant_id = self.get_active_tenant_id()
                 params["folderKey"] = f"{tenant_id}/{folder}"
-                print(params["folderKey"])

             metadata = s.get(URL, params=params)

             if metadata.status_code != 200:
-                print(metadata.text)
                 raise ServerError("Could not fetch metadata for file.")

             return metadata.json()
@@ -1595,9 +1415,7 @@ class _UnsupportedSDK(_SeerSDK):
             dict: A dictionary mapping the display path to the raw file path.
         """

-        tenant_id =
-            self._auth.get_token()[0], options={"verify_signature": False}
-        )["custom:tenantId"]
+        tenant_id = self.get_active_tenant_id()
         result = {}
         # partition by folder_path
         folder_partitions = {os.path.dirname(x): [] for x in display_path}
@@ -1611,9 +1429,7 @@ class _UnsupportedSDK(_SeerSDK):
            try:
                metadata = {
                    x["key"]: x["rawFilePath"]
-                    for x in self.
-                        folder=folder_path
-                    )["data"]
+                    for x in self._get_msdataindex(folder=folder_path)["data"]
                }
            except:
                # If the metadata fetch fails, skip the folder
@@ -1632,3 +1448,379 @@ class _UnsupportedSDK(_SeerSDK):
             )

         return result
+
+    def get_search_data(
+        self,
+        analysis_id: str,
+        analyte_type: str,
+        rollup: str,
+        norm_method: str = "pepcal",
+    ):
+        """
+        Get analyte intensities data for a given PAS analysis.
+        Args:
+            analysis_id (str): ID of the analysis.
+            analyte_type (str): Type of the analyte. Must be either 'protein', 'peptide', precursor.
+            rollup (str): Intensities rollup method. Must be either 'np' or 'panel'.
+            norm_method (str): Search engine. Supported engines are: raw, engine, median, median80, pepcal. Default is 'pepcal'.
+
+        Returns:
+            pd.DataFrame: A dataframe with each row containing the analyte intensity measurement:
+                'msrun_id', 'sample_id', 'nanoparticle' (if rollup is 'np'), 'protein_group', 'peptide' (for 'peptide' and 'precursor' analyte types), 'charge' (for 'precursor' analyte type),
+                'intensity_log10', 'protein_group_q_value', 'q_value' (for 'precursor' analyte type), 'rt' and 'irt' (for 'peptide' and 'precursor' analyte types)
+        """
+        # 1. Get msrun data for analysis
+        samples = self.find_samples(analysis_id=analysis_id)
+        sample_name_to_id = {s["sample_name"]: s["id"] for s in samples}
+        # for np rollup, a row represents an msrun
+        msruns = self.find_msruns(sample_ids=sample_name_to_id.values())
+        file_to_msrun = {
+            os.path.basename(msrun["raw_file_path"]).split(".")[0]: msrun
+            for msrun in msruns
+        }
+        sample_to_msrun = {msrun["sample_id"]: msrun for msrun in msruns}
+
+        # for panel rollup, a row represents a sample
+
+        # 2. Get search results
+        # pull the np/panel file, or report.tsv for precursor mode
+        search_results = self.get_search_result(
+            analysis_id=analysis_id,
+            analyte_type=analyte_type,
+            rollup=rollup,
+        )
+        if analyte_type in ["protein", "peptide"]:
+            intensity_column = None
+            if norm_method == "raw":
+                intensity_column = (
+                    "Intensities Log10"
+                    if "Intensities Log10" in search_results.columns
+                    else "Intensity (Log10)"
+                )
+            elif norm_method == "engine":
+                intensity_column = (
+                    "DIA-NN Normalized Intensities Log10"
+                    if "DIA-NN Normalized Intensities Log10"
+                    in search_results.columns
+                    else "Normalized Intensity (Log10)"
+                )
+                if intensity_column not in search_results.columns:
+                    raise ValueError(
+                        "Engine normalized intensities not found in search results. This is only supported for DIA-NN currently."
+                    )
+            elif norm_method == "median":
+                if (
+                    not "Median Normalized Intensities Log10"
+                    in search_results.columns
+                ):
+                    raise ValueError(
+                        "Median normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
+                    )
+                intensity_column = "Median Normalized Intensities Log10"
+            elif norm_method == "median80":
+                if (
+                    not "Median80 Normalized Intensities Log10"
+                    in search_results.columns
+                ):
+                    raise ValueError(
+                        "Median80 normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
+                    )
+                intensity_column = "Median80 Normalized Intensities Log10"
+            elif norm_method == "pepcal":
+                if not ("PepCal Intensities Log10" in search_results.columns):
+                    raise ValueError(
+                        "Pepcal normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later with the Seer Peptide Calibrant option enabled. \n Please retry using different norm_method, such as 'median'"
+                    )
+
+                intensity_column = "PepCal Intensities Log10"
+
+            else:
+                raise ValueError(
+                    f"norm_method = {norm_method} is not supported. Supported normalization methods are: raw, pepcal, engine, median, median80."
+                )
+            if rollup == "panel":
+                search_results.fillna({"Sample Name": ""}, inplace=True)
+                search_results["File Name"] = search_results[
+                    "Sample Name"
+                ].apply(
+                    lambda x: (
+                        os.path.basename(
+                            sample_to_msrun[sample_name_to_id[x]][
+                                "raw_file_path"
+                            ]
+                        ).split(".")[0]
+                        if x
+                        else None
+                    )
+                )
+            search_results["File Name"] = search_results["File Name"].apply(
+                lambda x: os.path.basename(x).split(".")[0] if x else None
+            )
+
+            search_results["Intensity Log10"] = search_results[
+                intensity_column
+            ]
+
+            # 3. Merge report to search results to get Q value and other properties
+            report = self.get_search_result(
+                analysis_id=analysis_id,
+                analyte_type="precursor",
+                rollup="np",
+            )
+            report["File Name"] = report["Run"]
+            report["Protein Group"] = report["Protein.Group"]
+
+            if analyte_type == "protein":
+                report["Protein Q Value"] = report["Protein.Q.Value"]
+
+                report = report[
+                    ["File Name", "Protein Group", "Protein Q Value"]
+                ]
+                report.drop_duplicates(
+                    subset=["File Name", "Protein Group"], inplace=True
+                )
+                df = pd.merge(
+                    search_results,
+                    report,
+                    on=["File Name", "Protein Group"],
+                    how="left",
+                )
+                included_columns = [
+                    "MsRun ID",
+                    "Sample ID",
+                    "Protein Group",
+                    "Intensity Log10",
+                    "Protein Q Value",
+                ]
+
+            else:
+                report["Peptide"] = report["Stripped.Sequence"]
+                # If analyte_type is peptide, attach retention time (RT, iRT)
+                report = report[["File Name", "Peptide", "RT", "iRT"]]
+                report.drop_duplicates(
+                    subset=["File Name", "Peptide"], inplace=True
+                )
+                df = pd.merge(
+                    search_results,
+                    report,
+                    on=["File Name", "Peptide"],
+                    how="left",
+                )
+                included_columns = [
+                    "MsRun ID",
+                    "Sample ID",
+                    "Peptide",
+                    "Protein Group",
+                    "Intensity Log10",
+                    "RT",
+                    "iRT",
+                ]
+            # endif
+
+            if rollup == "np":
+                included_columns.insert(
+                    included_columns.index("Sample ID") + 1, "Nanoparticle"
+                )
+
+            df["MsRun ID"] = df["File Name"].apply(
+                lambda x: (
+                    file_to_msrun[x]["id"] if x in file_to_msrun else None
+                )
+            )
+            df["Sample ID"] = df["File Name"].apply(
+                lambda x: (
+                    file_to_msrun[x]["sample_id"]
+                    if x in file_to_msrun
+                    else None
+                )
+            )
+            df = df[included_columns]
+            df.columns = [title_case_to_snake_case(x) for x in df.columns]
+            return df
+        else:
+            # precursor
+            # working only in report.tsv
+            search_results["Intensity"] = search_results["Precursor.Quantity"]
+            search_results["MsRun ID"] = search_results["Run"].apply(
+                lambda x: (
+                    file_to_msrun[x]["id"] if x in file_to_msrun else None
+                )
+            )
+            search_results["Sample ID"] = search_results["Run"].apply(
+                lambda x: (
+                    file_to_msrun[x]["sample_id"]
+                    if x in file_to_msrun
+                    else None
+                )
+            )
+            search_results["Protein Group"] = search_results["Protein.Group"]
+            search_results["Peptide"] = search_results["Stripped.Sequence"]
+            search_results["Charge"] = search_results["Precursor.Charge"]
+            search_results["Precursor Id"] = search_results["Precursor.Id"]
+            search_results["Precursor Q Value"] = search_results["Q.Value"]
+            search_results["Protein Q Value"] = search_results[
+                "Protein.Q.Value"
+            ]
+
+            included_columns = [
+                "MsRun ID",
+                "Sample ID",
+                "Protein Group",
+                "Protein Q Value",
+                "Peptide",
+                "Precursor Id",
+                "Intensity",
+                "Precursor Q Value",
+                "Charge",
+                "RT",
+                "iRT",
+                "IM",
+                "iIM",
+            ]
+            df = search_results[included_columns]
+            df.columns = [title_case_to_snake_case(x) for x in df.columns]
+
+            return df
+
+    def get_search_data_analytes(self, analysis_id: str, analyte_type: str):
+        if analyte_type not in ["protein", "peptide", "precursor"]:
+            raise ValueError(
+                f"Unknown analyte_type = {analyte_type}. Supported analytes are 'protein', 'peptide', or 'precursor'."
+            )
+
+        # include
+        # protein group, (peptide sequence), protein names, gene names, biological process, molecular function, cellular component, global q value, library q value
+
+        # 1. for all modes, fetch protein np file to extract protein groups, protein names, gene names, biological process, molecular function, cellular component
+        search_results = self.get_search_result(
+            analysis_id=analysis_id, analyte_type="protein", rollup="np"
+        )
+
+        report_results = self.get_search_result(
+            analysis_id=analysis_id, analyte_type="precursor", rollup="np"
+        )
+
+        search_results = search_results[
+            [
+                "Protein Group",
+                "Protein Names",
+                "Gene Names",
+                "Biological Process",
+                "Molecular Function",
+                "Cellular Component",
+            ]
+        ]
+        search_results.drop_duplicates(subset=["Protein Group"], inplace=True)
+        report_results["Protein Group"] = report_results["Protein.Group"]
+        report_results["Peptide"] = report_results["Stripped.Sequence"]
+
+        if analyte_type == "protein":
+            report_results = report_results[
+                [
+                    "Protein Group",
+                    "Protein.Ids",
+                    "Global.PG.Q.Value",
+                    "Lib.PG.Q.Value",
+                ]
+            ]
+            report_results.drop_duplicates(
+                subset=["Protein Group"], inplace=True
+            )
+            df = pd.merge(
+                search_results,
+                report_results,
+                on=["Protein Group"],
+                how="left",
+            )
+        elif analyte_type == "peptide":
+            peptide_results = self.get_search_result(
+                analysis_id=analysis_id, analyte_type="peptide", rollup="np"
+            )
+            peptide_results = peptide_results[["Peptide", "Protein Group"]]
+            search_results = pd.merge(
+                peptide_results,
+                search_results,
+                on=["Protein Group"],
+                how="left",
+            )
+
+            report_results = report_results[
+                ["Peptide", "Protein.Ids", "Protein.Group"]
+            ]
+            report_results.drop_duplicates(subset=["Peptide"], inplace=True)
+            df = pd.merge(
+                search_results,
+                report_results,
+                on=["Peptide"],
+                how="left",
+            )
+        else:
+            # precursor
+            search_results = search_results[
+                [
+                    "Protein Group",
+                    "Protein Names",
+                    "Gene Names",
+                    "Biological Process",
+                    "Molecular Function",
+                    "Cellular Component",
+                ]
+            ]
+            search_results.drop_duplicates(
+                subset=["Protein Group"], inplace=True
+            )
+            report_results = report_results[
+                [
+                    "Precursor.Id",
+                    "Precursor.Charge",
+                    "Peptide",
+                    "Protein Group",
+                    "Protein.Ids",
+                    "Protein.Names",
+                    "Genes",
+                    "First.Protein.Description",
+                    "Modified.Sequence",
+                    "Proteotypic",
+                    "Global.Q.Value",
+                    "Global.PG.Q.Value",
+                    "Lib.Q.Value",
+                    "Lib.PG.Q.Value",
+                ]
+            ]
+            report_results.drop_duplicates(
+                subset=["Protein Group"], inplace=True
+            )
+            df = pd.merge(
+                report_results,
+                search_results,
+                on=["Protein Group"],
+                how="left",
+            )
+            df = df[
+                [
+                    "Precursor.Id",
+                    "Precursor.Charge",
+                    "Peptide",
+                    "Protein Group",
+                    "Protein.Ids",
+                    "Protein.Names",
+                    "Genes",
+                    "First.Protein.Description",
+                    "Modified.Sequence",
+                    "Proteotypic",
+                    "Global.Q.Value",
+                    "Global.PG.Q.Value",
+                    "Lib.Q.Value",
+                    "Lib.PG.Q.Value",
+                    "Gene Names",
+                    "Biological Process",
+                    "Molecular Function",
+                    "Cellular Component",
+                ]
+            ]
+            df.rename(
+                columns={"Modified.Sequence": "Modified.Peptide"}, inplace=True
+            )
+        # endif
+        df.columns = [title_case_to_snake_case(x) for x in df.columns]
+        return df