seer-pas-sdk 0.3.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
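
The most visible change across this diff is the rename of the SDK's read helpers from get_* to find_*: get_plate_metadata becomes find_plates, get_samples becomes find_samples, get_project_metadata becomes find_projects, and get_analysis_protocols becomes find_analysis_protocols (the old download_analysis_files helper is removed outright). A minimal before/after sketch, assuming an already-authenticated SDK instance named sdk and placeholder arguments; the full signatures are not shown in this diff:

    # 0.3.4-style calls referenced in the removed lines below
    plates = sdk.get_plate_metadata()
    samples = sdk.get_samples(plate_id=plate_id)
    protocols = sdk.get_analysis_protocols(analysis_protocol_name=name)

    # 1.1.0 replacements referenced in the added lines below
    plates = sdk.find_plates()
    samples = sdk.find_samples(plate_id=plate_id)
    protocols = sdk.find_analysis_protocols(analysis_protocol_name=name)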
@@ -2,19 +2,12 @@
2
2
  seer_pas_sdk.core.unsupported -- in development
3
3
  """
4
4
 
5
- from tqdm import tqdm
6
-
7
5
  import os
8
- import jwt
9
- import requests
10
- import urllib.request
11
- import ssl
12
6
  import shutil
13
7
 
14
8
  from typing import List as _List
15
9
 
16
10
  from ..common import *
17
- from ..auth import Auth
18
11
  from ..objects import PlateMap
19
12
 
20
13
  from .sdk import SeerSDK as _SeerSDK
@@ -71,7 +64,7 @@ class _UnsupportedSDK(_SeerSDK):
71
64
 
72
65
  URL = f"{self._auth.url}api/v1/samples"
73
66
 
74
- with self._get_auth_session() as s:
67
+ with self._get_auth_session("addsample") as s:
75
68
 
76
69
  response = s.post(URL, json=sample_entry)
77
70
 
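
Another recurring change, starting with the hunk above, is that every internal call to self._get_auth_session() now passes a short action label such as "addsample", "addplate", or "getawsuploadcredentials". The wrapper itself is not part of this diff, so the snippet below is only a sketch of how such a labeled session helper might look, assuming it yields a requests.Session and uses the label purely for logging or auditing; the real implementation in seer_pas_sdk may differ:

    import contextlib
    import logging

    import requests

    @contextlib.contextmanager
    def _get_auth_session(self, action: str = ""):
        # Sketch of a method on the SDK class, not the real implementation:
        # open an authenticated session and tag it with the calling action so
        # the request can be attributed in logs or audits.
        session = requests.Session()
        token = self._auth.get_token()[0]
        session.headers.update({"Authorization": f"Bearer {token}"})
        if action:
            logging.getLogger(__name__).debug("auth session opened for %r", action)
        try:
            yield session
        finally:
            session.close()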
@@ -112,7 +105,7 @@ class _UnsupportedSDK(_SeerSDK):
112
105
 
113
106
  URL = f"{self._auth.url}api/v1/samples/batch"
114
107
 
115
- with self._get_auth_session() as s:
108
+ with self._get_auth_session("addsamples") as s:
116
109
  response = s.post(URL, json={"samples": sample_info})
117
110
 
118
111
  if response.status_code != 200:
@@ -164,9 +157,7 @@ class _UnsupportedSDK(_SeerSDK):
164
157
  if not project_name:
165
158
  raise ValueError("Project name cannot be empty.")
166
159
 
167
- all_plate_ids = set(
168
- [plate["id"] for plate in self.get_plate_metadata()]
169
- )
160
+ all_plate_ids = set([plate["id"] for plate in self.find_plates()])
170
161
 
171
162
  for plate_id in plate_ids:
172
163
  if plate_id not in all_plate_ids:
@@ -176,7 +167,7 @@ class _UnsupportedSDK(_SeerSDK):
176
167
 
177
168
  URL = f"{self._auth.url}api/v1/projects"
178
169
 
179
- with self._get_auth_session() as s:
170
+ with self._get_auth_session("addproject") as s:
180
171
 
181
172
  project = s.post(
182
173
  URL,
@@ -233,7 +224,7 @@ class _UnsupportedSDK(_SeerSDK):
233
224
 
234
225
  URL = f"{self._auth.url}api/v1/addSamplesToProject/{project_id}"
235
226
 
236
- with self._get_auth_session() as s:
227
+ with self._get_auth_session("addprojectsamples") as s:
237
228
 
238
229
  response = s.put(
239
230
  URL,
@@ -277,7 +268,7 @@ class _UnsupportedSDK(_SeerSDK):
277
268
  samples = (
278
269
  x["id"]
279
270
  for plate_id in plates
280
- for x in self.get_samples(plate_id=plate_id)
271
+ for x in self.find_samples(plate_id=plate_id)
281
272
  )
282
273
 
283
274
  return self.add_samples_to_project(
@@ -375,7 +366,7 @@ class _UnsupportedSDK(_SeerSDK):
375
366
  validate_plate_map(plate_map_data, local_file_names)
376
367
 
377
368
  # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
378
- with self._get_auth_session() as s:
369
+ with self._get_auth_session("getplateids") as s:
379
370
  plate_response = s.get(f"{self._auth.url}api/v1/plateids")
380
371
 
381
372
  if plate_response.status_code != 200:
@@ -392,7 +383,7 @@ class _UnsupportedSDK(_SeerSDK):
392
383
 
393
384
  # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
394
385
 
395
- with self._get_auth_session() as s:
386
+ with self._get_auth_session("addplate") as s:
396
387
  plate_response = s.post(
397
388
  f"{self._auth.url}api/v1/plates",
398
389
  json={
@@ -415,7 +406,7 @@ class _UnsupportedSDK(_SeerSDK):
415
406
  )
416
407
 
417
408
  # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
418
- with self._get_auth_session() as s:
409
+ with self._get_auth_session("getawsuploadconfig") as s:
419
410
  config_response = s.post(
420
411
  f"{self._auth.url}api/v1/msdatas/getuploadconfig",
421
412
  json={"plateId": id_uuid},
@@ -441,7 +432,7 @@ class _UnsupportedSDK(_SeerSDK):
441
432
  s3_bucket = config_response.json()["s3Bucket"]
442
433
  s3_upload_path = config_response.json()["s3UploadPath"]
443
434
 
444
- with self._get_auth_session() as s:
435
+ with self._get_auth_session("getawsuploadcredentials") as s:
445
436
  config_response = s.get(
446
437
  f"{self._auth.url}auth/getawscredential",
447
438
  )
@@ -487,7 +478,7 @@ class _UnsupportedSDK(_SeerSDK):
487
478
  "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
488
479
  )
489
480
 
490
- with self._get_auth_session() as s:
481
+ with self._get_auth_session("uploadplatemapfile") as s:
491
482
  plate_map_response = s.post(
492
483
  f"{self._auth.url}api/v1/msdataindex/file",
493
484
  json={
@@ -540,7 +531,7 @@ class _UnsupportedSDK(_SeerSDK):
540
531
  "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
541
532
  )
542
533
 
543
- with self._get_auth_session() as s:
534
+ with self._get_auth_session("uploadsampledescriptionfile") as s:
544
535
  sdf_response = s.post(
545
536
  f"{self._auth.url}api/v1/msdataindex/file",
546
537
  json={
@@ -573,7 +564,7 @@ class _UnsupportedSDK(_SeerSDK):
573
564
  )
574
565
 
575
566
  # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
576
- with self._get_auth_session() as s:
567
+ with self._get_auth_session("addmsdatas") as s:
577
568
  ms_data_response = s.post(
578
569
  f"{self._auth.url}api/v1/msdatas/batch",
579
570
  json={"msdatas": plate_map_data},
@@ -584,7 +575,7 @@ class _UnsupportedSDK(_SeerSDK):
584
575
  )
585
576
 
586
577
  # Step 9: Upload each msdata file to the S3 bucket.
587
- with self._get_auth_session() as s:
578
+ with self._get_auth_session("getawsuploadcredentials") as s:
588
579
  config_response = s.get(
589
580
  f"{self._auth.url}auth/getawscredential",
590
581
  )
@@ -629,7 +620,7 @@ class _UnsupportedSDK(_SeerSDK):
629
620
  )
630
621
 
631
622
  # Step 10: Make a call to `api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
632
- with self._get_auth_session() as s:
623
+ with self._get_auth_session("addmsdataindex") as s:
633
624
  file_response = s.post(
634
625
  f"{self._auth.url}api/v1/msdataindex/file",
635
626
  json={"files": files},
@@ -671,16 +662,16 @@ class _UnsupportedSDK(_SeerSDK):
671
662
  Name of the analysis.
672
663
 
673
664
  project_id : str
674
- ID of the project to which the analysis belongs. Can be fetched using the get_project_metadata() function.
665
+ ID of the project to which the analysis belongs. Can be fetched using the find_projects() function.
675
666
 
676
667
  sample_ids: list[str], optional
677
668
  List of sample IDs to be used for the analysis. Should be omitted if analysis is to be run with all samples.
678
669
 
679
670
  analysis_protocol_name : str, optional
680
- Name of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.
671
+ Name of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_id is provided.
681
672
 
682
673
  analysis_protocol_id : str, optional
683
- ID of the analysis protocol to be used for the analysis. Can be fetched using the get_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.
674
+ ID of the analysis protocol to be used for the analysis. Can be fetched using the find_analysis_protocols() function. Should be omitted if analysis_protocol_name is provided.
684
675
 
685
676
  notes : str, optional
686
677
  Notes for the analysis, defaulted to an empty string.
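
The docstring in the hunk above belongs to the SDK's analysis-start helper; its def line sits outside the changed hunks, so the method name below is an assumption (the auth-session label later in this diff is "startanalysis"). A hedged usage sketch, with placeholder names and the renamed find_* lookups used to resolve IDs:

    # Hypothetical usage sketch: `start_analysis` is an assumed method name and
    # the project lookup is illustrative only.
    project_id = sdk.find_projects()[0]["id"]
    protocol = sdk.find_analysis_protocols(
        analysis_protocol_name="my protocol name"
    )[0]

    sdk.start_analysis(
        name="My analysis",
        project_id=project_id,
        analysis_protocol_id=protocol["id"],  # or pass analysis_protocol_name, not both
        sample_ids=None,                      # omit to analyze all samples in the project
        notes="",
    )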
@@ -714,7 +705,7 @@ class _UnsupportedSDK(_SeerSDK):
714
705
  raise ValueError("Project ID cannot be empty.")
715
706
 
716
707
  if not analysis_protocol_id and analysis_protocol_name:
717
- valid_analysis_protocol = self.get_analysis_protocols(
708
+ valid_analysis_protocol = self.find_analysis_protocols(
718
709
  analysis_protocol_name=analysis_protocol_name
719
710
  )
720
711
 
@@ -726,7 +717,7 @@ class _UnsupportedSDK(_SeerSDK):
726
717
  analysis_protocol_id = valid_analysis_protocol[0]["id"]
727
718
 
728
719
  if analysis_protocol_id and not analysis_protocol_name:
729
- valid_analysis_protocol = self.get_analysis_protocols(
720
+ valid_analysis_protocol = self.find_analysis_protocols(
730
721
  analysis_protocol_id=analysis_protocol_id
731
722
  )
732
723
 
@@ -743,7 +734,7 @@ class _UnsupportedSDK(_SeerSDK):
743
734
  if sample_ids:
744
735
  valid_ids = [
745
736
  entry["id"]
746
- for entry in self.get_samples(project_id=project_id)
737
+ for entry in self.find_samples(project_id=project_id)
747
738
  ]
748
739
 
749
740
  for sample_id in sample_ids:
@@ -758,7 +749,7 @@ class _UnsupportedSDK(_SeerSDK):
758
749
 
759
750
  URL = f"{self._auth.url}api/v1/analyze"
760
751
 
761
- with self._get_auth_session() as s:
752
+ with self._get_auth_session("startanalysis") as s:
762
753
  req_payload = {
763
754
  "analysisName": name,
764
755
  "analysisProtocolId": analysis_protocol_id,
@@ -854,7 +845,7 @@ class _UnsupportedSDK(_SeerSDK):
854
845
  tenant_id = self.get_active_tenant_id()
855
846
 
856
847
  # Step 3: Fetch the S3 bucket name by making a call to `/api/v1/auth/getawscredential`
857
- with self._get_auth_session() as s:
848
+ with self._get_auth_session("getawsuploadcredentials") as s:
858
849
  config_response = s.get(
859
850
  f"{self._auth.url}auth/getawscredential",
860
851
  )
@@ -905,7 +896,7 @@ class _UnsupportedSDK(_SeerSDK):
905
896
 
906
897
  # Step 5: Make a call to `/api/v1/msdataindex/file` to sync with frontend. This should only be done after all files have finished uploading, simulating an async "promise"-like scenario in JavaScript.
907
898
  result_files = None
908
- with self._get_auth_session() as s:
899
+ with self._get_auth_session("addmsdataindex") as s:
909
900
  file_response = s.post(
910
901
  f"{self._auth.url}api/v1/msdataindex/file",
911
902
  json={"files": files},
@@ -1003,7 +994,7 @@ class _UnsupportedSDK(_SeerSDK):
1003
994
  target_folder_path = f"{tenant_id}/{target_folder_paths[0]}"
1004
995
  # Retrieve msdatafileindex metadata to determine source space
1005
996
  base_space = None
1006
- with self._get_auth_session() as s:
997
+ with self._get_auth_session("getmsdataindex") as s:
1007
998
  URL = self._auth.url + "api/v1/msdataindex/getmetadata"
1008
999
  params = {"folderKey": folder_path}
1009
1000
  r = s.get(URL, params=params)
@@ -1046,7 +1037,7 @@ class _UnsupportedSDK(_SeerSDK):
1046
1037
  if target_space_id and base_space != target_space_id:
1047
1038
  json["targetUserGroupId"] = target_space_id
1048
1039
 
1049
- with self._get_auth_session() as s:
1040
+ with self._get_auth_session("movemsdataindex") as s:
1050
1041
  URL = self._auth.url + "api/v1/msdataindex/move"
1051
1042
  json = json
1052
1043
  r = s.post(URL, json=json)
@@ -1098,167 +1089,6 @@ class _UnsupportedSDK(_SeerSDK):
1098
1089
  source_ms_data_files, target_ms_data_files
1099
1090
  )
1100
1091
 
1101
- def download_analysis_files(
1102
- self, analysis_id: str, download_path: str = "", file_name: str = ""
1103
- ):
1104
- """
1105
- Download a specific analysis file from the backend given an `analysis_id` to the specified `download_path`.
1106
-
1107
- If no `download_path` is specified, the file will be downloaded to the current working directory.
1108
-
1109
- If no `file_name` is specified, all files for the analysis will be downloaded.
1110
-
1111
- Parameters
1112
- ----------
1113
- analysis_id : str
1114
- ID of the analysis to download.
1115
- download_path : str, optional
1116
- Path to download the analysis file to, defaulted to current working directory.
1117
- file_name : str, optional
1118
- Name of the analysis file to download, defaulted to None.
1119
-
1120
- Returns
1121
- -------
1122
- dict
1123
- Message containing whether the file was downloaded or not.
1124
-
1125
- Examples
1126
- -------
1127
- >>> from core import SeerSDK
1128
- >>> sdk = SeerSDK()
1129
- >>> sdk.download_analysis_files("analysis_id", "/path/to/download")
1130
- >>> Downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
1131
- Finished downloading EXP22006_2022ms0031bX25_B_BA4_1_4768/diann.log
1132
-
1133
- Downloading EXP20004_2020ms0007X11_A.mzML.quant
1134
- Finished downloading EXP20004_2020ms0007X11_A.mzML.quant
1135
-
1136
- Downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
1137
- Finished downloading EXP20004_2020ms0007X11_A/0714-diann181-libfree-mbr.json
1138
-
1139
- Downloading EXP20004_2020ms0007X11_A/diann.log
1140
- Finished downloading EXP20004_2020ms0007X11_A/diann.log
1141
- >>> { "message": "File downloaded successfully." }
1142
- """
1143
-
1144
- def get_url(analysis_id, file_name, project_id):
1145
- URL = f"{self._auth.url}api/v1/analysisResultFiles/getUrl"
1146
-
1147
- with self._get_auth_session() as s:
1148
-
1149
- download_url = s.post(
1150
- URL,
1151
- json={
1152
- "analysisId": analysis_id,
1153
- "filename": file_name,
1154
- "projectId": project_id,
1155
- },
1156
- )
1157
-
1158
- if download_url.status_code != 200:
1159
- raise ValueError(
1160
- "Could not download file. Please check if the analysis ID is valid or the backend is running."
1161
- )
1162
-
1163
- return download_url.json()["url"]
1164
-
1165
- if not analysis_id:
1166
- raise ValueError("Analysis ID cannot be empty.")
1167
-
1168
- try:
1169
- valid_analysis = self.get_analyses(analysis_id)[0]
1170
- except:
1171
- raise ValueError(
1172
- "Invalid analysis ID. Please check if the analysis ID is valid or the backend is running."
1173
- )
1174
-
1175
- project_id = valid_analysis["project_id"]
1176
-
1177
- if not download_path:
1178
- download_path = os.getcwd()
1179
- print(f"\nDownload path not specified.\n")
1180
-
1181
- if not os.path.isdir(download_path):
1182
- print(
1183
- f'\nThe path "{download_path}" you specified does not exist, was either invalid or not absolute.\n'
1184
- )
1185
- download_path = os.getcwd()
1186
-
1187
- name = f"{download_path}/downloads/{analysis_id}"
1188
-
1189
- if not os.path.exists(name):
1190
- os.makedirs(name)
1191
-
1192
- URL = f"{self._auth.url}api/v1/analysisResultFiles"
1193
-
1194
- with self._get_auth_session() as s:
1195
-
1196
- analysis_files = s.get(f"{URL}/{analysis_id}")
1197
-
1198
- if analysis_files.status_code != 200:
1199
- raise ValueError(
1200
- "Invalid request. Please check if the analysis ID is valid or the backend is running."
1201
- )
1202
-
1203
- res = analysis_files.json()
1204
-
1205
- if file_name:
1206
- filenames = set([file["filename"] for file in res])
1207
-
1208
- if file_name not in filenames:
1209
- raise ValueError(
1210
- "Invalid file name. Please check if the file name is correct."
1211
- )
1212
-
1213
- res = [file for file in res if file["filename"] == file_name]
1214
-
1215
- print(f'Downloading files to "{name}"\n')
1216
-
1217
- for file in res:
1218
- filename = file["filename"]
1219
- url = get_url(analysis_id, filename, project_id)
1220
-
1221
- print(f"Downloading {filename}")
1222
-
1223
- for _ in range(2):
1224
- try:
1225
- with tqdm(
1226
- unit="B",
1227
- unit_scale=True,
1228
- unit_divisor=1024,
1229
- miniters=1,
1230
- desc=f"Progress",
1231
- ) as t:
1232
- ssl._create_default_https_context = (
1233
- ssl._create_unverified_context
1234
- )
1235
- urllib.request.urlretrieve(
1236
- url,
1237
- f"{name}/{filename}",
1238
- reporthook=download_hook(t),
1239
- data=None,
1240
- )
1241
- break
1242
- except:
1243
- filename = filename.split("/")
1244
- name += "/" + "/".join(
1245
- [filename[i] for i in range(len(filename) - 1)]
1246
- )
1247
- filename = filename[-1]
1248
- if not os.path.isdir(f"{name}/{filename}"):
1249
- os.makedirs(f"{name}/")
1250
-
1251
- else:
1252
- raise ValueError(
1253
- "Your download failed. Please check if the backend is still running."
1254
- )
1255
-
1256
- print(f"Finished downloading {filename}\n")
1257
-
1258
- return {
1259
- "message": f"Files downloaded successfully to '{download_path}/downloads/{analysis_id}'"
1260
- }
1261
-
1262
1092
  def link_plate(
1263
1093
  self,
1264
1094
  ms_data_files: _List[str],
@@ -1301,17 +1131,12 @@ class _UnsupportedSDK(_SeerSDK):
1301
1131
 
1302
1132
  plate_ids = (
1303
1133
  set()
1304
- ) # contains all the plate_ids fetched from self.get_plate_metadata()
1305
- files = [] # to be uploaded to sync frontend
1134
+ ) # contains all the plate_ids fetched from self.find_plates()
1306
1135
  samples = [] # list of all the sample responses from the backend
1307
1136
  id_uuid = "" # uuid for the plate id
1308
1137
  raw_file_paths = {} # list of all the AWS raw file paths
1309
1138
  s3_upload_path = None
1310
1139
  s3_bucket = ""
1311
- ms_data_file_names = []
1312
- dir_exists = (
1313
- True # flag to check if the generated_files directory exists
1314
- )
1315
1140
 
1316
1141
  # Step 0: Check if the file paths exist in the S3 bucket.
1317
1142
  for file in ms_data_files:
@@ -1344,7 +1169,7 @@ class _UnsupportedSDK(_SeerSDK):
1344
1169
  validate_plate_map(plate_map_data, ms_data_files)
1345
1170
 
1346
1171
  # Step 1: Check for duplicates in the user-inputted plate id. Populates `plate_ids` set.
1347
- with self._get_auth_session() as s:
1172
+ with self._get_auth_session("getplateids") as s:
1348
1173
  plate_response = s.get(f"{self._auth.url}api/v1/plateids")
1349
1174
 
1350
1175
  if plate_response.status_code != 200:
@@ -1361,7 +1186,7 @@ class _UnsupportedSDK(_SeerSDK):
1361
1186
 
1362
1187
  # Step 2: Fetch the UUID that needs to be passed into the backend from `/api/v1/plates` to fetch the AWS upload config and raw file path. This will sync the plates backend with samples when the user uploads later. This UUID will also be void of duplicates since duplication is handled by the backend.
1363
1188
 
1364
- with self._get_auth_session() as s:
1189
+ with self._get_auth_session("addplate") as s:
1365
1190
  plate_response = s.post(
1366
1191
  f"{self._auth.url}api/v1/plates",
1367
1192
  json={
@@ -1384,7 +1209,7 @@ class _UnsupportedSDK(_SeerSDK):
1384
1209
  )
1385
1210
 
1386
1211
  # Step 3: Fetch AWS upload config from the backend with the plateId we just generated. Populates `s3_upload_path` and `s3_bucket` global variables.
1387
- with self._get_auth_session() as s:
1212
+ with self._get_auth_session("getawsuploadconfig") as s:
1388
1213
  config_response = s.post(
1389
1214
  f"{self._auth.url}api/v1/msdatas/getuploadconfig",
1390
1215
  json={"plateId": id_uuid},
@@ -1410,7 +1235,7 @@ class _UnsupportedSDK(_SeerSDK):
1410
1235
  s3_bucket = config_response.json()["s3Bucket"]
1411
1236
  s3_upload_path = config_response.json()["s3UploadPath"]
1412
1237
 
1413
- with self._get_auth_session() as s:
1238
+ with self._get_auth_session("getawsuploadcredentials") as s:
1414
1239
  config_response = s.get(
1415
1240
  f"{self._auth.url}auth/getawscredential",
1416
1241
  )
@@ -1456,7 +1281,7 @@ class _UnsupportedSDK(_SeerSDK):
1456
1281
  "Failed to upload plate map to AWS. Please check your connection and reauthenticate."
1457
1282
  )
1458
1283
 
1459
- with self._get_auth_session() as s:
1284
+ with self._get_auth_session("uploadplatemap") as s:
1460
1285
  plate_map_response = s.post(
1461
1286
  f"{self._auth.url}api/v1/msdataindex/file",
1462
1287
  json={
@@ -1507,7 +1332,7 @@ class _UnsupportedSDK(_SeerSDK):
1507
1332
  "Failed to upload sample description file to AWS. Please check your connection and reauthenticate."
1508
1333
  )
1509
1334
 
1510
- with self._get_auth_session() as s:
1335
+ with self._get_auth_session("uploadsampledescription") as s:
1511
1336
  sdf_response = s.post(
1512
1337
  f"{self._auth.url}api/v1/msdataindex/file",
1513
1338
  json={
@@ -1542,7 +1367,7 @@ class _UnsupportedSDK(_SeerSDK):
1542
1367
  )
1543
1368
 
1544
1369
  # Step 8: Make a request to `/api/v1/msdatas/batch` with the processed samples data.
1545
- with self._get_auth_session() as s:
1370
+ with self._get_auth_session("addmsdatas") as s:
1546
1371
  ms_data_response = s.post(
1547
1372
  f"{self._auth.url}api/v1/msdatas/batch",
1548
1373
  json={"msdatas": plate_map_data},
@@ -1555,7 +1380,7 @@ class _UnsupportedSDK(_SeerSDK):
1555
1380
  print(f"Plate generated with id: '{id_uuid}'")
1556
1381
  return id_uuid
1557
1382
 
1558
- def _get_msdataindex_metadata(self, folder=""):
1383
+ def _get_msdataindex(self, folder=""):
1559
1384
  """
1560
1385
  Get metadata for a given file path.
1561
1386
 
@@ -1566,20 +1391,15 @@ class _UnsupportedSDK(_SeerSDK):
1566
1391
  dict: A dictionary containing the metadata for the file.
1567
1392
  """
1568
1393
  URL = f"{self._auth.url}api/v2/msdataindex/getmetadata"
1569
- with self._get_auth_session() as s:
1394
+ with self._get_auth_session("getmsdataindex") as s:
1570
1395
  params = {"all": "true"}
1571
1396
  if folder:
1572
- tenant_id = jwt.decode(
1573
- self._auth.get_token()[0],
1574
- options={"verify_signature": False},
1575
- )["custom:tenantId"]
1397
+ tenant_id = self.get_active_tenant_id()
1576
1398
  params["folderKey"] = f"{tenant_id}/{folder}"
1577
- print(params["folderKey"])
1578
1399
 
1579
1400
  metadata = s.get(URL, params=params)
1580
1401
 
1581
1402
  if metadata.status_code != 200:
1582
- print(metadata.text)
1583
1403
  raise ServerError("Could not fetch metadata for file.")
1584
1404
 
1585
1405
  return metadata.json()
@@ -1595,9 +1415,7 @@ class _UnsupportedSDK(_SeerSDK):
1595
1415
  dict: A dictionary mapping the display path to the raw file path.
1596
1416
  """
1597
1417
 
1598
- tenant_id = jwt.decode(
1599
- self._auth.get_token()[0], options={"verify_signature": False}
1600
- )["custom:tenantId"]
1418
+ tenant_id = self.get_active_tenant_id()
1601
1419
  result = {}
1602
1420
  # partition by folder_path
1603
1421
  folder_partitions = {os.path.dirname(x): [] for x in display_path}
@@ -1611,9 +1429,7 @@ class _UnsupportedSDK(_SeerSDK):
1611
1429
  try:
1612
1430
  metadata = {
1613
1431
  x["key"]: x["rawFilePath"]
1614
- for x in self._get_msdataindex_metadata(
1615
- folder=folder_path
1616
- )["data"]
1432
+ for x in self._get_msdataindex(folder=folder_path)["data"]
1617
1433
  }
1618
1434
  except:
1619
1435
  # If the metadata fetch fails, skip the folder
@@ -1632,3 +1448,379 @@ class _UnsupportedSDK(_SeerSDK):
1632
1448
  )
1633
1449
 
1634
1450
  return result
1451
+
1452
+ def get_search_data(
1453
+ self,
1454
+ analysis_id: str,
1455
+ analyte_type: str,
1456
+ rollup: str,
1457
+ norm_method: str = "pepcal",
1458
+ ):
1459
+ """
1460
+ Get analyte intensities data for a given PAS analysis.
1461
+ Args:
1462
+ analysis_id (str): ID of the analysis.
1463
+ analyte_type (str): Type of the analyte. Must be either 'protein', 'peptide', precursor.
1464
+ rollup (str): Intensities rollup method. Must be either 'np' or 'panel'.
1465
+ norm_method (str): Search engine. Supported engines are: raw, engine, median, median80, pepcal. Default is 'pepcal'.
1466
+
1467
+ Returns:
1468
+ pd.DataFrame: A dataframe with each row containing the analyte intensity measurement:
1469
+ 'msrun_id', 'sample_id', 'nanoparticle' (if rollup is 'np'), 'protein_group', 'peptide' (for 'peptide' and 'precursor' analyte types), 'charge' (for 'precursor' analyte type),
1470
+ 'intensity_log10', 'protein_group_q_value', 'q_value' (for 'precursor' analyte type), 'rt' and 'irt' (for 'peptide' and 'precursor' analyte types)
1471
+ """
1472
+ # 1. Get msrun data for analysis
1473
+ samples = self.find_samples(analysis_id=analysis_id)
1474
+ sample_name_to_id = {s["sample_name"]: s["id"] for s in samples}
1475
+ # for np rollup, a row represents an msrun
1476
+ msruns = self.find_msruns(sample_ids=sample_name_to_id.values())
1477
+ file_to_msrun = {
1478
+ os.path.basename(msrun["raw_file_path"]).split(".")[0]: msrun
1479
+ for msrun in msruns
1480
+ }
1481
+ sample_to_msrun = {msrun["sample_id"]: msrun for msrun in msruns}
1482
+
1483
+ # for panel rollup, a row represents a sample
1484
+
1485
+ # 2. Get search results
1486
+ # pull the np/panel file, or report.tsv for precursor mode
1487
+ search_results = self.get_search_result(
1488
+ analysis_id=analysis_id,
1489
+ analyte_type=analyte_type,
1490
+ rollup=rollup,
1491
+ )
1492
+ if analyte_type in ["protein", "peptide"]:
1493
+ intensity_column = None
1494
+ if norm_method == "raw":
1495
+ intensity_column = (
1496
+ "Intensities Log10"
1497
+ if "Intensities Log10" in search_results.columns
1498
+ else "Intensity (Log10)"
1499
+ )
1500
+ elif norm_method == "engine":
1501
+ intensity_column = (
1502
+ "DIA-NN Normalized Intensities Log10"
1503
+ if "DIA-NN Normalized Intensities Log10"
1504
+ in search_results.columns
1505
+ else "Normalized Intensity (Log10)"
1506
+ )
1507
+ if intensity_column not in search_results.columns:
1508
+ raise ValueError(
1509
+ "Engine normalized intensities not found in search results. This is only supported for DIA-NN currently."
1510
+ )
1511
+ elif norm_method == "median":
1512
+ if (
1513
+ not "Median Normalized Intensities Log10"
1514
+ in search_results.columns
1515
+ ):
1516
+ raise ValueError(
1517
+ "Median normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
1518
+ )
1519
+ intensity_column = "Median Normalized Intensities Log10"
1520
+ elif norm_method == "median80":
1521
+ if (
1522
+ not "Median80 Normalized Intensities Log10"
1523
+ in search_results.columns
1524
+ ):
1525
+ raise ValueError(
1526
+ "Median80 normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later."
1527
+ )
1528
+ intensity_column = "Median80 Normalized Intensities Log10"
1529
+ elif norm_method == "pepcal":
1530
+ if not ("PepCal Intensities Log10" in search_results.columns):
1531
+ raise ValueError(
1532
+ "Pepcal normalized intensities not found in search results. This is only available with analyses processed with DIA-NN Seer Protocol v2.0 or later with the Seer Peptide Calibrant option enabled. \n Please retry using different norm_method, such as 'median'"
1533
+ )
1534
+
1535
+ intensity_column = "PepCal Intensities Log10"
1536
+
1537
+ else:
1538
+ raise ValueError(
1539
+ f"norm_method = {norm_method} is not supported. Supported normalization methods are: raw, pepcal, engine, median, median80."
1540
+ )
1541
+ if rollup == "panel":
1542
+ search_results.fillna({"Sample Name": ""}, inplace=True)
1543
+ search_results["File Name"] = search_results[
1544
+ "Sample Name"
1545
+ ].apply(
1546
+ lambda x: (
1547
+ os.path.basename(
1548
+ sample_to_msrun[sample_name_to_id[x]][
1549
+ "raw_file_path"
1550
+ ]
1551
+ ).split(".")[0]
1552
+ if x
1553
+ else None
1554
+ )
1555
+ )
1556
+ search_results["File Name"] = search_results["File Name"].apply(
1557
+ lambda x: os.path.basename(x).split(".")[0] if x else None
1558
+ )
1559
+
1560
+ search_results["Intensity Log10"] = search_results[
1561
+ intensity_column
1562
+ ]
1563
+
1564
+ # 3. Merge report to search results to get Q value and other properties
1565
+ report = self.get_search_result(
1566
+ analysis_id=analysis_id,
1567
+ analyte_type="precursor",
1568
+ rollup="np",
1569
+ )
1570
+ report["File Name"] = report["Run"]
1571
+ report["Protein Group"] = report["Protein.Group"]
1572
+
1573
+ if analyte_type == "protein":
1574
+ report["Protein Q Value"] = report["Protein.Q.Value"]
1575
+
1576
+ report = report[
1577
+ ["File Name", "Protein Group", "Protein Q Value"]
1578
+ ]
1579
+ report.drop_duplicates(
1580
+ subset=["File Name", "Protein Group"], inplace=True
1581
+ )
1582
+ df = pd.merge(
1583
+ search_results,
1584
+ report,
1585
+ on=["File Name", "Protein Group"],
1586
+ how="left",
1587
+ )
1588
+ included_columns = [
1589
+ "MsRun ID",
1590
+ "Sample ID",
1591
+ "Protein Group",
1592
+ "Intensity Log10",
1593
+ "Protein Q Value",
1594
+ ]
1595
+
1596
+ else:
1597
+ report["Peptide"] = report["Stripped.Sequence"]
1598
+ # If analyte_type is peptide, attach retention time (RT, iRT)
1599
+ report = report[["File Name", "Peptide", "RT", "iRT"]]
1600
+ report.drop_duplicates(
1601
+ subset=["File Name", "Peptide"], inplace=True
1602
+ )
1603
+ df = pd.merge(
1604
+ search_results,
1605
+ report,
1606
+ on=["File Name", "Peptide"],
1607
+ how="left",
1608
+ )
1609
+ included_columns = [
1610
+ "MsRun ID",
1611
+ "Sample ID",
1612
+ "Peptide",
1613
+ "Protein Group",
1614
+ "Intensity Log10",
1615
+ "RT",
1616
+ "iRT",
1617
+ ]
1618
+ # endif
1619
+
1620
+ if rollup == "np":
1621
+ included_columns.insert(
1622
+ included_columns.index("Sample ID") + 1, "Nanoparticle"
1623
+ )
1624
+
1625
+ df["MsRun ID"] = df["File Name"].apply(
1626
+ lambda x: (
1627
+ file_to_msrun[x]["id"] if x in file_to_msrun else None
1628
+ )
1629
+ )
1630
+ df["Sample ID"] = df["File Name"].apply(
1631
+ lambda x: (
1632
+ file_to_msrun[x]["sample_id"]
1633
+ if x in file_to_msrun
1634
+ else None
1635
+ )
1636
+ )
1637
+ df = df[included_columns]
1638
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
1639
+ return df
1640
+ else:
1641
+ # precursor
1642
+ # working only in report.tsv
1643
+ search_results["Intensity"] = search_results["Precursor.Quantity"]
1644
+ search_results["MsRun ID"] = search_results["Run"].apply(
1645
+ lambda x: (
1646
+ file_to_msrun[x]["id"] if x in file_to_msrun else None
1647
+ )
1648
+ )
1649
+ search_results["Sample ID"] = search_results["Run"].apply(
1650
+ lambda x: (
1651
+ file_to_msrun[x]["sample_id"]
1652
+ if x in file_to_msrun
1653
+ else None
1654
+ )
1655
+ )
1656
+ search_results["Protein Group"] = search_results["Protein.Group"]
1657
+ search_results["Peptide"] = search_results["Stripped.Sequence"]
1658
+ search_results["Charge"] = search_results["Precursor.Charge"]
1659
+ search_results["Precursor Id"] = search_results["Precursor.Id"]
1660
+ search_results["Precursor Q Value"] = search_results["Q.Value"]
1661
+ search_results["Protein Q Value"] = search_results[
1662
+ "Protein.Q.Value"
1663
+ ]
1664
+
1665
+ included_columns = [
1666
+ "MsRun ID",
1667
+ "Sample ID",
1668
+ "Protein Group",
1669
+ "Protein Q Value",
1670
+ "Peptide",
1671
+ "Precursor Id",
1672
+ "Intensity",
1673
+ "Precursor Q Value",
1674
+ "Charge",
1675
+ "RT",
1676
+ "iRT",
1677
+ "IM",
1678
+ "iIM",
1679
+ ]
1680
+ df = search_results[included_columns]
1681
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
1682
+
1683
+ return df
1684
+
1685
+ def get_search_data_analytes(self, analysis_id: str, analyte_type: str):
1686
+ if analyte_type not in ["protein", "peptide", "precursor"]:
1687
+ raise ValueError(
1688
+ f"Unknown analyte_type = {analyte_type}. Supported analytes are 'protein', 'peptide', or 'precursor'."
1689
+ )
1690
+
1691
+ # include
1692
+ # protein group, (peptide sequence), protein names, gene names, biological process, molecular function, cellular component, global q value, library q value
1693
+
1694
+ # 1. for all modes, fetch protein np file to extract protein groups, protein names, gene names, biological process, molecular function, cellular component
1695
+ search_results = self.get_search_result(
1696
+ analysis_id=analysis_id, analyte_type="protein", rollup="np"
1697
+ )
1698
+
1699
+ report_results = self.get_search_result(
1700
+ analysis_id=analysis_id, analyte_type="precursor", rollup="np"
1701
+ )
1702
+
1703
+ search_results = search_results[
1704
+ [
1705
+ "Protein Group",
1706
+ "Protein Names",
1707
+ "Gene Names",
1708
+ "Biological Process",
1709
+ "Molecular Function",
1710
+ "Cellular Component",
1711
+ ]
1712
+ ]
1713
+ search_results.drop_duplicates(subset=["Protein Group"], inplace=True)
1714
+ report_results["Protein Group"] = report_results["Protein.Group"]
1715
+ report_results["Peptide"] = report_results["Stripped.Sequence"]
1716
+
1717
+ if analyte_type == "protein":
1718
+ report_results = report_results[
1719
+ [
1720
+ "Protein Group",
1721
+ "Protein.Ids",
1722
+ "Global.PG.Q.Value",
1723
+ "Lib.PG.Q.Value",
1724
+ ]
1725
+ ]
1726
+ report_results.drop_duplicates(
1727
+ subset=["Protein Group"], inplace=True
1728
+ )
1729
+ df = pd.merge(
1730
+ search_results,
1731
+ report_results,
1732
+ on=["Protein Group"],
1733
+ how="left",
1734
+ )
1735
+ elif analyte_type == "peptide":
1736
+ peptide_results = self.get_search_result(
1737
+ analysis_id=analysis_id, analyte_type="peptide", rollup="np"
1738
+ )
1739
+ peptide_results = peptide_results[["Peptide", "Protein Group"]]
1740
+ search_results = pd.merge(
1741
+ peptide_results,
1742
+ search_results,
1743
+ on=["Protein Group"],
1744
+ how="left",
1745
+ )
1746
+
1747
+ report_results = report_results[
1748
+ ["Peptide", "Protein.Ids", "Protein.Group"]
1749
+ ]
1750
+ report_results.drop_duplicates(subset=["Peptide"], inplace=True)
1751
+ df = pd.merge(
1752
+ search_results,
1753
+ report_results,
1754
+ on=["Peptide"],
1755
+ how="left",
1756
+ )
1757
+ else:
1758
+ # precursor
1759
+ search_results = search_results[
1760
+ [
1761
+ "Protein Group",
1762
+ "Protein Names",
1763
+ "Gene Names",
1764
+ "Biological Process",
1765
+ "Molecular Function",
1766
+ "Cellular Component",
1767
+ ]
1768
+ ]
1769
+ search_results.drop_duplicates(
1770
+ subset=["Protein Group"], inplace=True
1771
+ )
1772
+ report_results = report_results[
1773
+ [
1774
+ "Precursor.Id",
1775
+ "Precursor.Charge",
1776
+ "Peptide",
1777
+ "Protein Group",
1778
+ "Protein.Ids",
1779
+ "Protein.Names",
1780
+ "Genes",
1781
+ "First.Protein.Description",
1782
+ "Modified.Sequence",
1783
+ "Proteotypic",
1784
+ "Global.Q.Value",
1785
+ "Global.PG.Q.Value",
1786
+ "Lib.Q.Value",
1787
+ "Lib.PG.Q.Value",
1788
+ ]
1789
+ ]
1790
+ report_results.drop_duplicates(
1791
+ subset=["Protein Group"], inplace=True
1792
+ )
1793
+ df = pd.merge(
1794
+ report_results,
1795
+ search_results,
1796
+ on=["Protein Group"],
1797
+ how="left",
1798
+ )
1799
+ df = df[
1800
+ [
1801
+ "Precursor.Id",
1802
+ "Precursor.Charge",
1803
+ "Peptide",
1804
+ "Protein Group",
1805
+ "Protein.Ids",
1806
+ "Protein.Names",
1807
+ "Genes",
1808
+ "First.Protein.Description",
1809
+ "Modified.Sequence",
1810
+ "Proteotypic",
1811
+ "Global.Q.Value",
1812
+ "Global.PG.Q.Value",
1813
+ "Lib.Q.Value",
1814
+ "Lib.PG.Q.Value",
1815
+ "Gene Names",
1816
+ "Biological Process",
1817
+ "Molecular Function",
1818
+ "Cellular Component",
1819
+ ]
1820
+ ]
1821
+ df.rename(
1822
+ columns={"Modified.Sequence": "Modified.Peptide"}, inplace=True
1823
+ )
1824
+ # endif
1825
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
1826
+ return df
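
The two largest additions in 1.1.0 are the get_search_data and get_search_data_analytes helpers shown above, both defined on _UnsupportedSDK (a subclass of SeerSDK in the in-development seer_pas_sdk.core.unsupported module). A hedged usage sketch, assuming an authenticated instance of that class and a placeholder analysis ID:

    # Illustrative only: `sdk` stands in for an authenticated _UnsupportedSDK /
    # SeerSDK instance and "ANALYSIS_ID" is a placeholder.
    intensities = sdk.get_search_data(
        analysis_id="ANALYSIS_ID",
        analyte_type="protein",   # 'protein', 'peptide', or 'precursor'
        rollup="np",              # 'np' (per nanoparticle) or 'panel'
        norm_method="pepcal",     # raw | engine | median | median80 | pepcal (default)
    )

    annotations = sdk.get_search_data_analytes(
        analysis_id="ANALYSIS_ID",
        analyte_type="protein",
    )

    # Both return pandas DataFrames with snake_case column names, e.g.
    # 'msrun_id', 'sample_id', 'protein_group', 'intensity_log10', ...
    print(intensities.columns.tolist())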