seer-pas-sdk 1.1.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/PKG-INFO +1 -1
  2. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/docs/index.qmd +55 -28
  3. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/common/__init__.py +46 -0
  4. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/core/sdk.py +204 -176
  5. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/core/unsupported.py +104 -30
  6. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/PKG-INFO +1 -1
  7. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.github/workflows/lint.yml +0 -0
  8. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.github/workflows/publish.yml +0 -0
  9. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.github/workflows/test.yml +0 -0
  10. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.gitignore +0 -0
  11. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.pre-commit-config.yaml +0 -0
  12. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/LICENSE.txt +0 -0
  13. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/README.md +0 -0
  14. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/docs/_quarto.yml +0 -0
  15. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/pyproject.toml +0 -0
  16. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/__init__.py +0 -0
  17. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/auth/__init__.py +0 -0
  18. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/auth/auth.py +0 -0
  19. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/common/errors.py +0 -0
  20. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/common/groupanalysis.py +0 -0
  21. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/core/__init__.py +0 -0
  22. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/__init__.py +0 -0
  23. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/groupanalysis.py +0 -0
  24. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/headers.py +0 -0
  25. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/platemap.py +0 -0
  26. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/volcanoplot.py +0 -0
  27. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/SOURCES.txt +0 -0
  28. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/dependency_links.txt +0 -0
  29. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/requires.txt +0 -0
  30. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/top_level.txt +0 -0
  31. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/setup.cfg +0 -0
  32. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/__init__.py +0 -0
  33. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/conftest.py +0 -0
  34. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/objects/__init__.py +0 -0
  35. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/objects/test_platemap.py +0 -0
  36. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/test_auth.py +0 -0
  37. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/test_common.py +0 -0
  38. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/test_objects.py +0 -0
  39. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/test_sdk.py +0 -0
  40. {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/unsupported_platemap.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: seer-pas-sdk
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: SDK for Seer Proteograph Analysis Suite (PAS)
5
5
  Author-email: Ryan Sun <rsun@seer.bio>
6
6
  License:
@@ -18,12 +18,27 @@ $ pip install seer-pas-sdk
18
18
  This page gives an overview of the SDK's features. Complete documentation for each class / method can be found [here](reference/).
19
19
 
20
20
  ### Configuration
21
- PAS has a simple authorization system that just involves your username and password fields like on the web app. You can define your username and password for your own ready reference and convenience as follows:
21
+ The PAS SDK has a simple authorization system that involves your username and password fields like on the web app. You can define your username and password for your own ready reference and convenience as follows:
22
22
  ```{python}
23
23
  USERNAME = "gnu403"
24
24
  PASSWORD = "Test!234567"
25
25
  ```
26
26
 
27
+ The PAS SDK requires either a `tenant` or `tenant_id` argument in the SDK object constructor.
28
+
29
+ `tenant` refers to the user provided name of the tenant.
30
+
31
+ `tenant_id` refers to the immutable and unique identifier of the tenant.
32
+ `tenant_id` is an absolute reference to the tenant, even if the tenant name is changed.
33
+
34
+ More details on multi-tenant management can be found in the [Multi Tenant Management](#multi-tenant-management) section below.
35
+
36
+ You can define your tenant name or tenant ID as follows:
37
+ ```{python}
38
+ TENANT = "My Tenant Name"
39
+ TENANT_ID = "abc1234abc1234"
40
+ ```
41
+
27
42
  You may also choose to pass in an `instance` param in the SDK object to instantiate the PAS SDK to the EU or US instance:
28
43
  ```{python}
29
44
  INSTANCE = "US"
@@ -38,10 +53,13 @@ After importing the SeerSDK module, you can instantiate an object in the followi
38
53
  from seer_pas_sdk import SeerSDK
39
54
 
40
55
  # Instantiate an SDK object with your credentials:
41
- sdk = SeerSDK(USERNAME, PASSWORD)
56
+ sdk = SeerSDK(USERNAME, PASSWORD, tenant=TENANT)
42
57
 
43
- # You could alternatively pass your credentials and/or the instance directly into the instantiated object.
44
- sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
58
+ # Instantiate an SDK object with your credentials and instance:
59
+ sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant=TENANT)
60
+
61
+ # Instantiate an SDK object with your credentials and tenant ID:
62
+ sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant_id=TENANT_ID)
45
63
  ```
46
64
 
47
65
  ```{python}
@@ -56,18 +74,16 @@ Additional information and examples can also be found below.
56
74
  ### Multi Tenant Management
57
75
  Introduced in version 0.2.0
58
76
 
59
- By default, you will be active in your home tenant upon log in. The home tenant is defined as the organization account that issued the original invitation for the user to join PAS.
60
- The optional 'tenant' parameter is available in the SeerSDK constructor to navigate directly to a desired tenant.
61
- A notification message will display upon login.
62
-
63
-
64
77
  The following tools are available to navigate between tenants:
65
78
  ```{python}
66
79
  #| eval: false
67
80
  from seer_pas_sdk import SeerSDK
68
81
 
82
+ # Assume tenant upon login
69
83
  sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant='My Active Tenant')
70
84
 
85
+ sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant_id='myuuidstring-1234')
86
+
71
87
  # Retrieve value of current active tenant
72
88
  print(sdk.get_active_tenant())
73
89
 
@@ -578,10 +594,17 @@ log(analysis)
578
594
 
579
595
 
580
596
  ### Find Analyses
581
- Returns a list of analyses objects for the authenticated user. If no `analysis_id` is provided, returns all analyses for the authenticated user.
597
+ Returns a list of analyses objects for the authenticated user. If `None` is provided for all query arguments, returns all analyses available to the user within the active tenant.
582
598
 
583
599
  ###### <u>Params</u>
584
- `analysis_id`: (`str`, optional) Unique ID of the analysis to be fetched, defaulted to None.
600
+ * `analysis_id`: (`str`, optional) Unique ID of the analysis to be fetched, defaulted to None.
601
+ * `analysis_name`: (`str`, optional) Name of the analysis to be fetched, defaulted to None. Results will be matched on a substring basis.
602
+ * `folder_id`: (`str`, optional) Unique ID of the folder to fetch analyses from, defaulted to None.
603
+ * `folder_name`: (`str`, optional) Name of the folder to fetch analyses from, defaulted to None.
604
+ * `project_id`: (`str`, optional) Unique ID of the project to filter the result set of analyses, defaulted to None.
605
+ * `project_name`: (`str`, optional) Name of the project to filter the result set of analyses, defaulted to None.
606
+ * `plate_name`: (`str`, optional) Name of a plate to filter the result set of analyses, defaulted to None.
607
+ * `as_df`: (`bool`, optional) Whether the result should be converted to a DataFrame, defaulted to False.
585
608
  <br>
586
609
 
587
610
  ###### <u>Returns</u>
@@ -955,11 +978,9 @@ log(sdk.group_analysis_results(group_analysis_id, box_plot_info))
955
978
  Downloads the FASTA file(s) associated with an analysis protocol. You can specify an analysis_id (the function will resolve the protocol automatically) or provide an analysis_protocol_id directly.
956
979
 
957
980
  ###### <u>Params</u>
958
- * `analysis_protocol_id`: (`str`, optional) ID of the analysis protocol whose FASTA file(s) you want.
959
-
960
- * `analysis_id`: (`str`, optional) ID of the analysis whose protocol FASTA file(s) you want.
961
-
962
- * `download_path`: (`str`, optional) Directory to save files to. Defaults to the current working directory.
981
+ * `analysis_protocol_id`: (`str`, optional) The unique ID of the analysis protocol associated with the FASTA files to download.
982
+ * `analysis_id`: (`str`, optional) The unique ID of the analysis whose protocol FASTA file(s) will be downloaded.
983
+ * `analysis_name`: (`str`, optional) The name of the analysis whose protocol FASTA file(s) will be downloaded.
963
984
 
964
985
  Note: Provide either analysis_id or analysis_protocol_id (but not both).
965
986
 
@@ -977,6 +998,10 @@ sdk.download_analysis_protocol_fasta(
977
998
  )
978
999
  ```
979
1000
 
1001
+ ```
1002
+ ['./uniprot_human_2023_08.fasta', './contaminants.fasta']
1003
+ ```
1004
+
980
1005
  Download by analysis protocol ID to a specific folder:
981
1006
  ```{python}
982
1007
  #| eval: false
@@ -991,16 +1016,20 @@ sdk.download_analysis_protocol_fasta(
991
1016
  ```
992
1017
  <br>
993
1018
 
994
- ### Get Analysis Protocol FASTA link
995
- Returns signed download links for the FASTA file(s) associated with an analysis protocol. You can specify an analysis_id (the function will resolve the protocol automatically) or provide an analysis_protocol_id directly.
1019
+ ### Get Analysis Protocol FASTA URLs
1020
+ Returns download URLs for the FASTA file(s) associated with an analysis protocol. You can specify an analysis_id (the function will resolve the protocol automatically) or provide an analysis_protocol_id directly.
1021
+
1022
+ Download URLs are valid for 15 minutes after generation.
996
1023
 
997
1024
  ###### <u>Params</u>
998
- * `analysis_protocol_id`: (`str`, optional) ID of the analysis protocol whose FASTA file(s) you want.
999
- * `analysis_id`: (`str`, optional) ID of the analysis whose protocol FASTA file(s) you want.
1000
- Note: Provide either analysis_id or analysis_protocol_id (but not both).
1025
+ * `analysis_protocol_id`: (`str`, optional) The unique ID of the analysis protocol associated with the FASTA files.
1026
+ * `analysis_id`: (`str`, optional) The unique ID of the analysis whose protocol FASTA file(s) should be retrieved.
1027
+ * `analysis_name`: (`str`, optional) The name of the analysis whose protocol FASTA file(s) should be retrieved.
1028
+
1029
+ If `analysis_protocol_id` is provided together with `analysis_id` or `analysis_name`, `analysis_protocol_id` takes precedence.
1001
1030
 
1002
1031
  ###### <u>Returns</u>
1003
- * links: (`list[dict]`) List of dictionaries containing filename and signed URL for each FASTA file.
1032
+ * links: (`dict`) Dictionary containing filename and signed URL as key-value pairs for the FASTA files linked to the protocol.
1004
1033
 
1005
1034
  ###### <u>Examples</u>
1006
1035
  Get by analysis ID:
@@ -1012,10 +1041,8 @@ sdk.get_analysis_protocol_fasta_link(
1012
1041
  ```
1013
1042
 
1014
1043
  ```
1015
- [
1016
- {"filename": "uniprot_human_2023_08.fasta", "url": "https://...signed..."},
1017
- {"filename": "contaminants.fasta", "url": "https://...signed..."}
1018
- ]
1044
+ {"uniprot_human_2023_08.fasta" : "https://...signed...",
1045
+ "contaminants.fasta" : "https://...signed..."}
1019
1046
  ```
1020
1047
  Get by analysis protocol ID:
1021
1048
  ```{python}
@@ -1026,8 +1053,8 @@ sdk.get_analysis_protocol_fasta_link(
1026
1053
  ```
1027
1054
  ```
1028
1055
  [
1029
- {"filename": "uniprot_human_2023_08.fasta", "url": "https://...signed..."},
1030
- {"filename": "contaminants.fasta", "url": "https://...signed..."}
1056
+ {"uniprot_human_2023_08.fasta" : "https://...signed...",
1057
+ "contaminants.fasta" : "https://...signed..."}
1031
1058
  ]
1032
1059
  ```
1033
1060
  <hr>
@@ -679,6 +679,52 @@ def camel_case(s):
679
679
  return "".join([s[0].lower(), s[1:]])
680
680
 
681
681
 
682
+ def validate_d_zip_file(file):
683
+ """
684
+ Return True if a .d.zip file aligns with Seer requirements for PAS upload.
685
+
686
+ Parameters
687
+ ----------
688
+ file : str
689
+ The name of the zip file.
690
+
691
+ Returns
692
+ -------
693
+ bool
694
+ True if the .d.zip file is valid, False otherwise.
695
+ """
696
+
697
+ if not file.lower().endswith(".d.zip"):
698
+ return False
699
+
700
+ basename = os.path.basename(file)
701
+
702
+ # Remove the .zip extension to get the .d folder name
703
+ d_name = basename[:-4]
704
+
705
+ try:
706
+ with zipfile.ZipFile(file, "r") as zf:
707
+ names = zf.namelist()
708
+
709
+ except:
710
+ return False
711
+
712
+ if not names:
713
+ return False
714
+
715
+ # check for files at the root level
716
+ root_entries = [n for n in names if "/" not in n.rstrip("/")]
717
+ if root_entries:
718
+ return False
719
+
720
+ # find folders
721
+ top_level = {n.split("/")[0] for n in names}
722
+ if len(top_level) != 1 or d_name not in top_level:
723
+ return False
724
+
725
+ return True
726
+
727
+
682
728
  def rename_d_zip_file(source, destination):
683
729
  """
684
730
  Renames a .d.zip file. The function extracts the contents of the source zip file, renames the inner .d folder, and rezips the contents into the destination zip file.
@@ -31,31 +31,57 @@ class SeerSDK:
31
31
  >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
32
32
  """
33
33
 
34
- def __init__(self, username, password, instance="US", tenant=None):
34
+ def __init__(
35
+ self,
36
+ username: str,
37
+ password: str,
38
+ instance: str = "US",
39
+ tenant: str = None,
40
+ tenant_id: str = None,
41
+ ):
35
42
  try:
36
43
  self._auth = Auth(username, password, instance)
37
44
 
38
45
  self._auth._login()
39
46
  print(f"User '{username}' logged in.\n")
47
+ except Exception as e:
48
+ raise ValueError(
49
+ f"Could not log in.\nPlease check your credentials and/or instance: {e}."
50
+ )
40
51
 
41
- if not tenant:
42
- tenant = self._auth.active_tenant_id
43
- try:
44
- self.switch_tenant(tenant)
45
- except Exception as e:
52
+ # direct logged in user to the specified tenant
53
+ tenant_data = pd.DataFrame(
54
+ self.get_user_tenant(index=False),
55
+ columns=["institution", "tenantId"],
56
+ ).rename(
57
+ columns={"institution": "Tenant name", "tenantId": "Tenant ID"}
58
+ )
59
+ tenant_names = tenant_data["Tenant name"].tolist()
60
+ tenant_ids = tenant_data["Tenant ID"].tolist()
61
+
62
+ # precondition: None is not a valid tenant_name or tenant_id.
63
+ if tenant_id is None and tenant is None:
64
+ self.logout()
65
+ if None in tenant_names:
46
66
  print(
47
- f"Encountered an error directing you to tenant {tenant}: {e}."
67
+ "Warning: You have access to a tenant with no name. Please either provide a tenant name in the PAS website or specify a tenant_id to access that tenant."
48
68
  )
49
- print("Logging into home tenant...")
50
- # If an error occurs while directing the user to a tenant, default to home tenant.
51
- print(f"You are now active in {self.get_active_tenant_name()}")
52
- except ServerError as e:
53
- raise e
54
- except Exception as e:
55
69
  raise ValueError(
56
- f"Could not log in.\nPlease check your credentials and/or instance: {e}."
70
+ f"Either tenant or tenant_id must be provided. Please indicate one of the following tenants: \n{tenant_data.to_string(index=False)}"
57
71
  )
58
72
 
73
+ if tenant_id not in tenant_ids:
74
+ if tenant in tenant_names:
75
+ # if multiple tenants exist for the same institution name, fall back on multiple tenant error in switch_tenant.
76
+ self.switch_tenant(tenant)
77
+ else:
78
+ self.logout()
79
+ raise ValueError(
80
+ f"Invalid tenant or tenant_id provided. Please indicate one of the following tenants: \n{tenant_data.to_string(index=False)}"
81
+ )
82
+ else:
83
+ self.switch_tenant(tenant_id)
84
+
59
85
  def logout(self):
60
86
  """
61
87
  Perform a logout operation for the current user of the SDK instance.
@@ -115,7 +141,7 @@ class SeerSDK:
115
141
  response = s.get(f"{self._auth.url}api/v1/usertenants")
116
142
 
117
143
  if response.status_code != 200:
118
- raise ValueError(
144
+ raise ServerError(
119
145
  "Invalid request. Please check your parameters."
120
146
  )
121
147
 
@@ -1471,7 +1497,7 @@ class SeerSDK:
1471
1497
  analysis_protocol_engine=res["analysis_engine"],
1472
1498
  )
1473
1499
  )
1474
- except:
1500
+ except Exception:
1475
1501
  res["fasta"] = ""
1476
1502
  return res
1477
1503
  else:
@@ -1590,10 +1616,7 @@ class SeerSDK:
1590
1616
  try:
1591
1617
  res[entry]["fasta"] = ",".join(
1592
1618
  self._get_analysis_protocol_fasta_filenames(
1593
- analysis_protocol_id=res[entry]["id"],
1594
- analysis_protocol_engine=res[entry].get(
1595
- "analysis_engine", None
1596
- ),
1619
+ analysis_protocol_id=res[entry]["id"]
1597
1620
  )
1598
1621
  )
1599
1622
  except:
@@ -1821,18 +1844,12 @@ class SeerSDK:
1821
1844
  if not res.get("is_folder") and res.get(
1822
1845
  "analysis_protocol_id"
1823
1846
  ):
1824
- analysis_protocol = self.get_analysis_protocol(
1825
- analysis_protocol_id=res.get("analysis_protocol_id")
1826
- )
1827
1847
  try:
1828
1848
  res["fasta"] = ",".join(
1829
1849
  self._get_analysis_protocol_fasta_filenames(
1830
1850
  analysis_protocol_id=res.get(
1831
1851
  "analysis_protocol_id"
1832
- ),
1833
- analysis_protocol_engine=analysis_protocol.get(
1834
- "analysis_engine", None
1835
- ),
1852
+ )
1836
1853
  )
1837
1854
  )
1838
1855
  except Exception as e:
@@ -1854,49 +1871,60 @@ class SeerSDK:
1854
1871
  else:
1855
1872
  return res[0]
1856
1873
 
1874
+ def _lookup_analysis_folders(self):
1875
+ """
1876
+ Helper function to map analysis folder ids to names.
1877
+ """
1878
+ with self._get_auth_session("getanalysisfolders") as s:
1879
+ URL = f"{self._auth.url}api/v1/analyses"
1880
+ params = {"all": "true", "folderonly": "true"}
1881
+ folders = s.get(URL, params=params)
1882
+ if folders.status_code != 200:
1883
+ raise ValueError(
1884
+ "Failed to fetch analysis folders. Please check your connection."
1885
+ )
1886
+ res = folders.json()["data"]
1887
+ return res
1888
+
1857
1889
  def find_analyses(
1858
1890
  self,
1859
1891
  analysis_id: str = None,
1892
+ analysis_name: str = None,
1860
1893
  folder_id: str = None,
1861
- show_folders: bool = True,
1862
- analysis_only: bool = True,
1894
+ folder_name: str = None,
1863
1895
  project_id: str = None,
1896
+ project_name: str = None,
1864
1897
  plate_name: str = None,
1865
1898
  as_df=False,
1866
- **kwargs,
1867
1899
  ):
1868
1900
  """
1869
- Returns a list of analyses objects for the authenticated user. If no id is provided, returns all analyses for the authenticated user.
1870
- Search parameters may be passed in as keyword arguments to filter the results. Acceptable values are 'analysis_name', 'folder_name', 'description', 'notes', or 'number_msdatafile'.
1871
- Only search on a single field is supported.
1901
+ Returns a list of analyses objects for the authenticated user. If None is provided for all query arguments, returns all analyses for the authenticated user.
1872
1902
 
1873
1903
  Parameters
1874
1904
  ----------
1875
1905
  analysis_id : str, optional
1876
1906
  ID of the analysis to be fetched, defaulted to None.
1877
1907
 
1908
+ analysis_name : str, optional
1909
+ Name of the analysis to be fetched, defaulted to None. Results will be matched on a substring basis.
1910
+
1878
1911
  folder_id : str, optional
1879
- ID of the folder to be fetched, defaulted to None.
1912
+ Unique ID of an analysis folder to filter results, defaulted to None.
1880
1913
 
1881
- show_folders : bool, optional
1882
- Mark True if folder contents are to be returned in the response, i.e. recursive search, defaulted to True.
1883
- Will be disabled if an analysis id is provided.
1884
-
1885
- analysis_only : bool, optional
1886
- Mark True if only analyses objects are to be returned in the response, defaulted to True.
1887
- If marked false, folder objects will also be included in the response.
1914
+ folder_name : str, optional
1915
+ Name of an analysis folder to filter results, defaulted to None.
1888
1916
 
1889
1917
  project_id : str, optional
1890
- ID of the project to be fetched, defaulted to None.
1918
+ Unique ID of a project to filter results, defaulted to None.
1919
+
1920
+ project_name : str, optional
1921
+ Name of a project to filter results, defaulted to None.
1891
1922
 
1892
1923
  plate_name : str, optional
1893
- Name of the plate to be fetched, defaulted to None.
1924
+ Name of a plate to filter results, defaulted to None.
1894
1925
 
1895
1926
  as_df : bool, optional
1896
- whether the result should be converted to a DataFrame, defaulted to False.
1897
-
1898
- **kwargs : dict, optional
1899
- Search keyword parameters to be passed in. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
1927
+ Whether the result should be converted to a DataFrame, defaulted to False.
1900
1928
 
1901
1929
  Returns
1902
1930
  -------
@@ -1930,51 +1958,44 @@ class SeerSDK:
1930
1958
  URL = f"{self._auth.url}api/v1/analyses"
1931
1959
  res = []
1932
1960
 
1933
- search_field = None
1934
- search_item = None
1935
- if kwargs:
1936
- if len(kwargs.keys()) > 1:
1937
- raise ValueError("Please include only one search parameter.")
1938
- search_field = list(kwargs.keys())[0]
1939
- search_item = kwargs[search_field]
1940
-
1941
- if not search_item:
1942
- raise ValueError(
1943
- f"Please provide a non null value for {search_field}"
1944
- )
1945
-
1946
- if search_field and search_field not in [
1947
- "analysis_name",
1948
- "folder_name",
1949
- "analysis_protocol_name",
1950
- "description",
1951
- "notes",
1952
- "number_msdatafile",
1953
- ]:
1954
- raise ValueError(
1955
- "Invalid search field. Please choose between 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'."
1956
- )
1957
-
1958
1961
  if analysis_id:
1959
1962
  try:
1960
1963
  return [self.get_analysis(analysis_id=analysis_id)]
1961
- except:
1964
+ except Exception:
1962
1965
  return []
1963
1966
 
1967
+ analysis_folders = self._lookup_analysis_folders()
1968
+ analysis_folder_id_to_name = {
1969
+ x["id"]: x["analysis_name"] for x in analysis_folders
1970
+ }
1971
+ analysis_folder_name_to_id = {
1972
+ v: k for k, v in analysis_folder_id_to_name.items()
1973
+ }
1974
+
1975
+ if folder_name and not folder_id:
1976
+ folder_id = analysis_folder_name_to_id.get(folder_name, None)
1977
+ if not folder_id:
1978
+ raise ValueError(f"No folder found with name '{folder_name}'.")
1979
+
1980
+ if project_name and not project_id:
1981
+ project = self.get_project(project_name=project_name)
1982
+ if not project:
1983
+ raise ValueError(
1984
+ f"No project found with name '{project_name}'."
1985
+ )
1986
+ project_id = project["id"]
1987
+
1964
1988
  with self._get_auth_session("findanalyses") as s:
1965
1989
 
1966
- params = {"all": "true"}
1990
+ params = {"all": "true", "analysisonly": "true"}
1967
1991
  if folder_id:
1968
1992
  params["folder"] = folder_id
1969
1993
 
1970
- if search_field:
1971
- params["searchFields"] = search_field
1972
- params["searchItem"] = search_item
1994
+ if analysis_name:
1995
+ params["searchFields"] = "analysis_name"
1996
+ params["searchItem"] = analysis_name
1973
1997
  del params["all"]
1974
1998
 
1975
- if search_field == "folder_name":
1976
- params["searchFields"] = "analysis_name"
1977
-
1978
1999
  if project_id:
1979
2000
  params["projectId"] = project_id
1980
2001
 
@@ -1989,9 +2010,8 @@ class SeerSDK:
1989
2010
  )
1990
2011
  res = analyses.json()["data"]
1991
2012
 
1992
- folders = []
1993
2013
  spaces = {x["id"]: x["usergroup_name"] for x in self.get_spaces()}
1994
- protocol_to_engine_map = dict()
2014
+ protocol_to_fasta = {}
1995
2015
  for entry in range(len(res)):
1996
2016
  if "tenant_id" in res[entry]:
1997
2017
  del res[entry]["tenant_id"]
@@ -2005,11 +2025,14 @@ class SeerSDK:
2005
2025
  ][location(res[entry]["parameter_file_path"]) :]
2006
2026
 
2007
2027
  if (
2008
- show_folders
2009
- and not analysis_id
2010
- and res[entry]["is_folder"]
2028
+ "folder_id" in res[entry]
2029
+ and res[entry]["folder_id"] is not None
2011
2030
  ):
2012
- folders.append(res[entry]["id"])
2031
+ res[entry]["folder_name"] = analysis_folder_id_to_name.get(
2032
+ res[entry]["folder_id"], None
2033
+ )
2034
+ res[entry]["folder_uuid"] = res[entry]["folder_id"]
2035
+ del res[entry]["folder_id"]
2013
2036
 
2014
2037
  if "user_group" in res[entry]:
2015
2038
  res[entry]["space"] = spaces.get(
@@ -2020,51 +2043,34 @@ class SeerSDK:
2020
2043
  if (not res[entry].get("is_folder")) and res[entry].get(
2021
2044
  "analysis_protocol_id"
2022
2045
  ):
2023
- if (
2024
- res[entry]["analysis_protocol_id"]
2025
- in protocol_to_engine_map
2026
- ):
2027
- analysis_protocol_engine = protocol_to_engine_map[
2028
- res[entry]["analysis_protocol_id"]
2046
+ # analysis_protocol_id for this result row
2047
+ local_analysis_protocol_id = res[entry].get(
2048
+ "analysis_protocol_id"
2049
+ )
2050
+ if local_analysis_protocol_id in protocol_to_fasta:
2051
+ res[entry]["fasta"] = protocol_to_fasta[
2052
+ local_analysis_protocol_id
2029
2053
  ]
2030
2054
  else:
2031
2055
  try:
2032
- analysis_protocol = self.get_analysis_protocol(
2033
- analysis_protocol_id=res[entry].get(
2034
- "analysis_protocol_id"
2056
+ res[entry]["fasta"] = ",".join(
2057
+ self._get_analysis_protocol_fasta_filenames(
2058
+ local_analysis_protocol_id,
2059
+ analysis_protocol_engine=res[entry].get(
2060
+ "analysis_engine"
2061
+ ),
2035
2062
  )
2036
2063
  )
2037
- analysis_protocol_engine = analysis_protocol.get(
2038
- "analysis_engine", None
2064
+ protocol_to_fasta[local_analysis_protocol_id] = (
2065
+ res[entry]["fasta"]
2039
2066
  )
2040
- protocol_to_engine_map[
2041
- res[entry]["analysis_protocol_id"]
2042
- ] = analysis_protocol_engine
2043
2067
  except:
2044
- analysis_protocol_engine = None
2045
- try:
2046
- res[entry]["fasta"] = ",".join(
2047
- self._get_analysis_protocol_fasta_filenames(
2048
- res[entry]["analysis_protocol_id"],
2049
- analysis_protocol_engine=analysis_protocol_engine,
2068
+ print(
2069
+ f"Warning: Could not fetch fasta files for analysis {res[entry].get('analysis_name')}."
2050
2070
  )
2051
- )
2052
- except:
2053
- print(
2054
- f"Warning: Could not fetch fasta files for analysis {res[entry].get('analysis_name')}."
2055
- )
2056
- res[entry]["fasta"] = None
2057
2071
  else:
2058
2072
  res[entry]["fasta"] = None
2059
2073
 
2060
- # recursive solution to get analyses in folders
2061
- for folder in folders:
2062
- res += self.find_analyses(folder_id=folder)
2063
-
2064
- if analysis_only:
2065
- res = [
2066
- analysis for analysis in res if not analysis["is_folder"]
2067
- ]
2068
2074
  if not res and as_df:
2069
2075
  return pd.DataFrame(columns=ANALYSIS_COLUMNS)
2070
2076
  return res if not as_df else dict_to_df(res)
@@ -4059,7 +4065,7 @@ class SeerSDK:
4059
4065
  print(f"Downloaded file to {download_path}/{file}")
4060
4066
 
4061
4067
  def _get_analysis_protocol_fasta_filenames(
4062
- self, analysis_protocol_id: str, analysis_protocol_engine: str
4068
+ self, analysis_protocol_id: str, analysis_protocol_engine: str = None
4063
4069
  ):
4064
4070
  """
4065
4071
  Helper function - Get the fasta file name(s) associated with a given analysis protocol and engine.
@@ -4069,6 +4075,14 @@ class SeerSDK:
4069
4075
  Returns:
4070
4076
  list[str]: A list of fasta file names associated with the analysis protocol.
4071
4077
  """
4078
+ if not analysis_protocol_engine:
4079
+ analysis_protocol_engine = self.get_analysis_protocol(
4080
+ analysis_protocol_id=analysis_protocol_id
4081
+ ).get("analysis_engine")
4082
+ if not analysis_protocol_engine:
4083
+ raise ValueError(
4084
+ f"Could not retrieve analysis protocol engine for analysis protocol {analysis_protocol_id}."
4085
+ )
4072
4086
  analysis_protocol_engine = analysis_protocol_engine.lower()
4073
4087
  if analysis_protocol_engine == "diann":
4074
4088
  URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/diann/{analysis_protocol_id}"
@@ -4108,8 +4122,35 @@ class SeerSDK:
4108
4122
  raise ServerError("No fasta file name returned from server.")
4109
4123
  return fasta_filenames
4110
4124
 
4111
- def get_analysis_protocol_fasta_link(
4112
- self, analysis_protocol_id=None, analysis_id=None, analysis_name=None
4125
+ def _get_analysis_protocol_fasta_url(
4126
+ self, analysis_protocol_fasta_name: str
4127
+ ):
4128
+ """
4129
+ Helper function - Get the download link for a given fasta file name.
4130
+
4131
+ Args:
4132
+ analysis_protocol_fasta_name (str): Name of the fasta file.
4133
+
4134
+ Returns:
4135
+ str: The URL to download the fasta file.
4136
+ """
4137
+ URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
4138
+ with self._get_auth_session("getanalysisprotocolfilesurl") as s:
4139
+ response = s.post(
4140
+ URL, json={"filepath": analysis_protocol_fasta_name}
4141
+ )
4142
+ if response.status_code != 200:
4143
+ raise ServerError(
4144
+ f"Could not retrieve download link for {analysis_protocol_fasta_name}."
4145
+ )
4146
+ url = response.json()["url"]
4147
+ return url
4148
+
4149
+ def get_analysis_protocol_fasta_urls(
4150
+ self,
4151
+ analysis_protocol_id=None,
4152
+ analysis_id=None,
4153
+ analysis_name=None,
4113
4154
  ):
4114
4155
  """Get the download link(s) for the fasta file(s) associated with a given analysis protocol.
4115
4156
  Args:
@@ -4120,52 +4161,31 @@ class SeerSDK:
4120
4161
  Returns:
4121
4162
  list[dict]: A list of dictionaries containing the 'filename' and the 'url' to download the fasta file.
4122
4163
  """
4123
- if analysis_name and (not analysis_id):
4124
- analyses = self.find_analyses(analysis_name=analysis_name)
4125
- if len(analyses) > 1:
4126
- raise ValueError(
4127
- f"Multiple analyses found with name {analysis_name}. Please provide an analysis ID instead."
4128
- )
4129
- elif len(analyses) == 0:
4130
- raise ValueError(
4131
- f"No analyses found with name {analysis_name}."
4132
- )
4133
- else:
4134
- analysis_id = analyses[0]["id"]
4135
-
4136
- if not (bool(analysis_protocol_id) ^ bool(analysis_id)):
4137
- raise ValueError(
4138
- "Please provide either an analysis ID or an analysis protocol ID."
4139
- )
4140
-
4141
4164
  if not analysis_protocol_id:
4142
- try:
4143
- analysis_protocol_id = self.get_analysis(
4144
- analysis_id=analysis_id
4145
- )["analysis_protocol_id"]
4146
- except KeyError:
4147
- raise ValueError(f"Could not parse server response.")
4165
+ analysis = self.get_analysis(
4166
+ analysis_id=analysis_id,
4167
+ analysis_name=analysis_name,
4168
+ )
4169
+ analysis_protocol_id = analysis.get("analysis_protocol_id")
4148
4170
 
4149
- engine = self.get_analysis_protocol(
4150
- analysis_protocol_id=analysis_protocol_id
4151
- ).get("analysis_engine", None)
4152
4171
  fasta_filenames = self._get_analysis_protocol_fasta_filenames(
4153
- analysis_protocol_id=analysis_protocol_id,
4154
- analysis_protocol_engine=engine,
4172
+ analysis_protocol_id=analysis_protocol_id
4155
4173
  )
4156
- URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
4157
- links = []
4158
- for file in fasta_filenames:
4159
- with self._get_auth_session("getanalysisprotocolfilesurl") as s:
4160
- filename = os.path.basename(file)
4161
- response = s.post(URL, json={"filepath": file})
4162
- if response.status_code != 200:
4163
- print(
4164
- f"ERROR: Could not retrieve download link for {filename}."
4165
- )
4166
- continue
4167
- url = response.json()["url"]
4168
- links.append({"filename": filename, "url": url})
4174
+
4175
+ links = {}
4176
+ for filepath in fasta_filenames:
4177
+ filename = os.path.basename(filepath)
4178
+ try:
4179
+ url = self._get_analysis_protocol_fasta_url(
4180
+ analysis_protocol_fasta_name=filepath
4181
+ )
4182
+ except ServerError:
4183
+ print(
4184
+ f"ERROR: Could not retrieve download link for {filename}."
4185
+ )
4186
+ continue
4187
+
4188
+ links[filename] = url
4169
4189
  return links
4170
4190
 
4171
4191
  def download_analysis_protocol_fasta(
@@ -4186,20 +4206,28 @@ class SeerSDK:
4186
4206
  Returns:
4187
4207
  list[str] : The path to the downloaded fasta file(s).
4188
4208
  """
4189
-
4190
- links = [
4191
- (x["filename"], x["url"])
4192
- for x in self.get_analysis_protocol_fasta_link(
4193
- analysis_protocol_id=analysis_protocol_id,
4194
- analysis_id=analysis_id,
4195
- analysis_name=analysis_name,
4209
+ if not analysis_protocol_id:
4210
+ analysis = self.get_analysis(
4211
+ analysis_id=analysis_id, analysis_name=analysis_name
4196
4212
  )
4197
- ]
4213
+ analysis_protocol_id = analysis.get("analysis_protocol_id")
4214
+
4215
+ filepaths = self._get_analysis_protocol_fasta_filenames(
4216
+ analysis_protocol_id=analysis_protocol_id
4217
+ )
4198
4218
  if not download_path:
4199
4219
  download_path = os.getcwd()
4200
4220
 
4201
4221
  downloads = []
4202
- for filename, url in links:
4222
+ for filepath in filepaths:
4223
+ # fetch each signed url immediately before its download to avoid signed url expiration
4224
+ url = self._get_analysis_protocol_fasta_url(
4225
+ analysis_protocol_fasta_name=filepath
4226
+ )
4227
+ filename = os.path.basename(filepath)
4228
+
4229
+ # local path the file is saved to (download_path joined with filename)
4230
+ local_filename = f"{download_path}/{filename}"
4203
4231
  print(f"Downloading {filename}")
4204
4232
  for _ in range(2):
4205
4233
  try:
@@ -4215,7 +4243,7 @@ class SeerSDK:
4215
4243
  )
4216
4244
  urllib.request.urlretrieve(
4217
4245
  url,
4218
- f"{download_path}/{filename}",
4246
+ local_filename,
4219
4247
  reporthook=download_hook(t),
4220
4248
  data=None,
4221
4249
  )
@@ -4224,5 +4252,5 @@ class SeerSDK:
4224
4252
  if not os.path.isdir(f"{download_path}"):
4225
4253
  os.makedirs(f"{download_path}")
4226
4254
 
4227
- downloads.append(f"{download_path}/{filename}")
4255
+ downloads.append(local_filename)
4228
4256
  return downloads
@@ -4,6 +4,7 @@ seer_pas_sdk.core.unsupported -- in development
4
4
 
5
5
  import os
6
6
  import shutil
7
+ from pathlib import Path
7
8
 
8
9
  from typing import List as _List
9
10
 
@@ -827,20 +828,29 @@ class _UnsupportedSDK(_SeerSDK):
827
828
  )
828
829
 
829
830
  # Step 1: Check if paths and file extensions are valid.
831
+ invalid_d_zip_files = []
830
832
  for file in ms_data_files:
831
833
  if not valid_ms_data_file(file):
832
834
  raise ValueError(
833
835
  "Invalid file or file format. Please check your file."
834
836
  )
837
+ if file.endswith(".d.zip") and (not validate_d_zip_file(file)):
838
+ invalid_d_zip_files.append(file)
839
+
840
+ if invalid_d_zip_files:
841
+ raise ValueError(
842
+ f"The following .d.zip files are invalid: {', '.join(invalid_d_zip_files)}. Please check your files."
843
+ )
835
844
 
836
845
  extensions = set(
837
- [os.path.splitext(file.lower())[1] for file in ms_data_files]
846
+ ["".join(Path(file).suffixes) for file in ms_data_files]
838
847
  )
839
848
 
840
849
  if filenames and ".d.zip" in extensions:
841
850
  raise ValueError(
842
851
  "Please leave the 'filenames' parameter empty when working with .d.zip files. SeerSDK.rename_d_zip_file() is available for this use case."
843
852
  )
853
+
844
854
  # Step 2: Use active tenant to fetch the tenant_id.
845
855
  tenant_id = self.get_active_tenant_id()
846
856
 
@@ -1473,6 +1483,7 @@ class _UnsupportedSDK(_SeerSDK):
1473
1483
  # 1. Get msrun data for analysis
1474
1484
  samples = self.find_samples(analysis_id=analysis_id)
1475
1485
  sample_name_to_id = {s["sample_name"]: s["id"] for s in samples}
1486
+ sample_uuid_to_id = {s["id"]: s["sample_id"] for s in samples}
1476
1487
  # for np rollup, a row represents an msrun
1477
1488
  msruns = self.find_msruns(sample_ids=sample_name_to_id.values())
1478
1489
  file_to_msrun = {
@@ -1636,8 +1647,7 @@ class _UnsupportedSDK(_SeerSDK):
1636
1647
  )
1637
1648
  )
1638
1649
  df = df[included_columns]
1639
- df.columns = [title_case_to_snake_case(x) for x in df.columns]
1640
- return df
1650
+
1641
1651
  else:
1642
1652
  # precursor
1643
1653
  # working only in report.tsv
@@ -1678,10 +1688,17 @@ class _UnsupportedSDK(_SeerSDK):
1678
1688
  "IM",
1679
1689
  "iIM",
1680
1690
  ]
1681
- df = search_results[included_columns]
1682
- df.columns = [title_case_to_snake_case(x) for x in df.columns]
1691
+ df = pd.DataFrame(search_results[included_columns])
1692
+
1693
+ df.columns = [title_case_to_snake_case(x) for x in df.columns]
1694
+ df["sample_uuid"] = df["sample_id"]
1695
+ df["sample_id"] = df["sample_uuid"].apply(
1696
+ lambda x: sample_uuid_to_id.get(x)
1697
+ )
1683
1698
 
1684
- return df
1699
+ if rollup == "panel":
1700
+ df.drop(columns=["msrun_id"], inplace=True, errors="ignore")
1701
+ return df
1685
1702
 
1686
1703
  def get_search_data_analytes(self, analysis_id: str, analyte_type: str):
1687
1704
  if analyte_type not in ["protein", "peptide", "precursor"]:
@@ -1734,27 +1751,57 @@ class _UnsupportedSDK(_SeerSDK):
1734
1751
  how="left",
1735
1752
  )
1736
1753
  elif analyte_type == "peptide":
1737
- peptide_results = self.get_search_result(
1738
- analysis_id=analysis_id, analyte_type="peptide", rollup="np"
1754
+
1755
+ # The below logic performs the following:
1756
+ # 1. orders each peptide group by Global.PG.Q.Value, Lib.PG.Q.Value, and Protein Group (ascending)
1757
+ # 2. for each peptide group, select the first row to find the precursor with the lowest Q values
1758
+ # 3. broadcasts the associated protein group columns across all rows with the same peptide.
1759
+ #
1760
+ # This ensures that for each peptide, we retain consistent protein information while avoiding duplication.
1761
+
1762
+ report_results = report_results.sort_values(
1763
+ [
1764
+ "Peptide",
1765
+ "Global.PG.Q.Value",
1766
+ "Lib.PG.Q.Value",
1767
+ "Protein Group",
1768
+ ]
1739
1769
  )
1740
- peptide_results = peptide_results[["Peptide", "Protein Group"]]
1741
- search_results = pd.merge(
1742
- peptide_results,
1743
- search_results,
1744
- on=["Protein Group"],
1745
- how="left",
1770
+
1771
+ columns_to_broadcast = ["Protein Group", "Protein.Ids"]
1772
+ broadcasted = (
1773
+ report_results.groupby("Peptide")
1774
+ .apply(
1775
+ lambda x: pd.Series(
1776
+ {
1777
+ col: x.iloc[0][col]
1778
+ for col in columns_to_broadcast + ["Peptide"]
1779
+ }
1780
+ )
1781
+ )
1782
+ .reset_index(drop=True)
1783
+ )
1784
+ report_results = (
1785
+ report_results.drop(columns=columns_to_broadcast)
1786
+ .merge(broadcasted, on="Peptide", how="left")
1787
+ .drop_duplicates(subset=["Peptide"])
1746
1788
  )
1747
1789
 
1748
- report_results = report_results[
1749
- ["Peptide", "Protein.Ids", "Protein.Group"]
1750
- ]
1751
- report_results.drop_duplicates(subset=["Peptide"], inplace=True)
1752
1790
  df = pd.merge(
1753
- search_results,
1754
1791
  report_results,
1755
- on=["Peptide"],
1792
+ search_results,
1793
+ on=["Protein Group"],
1756
1794
  how="left",
1757
1795
  )
1796
+ df = df[
1797
+ [
1798
+ "Peptide",
1799
+ "Protein Group",
1800
+ "Protein.Ids",
1801
+ "Protein Names",
1802
+ "Gene Names",
1803
+ ]
1804
+ ]
1758
1805
  else:
1759
1806
  # precursor
1760
1807
  search_results = search_results[
@@ -1762,9 +1809,6 @@ class _UnsupportedSDK(_SeerSDK):
1762
1809
  "Protein Group",
1763
1810
  "Protein Names",
1764
1811
  "Gene Names",
1765
- "Biological Process",
1766
- "Molecular Function",
1767
- "Cellular Component",
1768
1812
  ]
1769
1813
  ]
1770
1814
  search_results.drop_duplicates(
@@ -1779,7 +1823,6 @@ class _UnsupportedSDK(_SeerSDK):
1779
1823
  "Protein.Ids",
1780
1824
  "Protein.Names",
1781
1825
  "Genes",
1782
- "First.Protein.Description",
1783
1826
  "Modified.Sequence",
1784
1827
  "Proteotypic",
1785
1828
  "Global.Q.Value",
@@ -1788,8 +1831,43 @@ class _UnsupportedSDK(_SeerSDK):
1788
1831
  "Lib.PG.Q.Value",
1789
1832
  ]
1790
1833
  ]
1791
- report_results.drop_duplicates(
1792
- subset=["Protein Group"], inplace=True
1834
+
1835
+ # The below logic performs the following:
1836
+ # 1. orders each peptide group by Global.PG.Q.Value, Lib.PG.Q.Value, and Protein Group (ascending)
1837
+ # 2. for each peptide group, selects the first row, i.e. the precursor with the lowest Q values
1838
+ # 3. broadcasts the associated protein group columns across all rows with the same peptide.
1839
+ #
1840
+ # This ensures that for each peptide, we retain consistent protein information while avoiding duplication.
1841
+ columns_to_broadcast = [
1842
+ "Protein Group",
1843
+ "Protein.Ids",
1844
+ "Protein.Names",
1845
+ "Genes",
1846
+ ]
1847
+ report_results = report_results.sort_values(
1848
+ [
1849
+ "Peptide",
1850
+ "Global.PG.Q.Value",
1851
+ "Lib.PG.Q.Value",
1852
+ "Protein Group",
1853
+ ],
1854
+ )
1855
+ broadcasted = (
1856
+ report_results.groupby("Peptide")
1857
+ .apply(
1858
+ lambda x: pd.Series(
1859
+ {
1860
+ col: x.iloc[0][col]
1861
+ for col in columns_to_broadcast + ["Peptide"]
1862
+ }
1863
+ )
1864
+ )
1865
+ .reset_index(drop=True)
1866
+ )
1867
+ report_results = (
1868
+ report_results.drop(columns=columns_to_broadcast)
1869
+ .merge(broadcasted, on="Peptide", how="left")
1870
+ .drop_duplicates(subset=["Peptide", "Precursor.Charge"])
1793
1871
  )
1794
1872
  df = pd.merge(
1795
1873
  report_results,
@@ -1806,7 +1884,6 @@ class _UnsupportedSDK(_SeerSDK):
1806
1884
  "Protein.Ids",
1807
1885
  "Protein.Names",
1808
1886
  "Genes",
1809
- "First.Protein.Description",
1810
1887
  "Modified.Sequence",
1811
1888
  "Proteotypic",
1812
1889
  "Global.Q.Value",
@@ -1814,9 +1891,6 @@ class _UnsupportedSDK(_SeerSDK):
1814
1891
  "Lib.Q.Value",
1815
1892
  "Lib.PG.Q.Value",
1816
1893
  "Gene Names",
1817
- "Biological Process",
1818
- "Molecular Function",
1819
- "Cellular Component",
1820
1894
  ]
1821
1895
  ]
1822
1896
  df.rename(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: seer-pas-sdk
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: SDK for Seer Proteograph Analysis Suite (PAS)
5
5
  Author-email: Ryan Sun <rsun@seer.bio>
6
6
  License:
File without changes
File without changes
File without changes
File without changes