seer-pas-sdk 1.1.1__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/PKG-INFO +1 -1
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/docs/index.qmd +55 -28
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/common/__init__.py +46 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/core/sdk.py +204 -176
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/core/unsupported.py +104 -30
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/PKG-INFO +1 -1
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.github/workflows/lint.yml +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.github/workflows/publish.yml +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.github/workflows/test.yml +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.gitignore +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/.pre-commit-config.yaml +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/LICENSE.txt +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/README.md +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/docs/_quarto.yml +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/pyproject.toml +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/__init__.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/auth/__init__.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/auth/auth.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/common/errors.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/common/groupanalysis.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/core/__init__.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/__init__.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/groupanalysis.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/headers.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/platemap.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk/objects/volcanoplot.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/SOURCES.txt +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/dependency_links.txt +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/requires.txt +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/seer_pas_sdk.egg-info/top_level.txt +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/setup.cfg +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/__init__.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/conftest.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/objects/__init__.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/objects/test_platemap.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/test_auth.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/test_common.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/test_objects.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/test_sdk.py +0 -0
- {seer_pas_sdk-1.1.1 → seer_pas_sdk-1.2.0}/tests/unsupported_platemap.py +0 -0
docs/index.qmd

@@ -18,12 +18,27 @@ $ pip install seer-pas-sdk
 This page gives an overview of the SDK's feature. Complete documentation for each class / method can be found [here](reference/).
 
 ### Configuration
-PAS has a simple authorization system that
+The PAS SDK has a simple authorization system that involves your username and password fields like on the web app. You can define your username and password for your own ready reference and convenience as follows:
 ```{python}
 USERNAME = "gnu403"
 PASSWORD = "Test!234567"
 ```
 
+The PAS SDK requires either a `tenant` or `tenant_id` argument in the SDK object constructor.
+
+`tenant` refers to the user provided name of the tenant.
+
+`tenant_id` refers to the immutable and unique identifier of the tenant.
+`tenant_id` is an absolute reference to the tenant, even if the tenant name is changed.
+
+More details on multi-tenant management can be found in the [Multi Tenant Management](#multi-tenant-management) section below.
+
+You can define your tenant name or tenant ID as follows:
+```{python}
+TENANT = "My Tenant Name"
+TENANT_ID = "abc1234abc1234"
+```
+
 You may also choose to pass in an `instance` param in the SDK object to instantiate the PAS SDK to the EU or US instance.:
 ```{python}
 INSTANCE = "US"

@@ -38,10 +53,13 @@ After importing the SeerSDK module, you can instantiate an object in the followi
 from seer_pas_sdk import SeerSDK
 
 # Instantiate an SDK object with your credentials:
-sdk = SeerSDK(USERNAME, PASSWORD)
+sdk = SeerSDK(USERNAME, PASSWORD, tenant=TENANT)
 
-#
-sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
+# Instantiate an SDK object with your credentials and instance:
+sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant=TENANT)
+
+# Instantiate an SDK object with your credentials and tenant ID:
+sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant_id=TENANT_ID)
 ```
 
 ```{python}
@@ -56,18 +74,16 @@ Additional information and examples can also be found below.
 ### Multi Tenant Management
 Introduced in version 0.2.0
 
-By default, you will be active in your home tenant upon log in. The home tenant is defined as the organization account that issued the original invitation for the user to join PAS.
-The optional 'tenant' parameter is available in the SeerSDK constructor to navigate directly to a desired tenant.
-A notification message will display upon login.
-
-
 The following tools are available to navigate between tenants:
 ```{python}
 #| eval: false
 from seer_pas_sdk import SeerSDK
 
+# Assume tenant upon login
 sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant='My Active Tenant')
 
+sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant_id='myuuidstring-1234')
+
 # Retrieve value of current active tenant
 print(sdk.get_active_tenant())
 
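To show how these pieces fit together in one session, here is a minimal sketch that combines calls visible elsewhere in this diff (`get_user_tenant(index=False)` and `switch_tenant()` appear in the updated `SeerSDK.__init__` in sdk.py below); the credentials, tenant names, and record keys are assumptions based on that constructor code:

```python
from seer_pas_sdk import SeerSDK

# Placeholder credentials; log in directly to a tenant by its immutable ID.
sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE, tenant_id=TENANT_ID)

# List every tenant visible to this user. The constructor in sdk.py reads
# 'institution' and 'tenantId' from these records, so both keys should exist.
for record in sdk.get_user_tenant(index=False):
    print(record["institution"], record["tenantId"])

# Move to another tenant by name (or by ID), then confirm the active tenant.
sdk.switch_tenant("My Other Tenant")
print(sdk.get_active_tenant())
```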
@@ -578,10 +594,17 @@ log(analysis)
 
 
 ### Find Analyses
-Returns a list of analyses objects for the authenticated user. If
+Returns a list of analyses objects for the authenticated user. If `None` is provided for all query arguments, returns all analyses available to the user within the active tenant.
 
 ###### <u>Params</u>
-`analysis_id`: (`str`, optional) Unique ID of the analysis to be fetched, defaulted to None.
+* `analysis_id`: (`str`, optional) Unique ID of the analysis to be fetched, defaulted to None.
+* `analysis_name`: (`str`, optional) Name of the analysis to be fetched, defaulted to None. Results will be matched on a substring basis.
+* `folder_id`: (`str`, optional) Unique ID of the folder to fetch analyses from, defaulted to None.
+* `folder_name`: (`str`, optional) Name of the folder to fetch analyses from, defaulted to None.
+* `project_id`: (`str`, optional) Unique ID of the project to filter the result set of analyses, defaulted to None.
+* `project_name`: (`str`, optional) Name of the project to filter the result set of analyses, defaulted to None.
+* `plate_name`: (`str`, optional) Name of a plate to filter the result set of analyses, defaulted to None.
+* `as_df`: (`bool`, optional) Whether the result should be converted to a DataFrame, defaulted to False.
 <br>
 
 ###### <u>Returns</u>
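For orientation, a minimal usage sketch of the parameters documented above; the analysis name and ID are placeholders:

```python
# Substring match on the analysis name, returned as a pandas DataFrame.
matches = sdk.find_analyses(analysis_name="DIA", as_df=True)
print(matches.shape)

# Fetch a single analysis when its unique ID is already known
# (the implementation returns a one-element list in this case).
one = sdk.find_analyses(analysis_id="abc1234abc1234")
```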
@@ -955,11 +978,9 @@ log(sdk.group_analysis_results(group_analysis_id, box_plot_info))
 Downloads the FASTA file(s) associated with an analysis protocol. You can specify an analysis_id (the function will resolve the protocol automatically) or provide an analysis_protocol_id directly.
 
 ###### <u>Params</u>
-* `analysis_protocol_id`: (`str`, optional) ID of the analysis protocol
-
-* `
-
-* `download_path`: (`str`, optional) Directory to save files to. Defaults to the current working directory.
+* `analysis_protocol_id`: (`str`, optional) The unique ID of the analysis protocol associated with the FASTA files to download.
+* `analysis_id`: (`str`, optional) The unique ID of the analysis whose protocol FASTA file(s) will be downloaded.
+* `analysis_name`: (`str`, optional) The name of the analysis whose protocol FASTA file(s) will be downloaded.
 
 Note: Provide either analysis_id or analysis_protocol_id (but not both).
 

@@ -977,6 +998,10 @@ sdk.download_analysis_protocol_fasta(
 )
 ```
 
+```
+['./uniprot_human_2023_08.fasta', './contaminants.fasta']
+```
+
 Download by analysis protocol ID to a specific folder:
 ```{python}
 #| eval: false
@@ -991,16 +1016,20 @@ sdk.download_analysis_protocol_fasta(
 ```
 <br>
 
-### Get Analysis Protocol FASTA
-Returns
+### Get Analysis Protocol FASTA URLs
+Returns download URLs for the FASTA file(s) associated with an analysis protocol. You can specify an analysis_id (the function will resolve the protocol automatically) or provide an analysis_protocol_id directly.
+
+Download URLs are valid for 15 minutes after generation.
 
 ###### <u>Params</u>
-* `analysis_protocol_id`: (`str`, optional) ID of the analysis protocol
-* `analysis_id`: (`str`, optional) ID of the analysis whose protocol FASTA file(s)
-
+* `analysis_protocol_id`: (`str`, optional) The unique ID of the analysis protocol associated with the FASTA files.
+* `analysis_id`: (`str`, optional) The unique ID of the analysis whose protocol FASTA file(s) should be retrieved.
+* `analysis_name`: (`str`, optional) The name of the analysis whose protocol FASTA file(s) should be retrieved.
+
+If both parameters are provided, `analysis_protocol_id` takes precedence.
 
 ###### <u>Returns</u>
-* links: (`
+* links: (`dict`) Dictionary containing filename and signed URL as key-value pairs for the FASTA files linked to the protocol.
 
 ###### <u>Examples</u>
 Get by analysis ID:

@@ -1012,10 +1041,8 @@ sdk.get_analysis_protocol_fasta_link(
 ```
 
 ```
-
-
-{"filename": "contaminants.fasta", "url": "https://...signed..."}
-]
+{"uniprot_human_2023_08.fasta" : "https://...signed...",
+"contaminants.fasta" : "https://...signed..."}
 ```
 Get by analysis protocol ID:
 ```{python}

@@ -1026,8 +1053,8 @@ sdk.get_analysis_protocol_fasta_link(
 ```
 ```
 [
-{"
-
+{"uniprot_human_2023_08.fasta" : "https://...signed...",
+"contaminants.fasta" : "https://...signed..."}
 ]
 ```
 <hr>
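Since the signed URLs expire 15 minutes after generation, the typical pattern is to fetch the mapping and download immediately. A minimal sketch, assuming the `get_analysis_protocol_fasta_urls` method added in sdk.py below (the surrounding docs examples still show the older `get_analysis_protocol_fasta_link` name) and a placeholder protocol ID:

```python
import urllib.request

# {filename: signed URL}; download promptly, the links are short-lived.
links = sdk.get_analysis_protocol_fasta_urls(
    analysis_protocol_id="abc1234abc1234"
)
for filename, url in links.items():
    urllib.request.urlretrieve(url, filename)
    print(f"Saved {filename}")
```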
seer_pas_sdk/common/__init__.py

@@ -679,6 +679,52 @@ def camel_case(s):
     return "".join([s[0].lower(), s[1:]])
 
 
+def validate_d_zip_file(file):
+    """
+    Return True if a .d.zip file aligns with Seer requirements for PAS upload.
+
+    Parameters
+    ----------
+    file : str
+        The name of the zip file.
+
+    Returns
+    -------
+    bool
+        True if the .d.zip file is valid, False otherwise.
+    """
+
+    if not file.lower().endswith(".d.zip"):
+        return False
+
+    basename = os.path.basename(file)
+
+    # Remove the .zip extension to get the .d folder name
+    d_name = basename[:-4]
+
+    try:
+        with zipfile.ZipFile(file, "r") as zf:
+            names = zf.namelist()
+
+    except:
+        return False
+
+    if not names:
+        return False
+
+    # check for files at the root level
+    root_entries = [n for n in names if "/" not in n.rstrip("/")]
+    if root_entries:
+        return False
+
+    # find folders
+    top_level = {n.split("/")[0] for n in names}
+    if len(top_level) != 1 or d_name not in top_level:
+        return False
+
+    return True
+
+
 def rename_d_zip_file(source, destination):
     """
     Renames a .d.zip file. The function extracts the contents of the source zip file, renames the inner .d folder, and rezips the contents into the destination zip file.
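To make the acceptance rule concrete: the validator requires a single top-level folder named after the archive (minus `.zip`) and no files at the zip root. A self-contained sketch with made-up file names:

```python
import os
import zipfile

from seer_pas_sdk.common import validate_d_zip_file

# "sample.d.zip" wraps exactly one top-level "sample.d/" folder: accepted.
with zipfile.ZipFile("sample.d.zip", "w") as zf:
    zf.writestr("sample.d/analysis.tdf", b"...")
    zf.writestr("sample.d/analysis.tdf_bin", b"...")

# An archive with a stray file at the zip root: rejected.
with zipfile.ZipFile("bad.d.zip", "w") as zf:
    zf.writestr("readme.txt", b"stray root-level file")

print(validate_d_zip_file("sample.d.zip"))  # True
print(validate_d_zip_file("bad.d.zip"))     # False

os.remove("sample.d.zip")
os.remove("bad.d.zip")
```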
seer_pas_sdk/core/sdk.py

@@ -31,31 +31,57 @@ class SeerSDK:
     >>> seer_sdk = SeerSDK(USERNAME, PASSWORD, INSTANCE)
     """
 
-    def __init__(
+    def __init__(
+        self,
+        username: str,
+        password: str,
+        instance: str = "US",
+        tenant: str = None,
+        tenant_id: str = None,
+    ):
         try:
             self._auth = Auth(username, password, instance)
 
             self._auth._login()
             print(f"User '{username}' logged in.\n")
+        except Exception as e:
+            raise ValueError(
+                f"Could not log in.\nPlease check your credentials and/or instance: {e}."
+            )
 
-
-
-
-
-
+        # direct logged in user to the specified tenant
+        tenant_data = pd.DataFrame(
+            self.get_user_tenant(index=False),
+            columns=["institution", "tenantId"],
+        ).rename(
+            columns={"institution": "Tenant name", "tenantId": "Tenant ID"}
+        )
+        tenant_names = tenant_data["Tenant name"].tolist()
+        tenant_ids = tenant_data["Tenant ID"].tolist()
+
+        # precondition: None is not a valid tenant_name or tenant_id.
+        if tenant_id is None and tenant is None:
+            self.logout()
+            if None in tenant_names:
                 print(
-
+                    "Warning: You have access to a tenant with no name. Please either provide a tenant name in the PAS website or specify a tenant_id to access that tenant."
                 )
-            print("Logging into home tenant...")
-            # If an error occurs while directing the user to a tenant, default to home tenant.
-            print(f"You are now active in {self.get_active_tenant_name()}")
-        except ServerError as e:
-            raise e
-        except Exception as e:
             raise ValueError(
-                f"
+                f"Either tenant or tenant_id must be provided. Please indicate one of the following tenants: \n{tenant_data.to_string(index=False)}"
             )
 
+        if tenant_id not in tenant_ids:
+            if tenant in tenant_names:
+                # if multiple tenants exist for the same institution name, fall back on multiple tenant error in switch_tenant.
+                self.switch_tenant(tenant)
+            else:
+                self.logout()
+                raise ValueError(
+                    f"Invalid tenant or tenant_id provided. Please indicate one of the following tenants: \n{tenant_data.to_string(index=False)}"
+                )
+        else:
+            self.switch_tenant(tenant_id)
+
     def logout(self):
         """
         Perform a logout operation for the current user of the SDK instance.
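In practice the constructor change above means that omitting both tenant arguments, or passing one that does not resolve, logs the session out and raises a ValueError that lists the tenants available to the user. A short sketch with placeholder credentials:

```python
from seer_pas_sdk import SeerSDK

try:
    # Neither tenant nor tenant_id: the constructor logs out and raises,
    # including the "Tenant name / Tenant ID" table in the message.
    sdk = SeerSDK("user@example.com", "password")
except ValueError as err:
    print(err)

# A valid tenant_id routes the session through switch_tenant().
sdk = SeerSDK("user@example.com", "password", tenant_id="abc1234abc1234")
```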
@@ -115,7 +141,7 @@ class SeerSDK:
             response = s.get(f"{self._auth.url}api/v1/usertenants")
 
             if response.status_code != 200:
-                raise
+                raise ServerError(
                     "Invalid request. Please check your parameters."
                 )
 

@@ -1471,7 +1497,7 @@ class SeerSDK:
                         analysis_protocol_engine=res["analysis_engine"],
                     )
                 )
-            except:
+            except Exception:
                 res["fasta"] = ""
             return res
         else:

@@ -1590,10 +1616,7 @@ class SeerSDK:
                 try:
                     res[entry]["fasta"] = ",".join(
                         self._get_analysis_protocol_fasta_filenames(
-                            analysis_protocol_id=res[entry]["id"]
-                            analysis_protocol_engine=res[entry].get(
-                                "analysis_engine", None
-                            ),
+                            analysis_protocol_id=res[entry]["id"]
                         )
                     )
                 except:

@@ -1821,18 +1844,12 @@ class SeerSDK:
             if not res.get("is_folder") and res.get(
                 "analysis_protocol_id"
             ):
-                analysis_protocol = self.get_analysis_protocol(
-                    analysis_protocol_id=res.get("analysis_protocol_id")
-                )
                 try:
                     res["fasta"] = ",".join(
                         self._get_analysis_protocol_fasta_filenames(
                             analysis_protocol_id=res.get(
                                 "analysis_protocol_id"
-                            )
-                            analysis_protocol_engine=analysis_protocol.get(
-                                "analysis_engine", None
-                            ),
+                            )
                         )
                     )
                 except Exception as e:

@@ -1854,49 +1871,60 @@ class SeerSDK:
         else:
             return res[0]
 
+    def _lookup_analysis_folders(self):
+        """
+        Helper function to map analysis folder ids to names.
+        """
+        with self._get_auth_session("getanalysisfolders") as s:
+            URL = f"{self._auth.url}api/v1/analyses"
+            params = {"all": "true", "folderonly": "true"}
+            folders = s.get(URL, params=params)
+            if folders.status_code != 200:
+                raise ValueError(
+                    "Failed to fetch analysis folders. Please check your connection."
+                )
+            res = folders.json()["data"]
+            return res
+
     def find_analyses(
         self,
         analysis_id: str = None,
+        analysis_name: str = None,
         folder_id: str = None,
-
-        analysis_only: bool = True,
+        folder_name: str = None,
         project_id: str = None,
+        project_name: str = None,
         plate_name: str = None,
         as_df=False,
-        **kwargs,
     ):
         """
-        Returns a list of analyses objects for the authenticated user. If
-        Search parameters may be passed in as keyword arguments to filter the results. Acceptable values are 'analysis_name', 'folder_name', 'description', 'notes', or 'number_msdatafile'.
-        Only search on a single field is supported.
+        Returns a list of analyses objects for the authenticated user. If None is provided for all query arguments, returns all analyses for the authenticated user.
 
         Parameters
         ----------
         analysis_id : str, optional
             ID of the analysis to be fetched, defaulted to None.
 
+        analysis_name : str, optional
+            Name of the analysis to be fetched, defaulted to None. Results will be matched on substring basis.
+
         folder_id : str, optional
-            ID of
+            Unique ID of an analysis folder to filter results, defaulted to None.
 
-
-
-            Will be disabled if an analysis id is provided.
-
-        analysis_only : bool, optional
-            Mark True if only analyses objects are to be returned in the response, defaulted to True.
-            If marked false, folder objects will also be included in the response.
+        folder_name : str, optional
+            Name of an analysis folder to filter results, defaulted to None.
 
         project_id : str, optional
-            ID of
+            Unique ID of an analysis folder to filter results, defaulted to None.
+
+        project_name : str, optional
+            Name of a project to filter results, defaulted to None.
 
         plate_name : str, optional
-            Name of
+            Name of a plate to filter results, defaulted to None.
 
         as_df : bool, optional
-
-
-        **kwargs : dict, optional
-            Search keyword parameters to be passed in. Acceptable values are 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'.
+            Whether the result should be converted to a DataFrame, defaulted to False.
 
         Returns
         -------
@@ -1930,51 +1958,44 @@ class SeerSDK:
         URL = f"{self._auth.url}api/v1/analyses"
         res = []
 
-        search_field = None
-        search_item = None
-        if kwargs:
-            if len(kwargs.keys()) > 1:
-                raise ValueError("Please include only one search parameter.")
-            search_field = list(kwargs.keys())[0]
-            search_item = kwargs[search_field]
-
-            if not search_item:
-                raise ValueError(
-                    f"Please provide a non null value for {search_field}"
-                )
-
-        if search_field and search_field not in [
-            "analysis_name",
-            "folder_name",
-            "analysis_protocol_name",
-            "description",
-            "notes",
-            "number_msdatafile",
-        ]:
-            raise ValueError(
-                "Invalid search field. Please choose between 'analysis_name', 'folder_name', 'analysis_protocol_name', 'description', 'notes', or 'number_msdatafile'."
-            )
-
         if analysis_id:
             try:
                 return [self.get_analysis(analysis_id=analysis_id)]
-            except:
+            except Exception:
                 return []
 
+        analysis_folders = self._lookup_analysis_folders()
+        analysis_folder_id_to_name = {
+            x["id"]: x["analysis_name"] for x in analysis_folders
+        }
+        analysis_folder_name_to_id = {
+            v: k for k, v in analysis_folder_id_to_name.items()
+        }
+
+        if folder_name and not folder_id:
+            folder_id = analysis_folder_name_to_id.get(folder_name, None)
+            if not folder_id:
+                raise ValueError(f"No folder found with name '{folder_name}'.")
+
+        if project_name and not project_id:
+            project = self.get_project(project_name=project_name)
+            if not project:
+                raise ValueError(
+                    f"No project found with name '{project_name}'."
+                )
+            project_id = project["id"]
+
         with self._get_auth_session("findanalyses") as s:
 
-            params = {"all": "true"}
+            params = {"all": "true", "analysisonly": "true"}
            if folder_id:
                 params["folder"] = folder_id
 
-            if
-                params["searchFields"] =
-                params["searchItem"] =
+            if analysis_name:
+                params["searchFields"] = "analysis_name"
+                params["searchItem"] = analysis_name
                 del params["all"]
 
-            if search_field == "folder_name":
-                params["searchFields"] = "analysis_name"
-
             if project_id:
                 params["projectId"] = project_id
 
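A small sketch of the behaviour this block adds: folder and project names are resolved to IDs before the query, and a name that cannot be resolved raises instead of silently returning everything (the folder and project names here are placeholders):

```python
try:
    analyses = sdk.find_analyses(folder_name="2024 Plasma Runs")
except ValueError as err:
    print(err)  # e.g. "No folder found with name '2024 Plasma Runs'."

# A known project name is resolved through get_project() to its ID.
analyses = sdk.find_analyses(project_name="Plasma Pilot", as_df=True)
```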
@@ -1989,9 +2010,8 @@ class SeerSDK:
             )
             res = analyses.json()["data"]
 
-            folders = []
             spaces = {x["id"]: x["usergroup_name"] for x in self.get_spaces()}
-
+            protocol_to_fasta = {}
             for entry in range(len(res)):
                 if "tenant_id" in res[entry]:
                     del res[entry]["tenant_id"]

@@ -2005,11 +2025,14 @@ class SeerSDK:
                     ][location(res[entry]["parameter_file_path"]) :]
 
                 if (
-
-                    and not
-                    and res[entry]["is_folder"]
+                    "folder_id" in res[entry]
+                    and res[entry]["folder_id"] is not None
                 ):
-
+                    res[entry]["folder_name"] = analysis_folder_id_to_name.get(
+                        res[entry]["folder_id"], None
+                    )
+                    res[entry]["folder_uuid"] = res[entry]["folder_id"]
+                    del res[entry]["folder_id"]
 
                 if "user_group" in res[entry]:
                     res[entry]["space"] = spaces.get(

@@ -2020,51 +2043,34 @@ class SeerSDK:
                 if (not res[entry].get("is_folder")) and res[entry].get(
                     "analysis_protocol_id"
                 ):
-
-
-
-                    )
-
-
+                    # analysis_protocol_id for this result row
+                    local_analysis_protocol_id = res[entry].get(
+                        "analysis_protocol_id"
+                    )
+                    if local_analysis_protocol_id in protocol_to_fasta:
+                        res[entry]["fasta"] = protocol_to_fasta[
+                            local_analysis_protocol_id
                         ]
                     else:
                         try:
-
-
-
+                            res[entry]["fasta"] = ",".join(
+                                self._get_analysis_protocol_fasta_filenames(
+                                    local_analysis_protocol_id,
+                                    analysis_protocol_engine=res[entry].get(
+                                        "analysis_engine"
+                                    ),
                                 )
                             )
-
-                            "
+                            protocol_to_fasta[local_analysis_protocol_id] = (
+                                res[entry]["fasta"]
                             )
-                            protocol_to_engine_map[
-                                res[entry]["analysis_protocol_id"]
-                            ] = analysis_protocol_engine
                         except:
-
-
-                            res[entry]["fasta"] = ",".join(
-                                self._get_analysis_protocol_fasta_filenames(
-                                    res[entry]["analysis_protocol_id"],
-                                    analysis_protocol_engine=analysis_protocol_engine,
+                            print(
+                                f"Warning: Could not fetch fasta files for analysis {res[entry].get('analysis_name')}."
                             )
-                            )
-                        except:
-                            print(
-                                f"Warning: Could not fetch fasta files for analysis {res[entry].get('analysis_name')}."
-                            )
-                            res[entry]["fasta"] = None
                 else:
                     res[entry]["fasta"] = None
 
-            # recursive solution to get analyses in folders
-            for folder in folders:
-                res += self.find_analyses(folder_id=folder)
-
-            if analysis_only:
-                res = [
-                    analysis for analysis in res if not analysis["is_folder"]
-                ]
             if not res and as_df:
                 return pd.DataFrame(columns=ANALYSIS_COLUMNS)
             return res if not as_df else dict_to_df(res)

@@ -4059,7 +4065,7 @@ class SeerSDK:
             print(f"Downloaded file to {download_path}/{file}")
 
     def _get_analysis_protocol_fasta_filenames(
-        self, analysis_protocol_id: str, analysis_protocol_engine: str
+        self, analysis_protocol_id: str, analysis_protocol_engine: str = None
     ):
         """
         Helper function - Get the fasta file name(s) associated with a given analysis protocol and engine.

@@ -4069,6 +4075,14 @@ class SeerSDK:
         Returns:
             list[str]: A list of fasta file names associated with the analysis protocol.
         """
+        if not analysis_protocol_engine:
+            analysis_protocol_engine = self.get_analysis_protocol(
+                analysis_protocol_id=analysis_protocol_id
+            ).get("analysis_engine")
+            if not analysis_protocol_engine:
+                raise ValueError(
+                    f"Could not retrieve analysis protocol engine for analysis protocol {analysis_protocol_id}."
+                )
         analysis_protocol_engine = analysis_protocol_engine.lower()
         if analysis_protocol_engine == "diann":
             URL = f"{self._auth.url}api/v1/analysisProtocols/editableParameters/diann/{analysis_protocol_id}"

@@ -4108,8 +4122,35 @@ class SeerSDK:
             raise ServerError("No fasta file name returned from server.")
         return fasta_filenames
 
-    def
-        self,
+    def _get_analysis_protocol_fasta_url(
+        self, analysis_protocol_fasta_name: str
+    ):
+        """
+        Helper function - Get the download link for a given fasta file name.
+
+        Args:
+            analysis_protocol_fasta_name (str): Name of the fasta file.
+
+        Returns:
+            str: The URL to download the fasta file.
+        """
+        URL = f"{self._auth.url}api/v1/analysisProtocolFiles/getUrl"
+        with self._get_auth_session("getanalysisprotocolfilesurl") as s:
+            response = s.post(
+                URL, json={"filepath": analysis_protocol_fasta_name}
+            )
+            if response.status_code != 200:
+                raise ServerError(
+                    f"Could not retrieve download link for {analysis_protocol_fasta_name}."
+                )
+            url = response.json()["url"]
+            return url
+
+    def get_analysis_protocol_fasta_urls(
+        self,
+        analysis_protocol_id=None,
+        analysis_id=None,
+        analysis_name=None,
     ):
         """Get the download link(s) for the fasta file(s) associated with a given analysis protocol.
         Args:

@@ -4120,52 +4161,31 @@ class SeerSDK:
         Returns:
             list[dict]: A list of dictionaries containing the 'filename' and the 'url' to download the fasta file.
         """
-        if analysis_name and (not analysis_id):
-            analyses = self.find_analyses(analysis_name=analysis_name)
-            if len(analyses) > 1:
-                raise ValueError(
-                    f"Multiple analyses found with name {analysis_name}. Please provide an analysis ID instead."
-                )
-            elif len(analyses) == 0:
-                raise ValueError(
-                    f"No analyses found with name {analysis_name}."
-                )
-            else:
-                analysis_id = analyses[0]["id"]
-
-        if not (bool(analysis_protocol_id) ^ bool(analysis_id)):
-            raise ValueError(
-                "Please provide either an analysis ID or an analysis protocol ID."
-            )
-
         if not analysis_protocol_id:
-
-
-
-
-
-            raise ValueError(f"Could not parse server response.")
+            analysis = self.get_analysis(
+                analysis_id=analysis_id,
+                analysis_name=analysis_name,
+            )
+            analysis_protocol_id = analysis.get("analysis_protocol_id")
 
-        engine = self.get_analysis_protocol(
-            analysis_protocol_id=analysis_protocol_id
-        ).get("analysis_engine", None)
         fasta_filenames = self._get_analysis_protocol_fasta_filenames(
-            analysis_protocol_id=analysis_protocol_id
-            analysis_protocol_engine=engine,
+            analysis_protocol_id=analysis_protocol_id
         )
-
-        links =
-        for
-
-
-
-
-
-
-
-
-
+
+        links = {}
+        for filepath in fasta_filenames:
+            filename = os.path.basename(filepath)
+            try:
+                url = self._get_analysis_protocol_fasta_url(
+                    analysis_protocol_fasta_name=filepath
+                )
+            except ServerError:
+                print(
+                    f"ERROR: Could not retrieve download link for {filename}."
+                )
+                continue
+
+            links[filename] = url
         return links
 
     def download_analysis_protocol_fasta(

@@ -4186,20 +4206,28 @@ class SeerSDK:
         Returns:
             list[str] : The path to the downloaded fasta file(s).
         """
-
-
-
-        for x in self.get_analysis_protocol_fasta_link(
-            analysis_protocol_id=analysis_protocol_id,
-            analysis_id=analysis_id,
-            analysis_name=analysis_name,
+        if not analysis_protocol_id:
+            analysis = self.get_analysis(
+                analysis_id=analysis_id, analysis_name=analysis_name
             )
-
+            analysis_protocol_id = analysis.get("analysis_protocol_id")
+
+        filepaths = self._get_analysis_protocol_fasta_filenames(
+            analysis_protocol_id=analysis_protocol_id
+        )
         if not download_path:
             download_path = os.getcwd()
 
         downloads = []
-        for
+        for filepath in filepaths:
+            # run sequentially to avoid signed url expiration
+            url = self._get_analysis_protocol_fasta_url(
+                analysis_protocol_fasta_name=filepath
+            )
+            filename = os.path.basename(filepath)
+
+            # relative path of the file after download
+            local_filename = f"{download_path}/{filename}"
             print(f"Downloading {filename}")
             for _ in range(2):
                 try:

@@ -4215,7 +4243,7 @@ class SeerSDK:
                     )
                     urllib.request.urlretrieve(
                         url,
-
+                        local_filename,
                        reporthook=download_hook(t),
                         data=None,
                     )

@@ -4224,5 +4252,5 @@ class SeerSDK:
             if not os.path.isdir(f"{download_path}"):
                 os.makedirs(f"{download_path}")
 
-            downloads.append(
+            downloads.append(local_filename)
         return downloads
seer_pas_sdk/core/unsupported.py

@@ -4,6 +4,7 @@ seer_pas_sdk.core.unsupported -- in development
 
 import os
 import shutil
+from pathlib import Path
 
 from typing import List as _List
 

@@ -827,20 +828,29 @@ class _UnsupportedSDK(_SeerSDK):
             )
 
         # Step 1: Check if paths and file extensions are valid.
+        invalid_d_zip_files = []
         for file in ms_data_files:
             if not valid_ms_data_file(file):
                 raise ValueError(
                     "Invalid file or file format. Please check your file."
                 )
+            if file.endswith(".d.zip") and (not validate_d_zip_file(file)):
+                invalid_d_zip_files.append(file)
+
+        if invalid_d_zip_files:
+            raise ValueError(
+                f"The following .d.zip files are invalid: {', '.join(invalid_d_zip_files)}. Please check your files."
+            )
 
         extensions = set(
-            [
+            ["".join(Path(file).suffixes) for file in ms_data_files]
         )
 
         if filenames and ".d.zip" in extensions:
             raise ValueError(
                 "Please leave the 'filenames' parameter empty when working with .d.zip files. SeerSDK.rename_d_zip_file() is available for this use case."
             )
+
         # Step 2: Use active tenant to fetch the tenant_id.
         tenant_id = self.get_active_tenant_id()
 

@@ -1473,6 +1483,7 @@ class _UnsupportedSDK(_SeerSDK):
         # 1. Get msrun data for analysis
         samples = self.find_samples(analysis_id=analysis_id)
         sample_name_to_id = {s["sample_name"]: s["id"] for s in samples}
+        sample_uuid_to_id = {s["id"]: s["sample_id"] for s in samples}
         # for np rollup, a row represents an msrun
         msruns = self.find_msruns(sample_ids=sample_name_to_id.values())
         file_to_msrun = {

@@ -1636,8 +1647,7 @@ class _UnsupportedSDK(_SeerSDK):
                 )
             )
             df = df[included_columns]
-
-            return df
+
         else:
             # precursor
             # working only in report.tsv

@@ -1678,10 +1688,17 @@ class _UnsupportedSDK(_SeerSDK):
                 "IM",
                 "iIM",
             ]
-            df = search_results[included_columns]
-
+            df = pd.DataFrame(search_results[included_columns])
+
+            df.columns = [title_case_to_snake_case(x) for x in df.columns]
+            df["sample_uuid"] = df["sample_id"]
+            df["sample_id"] = df["sample_uuid"].apply(
+                lambda x: sample_uuid_to_id.get(x)
+            )
 
-
+        if rollup == "panel":
+            df.drop(columns=["msrun_id"], inplace=True, errors="ignore")
+        return df
 
     def get_search_data_analytes(self, analysis_id: str, analyte_type: str):
         if analyte_type not in ["protein", "peptide", "precursor"]:
@@ -1734,27 +1751,57 @@ class _UnsupportedSDK(_SeerSDK):
                 how="left",
             )
         elif analyte_type == "peptide":
-
-
+
+            # The below logic performs the following:
+            # 1. orders each peptide group by Global.PG.Q.Value, Lib.PG.Q.Value, and Protein Group (ascending)
+            # 2. for each peptide group, select the first row to find the precursor with the lowest Q values
+            # 3. broadcasts the associated protein group columns across all rows with the same peptide.
+            #
+            # This ensures that for each peptide, we retain consistent protein information while avoiding duplication.
+
+            report_results = report_results.sort_values(
+                [
+                    "Peptide",
+                    "Global.PG.Q.Value",
+                    "Lib.PG.Q.Value",
+                    "Protein Group",
+                ]
             )
-
-
-
-
-
-
+
+            columns_to_broadcast = ["Protein Group", "Protein.Ids"]
+            broadcasted = (
+                report_results.groupby("Peptide")
+                .apply(
+                    lambda x: pd.Series(
+                        {
+                            col: x.iloc[0][col]
+                            for col in columns_to_broadcast + ["Peptide"]
+                        }
+                    )
+                )
+                .reset_index(drop=True)
+            )
+            report_results = (
+                report_results.drop(columns=columns_to_broadcast)
+                .merge(broadcasted, on="Peptide", how="left")
+                .drop_duplicates(subset=["Peptide"])
             )
 
-            report_results = report_results[
-                ["Peptide", "Protein.Ids", "Protein.Group"]
-            ]
-            report_results.drop_duplicates(subset=["Peptide"], inplace=True)
             df = pd.merge(
-                search_results,
                 report_results,
-
+                search_results,
+                on=["Protein Group"],
                 how="left",
             )
+            df = df[
+                [
+                    "Peptide",
+                    "Protein Group",
+                    "Protein.Ids",
+                    "Protein Names",
+                    "Gene Names",
+                ]
+            ]
         else:
             # precursor
             search_results = search_results[
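The comment block added above describes a sort-then-take-first broadcast. As a standalone illustration of that pattern with toy column names (not the real report schema):

```python
import pandas as pd

# Toy frame: two rows per peptide with different q-values and protein groups.
df = pd.DataFrame(
    {
        "Peptide": ["AAA", "AAA", "BBB"],
        "Global.PG.Q.Value": [0.01, 0.001, 0.02],
        "Protein Group": ["P2", "P1", "P3"],
    }
)

# Sort so the lowest q-value row comes first within each peptide, then keep
# that row's protein group for every row of the peptide.
df = df.sort_values(["Peptide", "Global.PG.Q.Value", "Protein Group"])
best = df.groupby("Peptide", as_index=False).first()[["Peptide", "Protein Group"]]
df = df.drop(columns=["Protein Group"]).merge(best, on="Peptide", how="left")
print(df.drop_duplicates(subset=["Peptide"]))
```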
@@ -1762,9 +1809,6 @@ class _UnsupportedSDK(_SeerSDK):
                     "Protein Group",
                     "Protein Names",
                     "Gene Names",
-                    "Biological Process",
-                    "Molecular Function",
-                    "Cellular Component",
                 ]
             ]
             search_results.drop_duplicates(

@@ -1779,7 +1823,6 @@ class _UnsupportedSDK(_SeerSDK):
                     "Protein.Ids",
                     "Protein.Names",
                     "Genes",
-                    "First.Protein.Description",
                     "Modified.Sequence",
                     "Proteotypic",
                     "Global.Q.Value",

@@ -1788,8 +1831,43 @@ class _UnsupportedSDK(_SeerSDK):
                     "Lib.PG.Q.Value",
                 ]
             ]
-
-
+
+            # The below logic performs the following:
+            # 1. orders each peptide group by Global.PG.Q.Value, Lib.PG.Q.Value, and Protein Group (ascending)
+            # 2. for each peptide group, select the first row to find the precursor with the lowest Q values
+            # 3. broadcasts the associated protein group columns across all rows with the same peptide.
+            #
+            # This ensures that for each peptide, we retain consistent protein information while avoiding duplication.
+            columns_to_broadcast = [
+                "Protein Group",
+                "Protein.Ids",
+                "Protein.Names",
+                "Genes",
+            ]
+            report_results = report_results.sort_values(
+                [
+                    "Peptide",
+                    "Global.PG.Q.Value",
+                    "Lib.PG.Q.Value",
+                    "Protein Group",
+                ],
+            )
+            broadcasted = (
+                report_results.groupby("Peptide")
+                .apply(
+                    lambda x: pd.Series(
+                        {
+                            col: x.iloc[0][col]
+                            for col in columns_to_broadcast + ["Peptide"]
+                        }
+                    )
+                )
+                .reset_index(drop=True)
+            )
+            report_results = (
+                report_results.drop(columns=columns_to_broadcast)
+                .merge(broadcasted, on="Peptide", how="left")
+                .drop_duplicates(subset=["Peptide", "Precursor.Charge"])
             )
             df = pd.merge(
                 report_results,

@@ -1806,7 +1884,6 @@ class _UnsupportedSDK(_SeerSDK):
                     "Protein.Ids",
                     "Protein.Names",
                     "Genes",
-                    "First.Protein.Description",
                     "Modified.Sequence",
                     "Proteotypic",
                     "Global.Q.Value",

@@ -1814,9 +1891,6 @@ class _UnsupportedSDK(_SeerSDK):
                     "Lib.Q.Value",
                     "Lib.PG.Q.Value",
                     "Gene Names",
-                    "Biological Process",
-                    "Molecular Function",
-                    "Cellular Component",
                 ]
             ]
             df.rename(
The remaining files listed above contain no changes.