polly-python 2.5.0__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {polly_python-2.5.0/polly_python.egg-info → polly_python-3.1.0}/PKG-INFO +8 -15
- polly_python-3.1.0/polly/__init__.py +1 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/constants.py +0 -18
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/curation.py +6 -241
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/errors.py +0 -62
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/help.py +2 -3
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/helpers.py +5 -87
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/omixatlas.py +39 -33
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/pipelines.py +119 -92
- polly_python-3.1.0/polly/polly_kg.py +212 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/session.py +1 -1
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/tracking.py +6 -2
- {polly_python-2.5.0 → polly_python-3.1.0/polly_python.egg-info}/PKG-INFO +8 -15
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_python.egg-info/SOURCES.txt +4 -4
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_python.egg-info/requires.txt +5 -14
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/reporting/reporting.py +49 -27
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/reporting/reporting_hlpr.py +1 -1
- {polly_python-2.5.0 → polly_python-3.1.0}/setup.cfg +6 -13
- polly_python-3.1.0/tests/test_help.py +75 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_helpers.py +0 -18
- polly_python-3.1.0/tests/test_kg.py +201 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_omixatlas.py +9 -4
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_pipelines.py +34 -25
- polly_python-3.1.0/tests/test_validation.py +135 -0
- polly_python-2.5.0/polly/__init__.py +0 -1
- polly_python-2.5.0/polly/bridge_cohort.py +0 -399
- polly_python-2.5.0/polly/cohort.py +0 -433
- polly_python-2.5.0/polly/core_cohort.py +0 -721
- polly_python-2.5.0/tests/test_cohort.py +0 -216
- {polly_python-2.5.0 → polly_python-3.1.0}/LICENSE.md +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/MANIFEST.in +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/README.md +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/analyze.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/application_error_info.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/atlas.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/auth.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/data_management.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/http_response_codes.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/index_schema_level_conversion_const.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/jobs.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/omixatlas_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/s3_utils.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/threading_utils.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/validation.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/validation_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly/workspaces.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_interfaces/IFiles.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_interfaces/IReporting.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_interfaces/ISchema.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_interfaces/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_python.egg-info/dependency_links.txt +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_python.egg-info/top_level.txt +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/dataset.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/files/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/files/files.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/files/files_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/polly_services_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/reporting/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/schema.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/schema_const.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/schema_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/validate_schema_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/pyproject.toml +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/setup.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_constants.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_curation.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_data_management.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_jobs.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_s3_utils.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_schema_ux.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_threading_utils.py +0 -0
- {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_workspaces.py +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: polly_python
|
|
3
|
-
Version:
|
|
3
|
+
Version: 3.1.0
|
|
4
4
|
Summary: Polly SDK
|
|
5
5
|
Home-page: https://github.com/ElucidataInc/polly-python
|
|
6
6
|
Project-URL: Documentation, https://docs.elucidata.io
|
|
7
7
|
Project-URL: Tutorial Notebooks, https://github.com/ElucidataInc/polly-python
|
|
8
|
-
Requires-Python:
|
|
8
|
+
Requires-Python: <3.12,>=3.9
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE.md
|
|
11
|
-
Requires-Dist:
|
|
11
|
+
Requires-Dist: cmapPy<=4.0.1
|
|
12
12
|
Requires-Dist: cloudpathlib>=0.15.0
|
|
13
13
|
Requires-Dist: retrying==1.3.4
|
|
14
14
|
Requires-Dist: rst2txt==1.1.0
|
|
@@ -17,22 +17,15 @@ Requires-Dist: mixpanel==4.10.0
|
|
|
17
17
|
Requires-Dist: Deprecated>=1.2.12
|
|
18
18
|
Requires-Dist: pytest>=6.2.5
|
|
19
19
|
Requires-Dist: cryptography<=38.0.0,>=37.0.1
|
|
20
|
-
Requires-Dist: plotly<5.0.0,>=4.8.1; python_version > "3.6" and python_version < "3.7"
|
|
21
20
|
Requires-Dist: plotly>=5.0.0; python_version >= "3.7"
|
|
22
|
-
Requires-Dist: pandas
|
|
23
|
-
Requires-Dist:
|
|
24
|
-
Requires-Dist: pydantic<1.10.0a1,>=1.8.2; python_version > "3.6" and python_version < "3.7"
|
|
21
|
+
Requires-Dist: pandas<=2.2.2,>=1.3.5; python_version >= "3.7"
|
|
22
|
+
Requires-Dist: numpy<=1.26.4
|
|
25
23
|
Requires-Dist: pydantic==1.10.12; python_version >= "3.7"
|
|
26
24
|
Requires-Dist: requests==2.28.1
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist:
|
|
29
|
-
Requires-Dist: boto3>=1.24.0; python_version >= "3.7"
|
|
30
|
-
Requires-Dist: botocore<1.27.0,>=1.20.73; python_version > "3.6" and python_version < "3.7"
|
|
31
|
-
Requires-Dist: botocore>=1.27.0; python_version >= "3.7"
|
|
32
|
-
Requires-Dist: joblib<=1.1.0,>0.11.0; python_version > "3.6" and python_version < "3.7"
|
|
25
|
+
Requires-Dist: boto3<2.0,>=1.24.0; python_version >= "3.7"
|
|
26
|
+
Requires-Dist: botocore<2.0,>=1.27.0; python_version >= "3.7"
|
|
33
27
|
Requires-Dist: joblib>=1.2.0; python_version >= "3.7"
|
|
34
28
|
Requires-Dist: tabulate==0.9.0
|
|
35
|
-
Requires-Dist: tqdm<4.65.0,>=4.61.0; python_version > "3.6" and python_version < "3.7"
|
|
36
29
|
Requires-Dist: tqdm==4.65.0; python_version >= "3.7"
|
|
37
30
|
Provides-Extra: testing
|
|
38
31
|
Requires-Dist: black; extra == "testing"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "3.1.0"
|
|
@@ -91,24 +91,6 @@ IO_CHUNKSIZE_LARGE_FILE_SIZE = 100 * MB
|
|
|
91
91
|
# S3 Exceptions
|
|
92
92
|
EXPIRED_TOKEN = "ExpiredToken"
|
|
93
93
|
|
|
94
|
-
# cohort constants
|
|
95
|
-
COHORT_VERSION = "0.2"
|
|
96
|
-
COHORT_CONSTANTS_URL = (
|
|
97
|
-
"https://elucidatainc.github.io/PublicAssets/cohort_constants.txt"
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
OBSOLETE_METADATA_FIELDS = [
|
|
101
|
-
"package",
|
|
102
|
-
"region",
|
|
103
|
-
"bucket",
|
|
104
|
-
"key",
|
|
105
|
-
"file_type",
|
|
106
|
-
"file_location",
|
|
107
|
-
"src_uri",
|
|
108
|
-
"timestamp_",
|
|
109
|
-
]
|
|
110
|
-
dot = "."
|
|
111
|
-
|
|
112
94
|
GETTING_UPLOAD_URLS_PAYLOAD = {"data": {"type": "files", "attributes": {"folder": ""}}}
|
|
113
95
|
|
|
114
96
|
INGESTION_LEVEL_METADATA = {
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from collections import namedtuple
|
|
3
|
-
|
|
4
|
-
import shutil
|
|
3
|
+
|
|
5
4
|
from typing import Dict, Optional, List
|
|
6
|
-
|
|
5
|
+
|
|
7
6
|
import pandas as pd
|
|
8
7
|
from functools import lru_cache
|
|
9
8
|
from polly.errors import (
|
|
@@ -13,15 +12,15 @@ from polly.errors import (
|
|
|
13
12
|
RequestException,
|
|
14
13
|
UnauthorizedException,
|
|
15
14
|
extract_json_api_error,
|
|
16
|
-
paramException,
|
|
17
15
|
)
|
|
18
16
|
from polly.auth import Polly
|
|
19
|
-
|
|
17
|
+
|
|
20
18
|
from polly import helpers, constants as const, application_error_info as app_err_info
|
|
21
19
|
from polly.help import example
|
|
22
20
|
import polly.http_response_codes as http_codes
|
|
23
|
-
|
|
24
|
-
from polly.
|
|
21
|
+
|
|
22
|
+
from polly.constants import SUPPORTED_ENTITY_TYPES
|
|
23
|
+
|
|
25
24
|
from polly.tracking import Track
|
|
26
25
|
|
|
27
26
|
|
|
@@ -48,8 +47,6 @@ class Curation:
|
|
|
48
47
|
env="",
|
|
49
48
|
default_env="polly",
|
|
50
49
|
) -> None:
|
|
51
|
-
# check if COMPUTE_ENV_VARIABLE present or not
|
|
52
|
-
# if COMPUTE_ENV_VARIABLE, give priority
|
|
53
50
|
env = helpers.get_platform_value_from_env(
|
|
54
51
|
const.COMPUTE_ENV_VARIABLE, default_env, env
|
|
55
52
|
)
|
|
@@ -60,163 +57,12 @@ class Curation:
|
|
|
60
57
|
f"https://api.datalake.discover.{self.session.env}.elucidata.io/elastic/v2"
|
|
61
58
|
)
|
|
62
59
|
self.inference_url = f"https://api.discover.{self.session.env}.elucidata.io/curations/inferences/"
|
|
63
|
-
self.cohort = Cohort()
|
|
64
|
-
self.cohort_constants = get_cohort_constants()
|
|
65
60
|
|
|
66
61
|
def _handle_errors(self, response):
|
|
67
62
|
detail = response.get("errors")[0].get("detail", [])
|
|
68
63
|
title = response.get("errors")[0].get("title", [])
|
|
69
64
|
return title, detail
|
|
70
65
|
|
|
71
|
-
def _fetch_metadata_from_cohort(self, repo_name: str, dataset_ids: List[str]):
|
|
72
|
-
"""
|
|
73
|
-
Utility function for fetching metadata using cohorts.
|
|
74
|
-
|
|
75
|
-
Arguments:
|
|
76
|
-
repo_name (str) : name of the repository for fetching datasets.
|
|
77
|
-
dataset_ids (List[str]): dataset ids to be used for inference
|
|
78
|
-
|
|
79
|
-
Returns:
|
|
80
|
-
Returns sample metadata, dataset and sample ids.
|
|
81
|
-
"""
|
|
82
|
-
sample_metadata = {}
|
|
83
|
-
dataset_to_sample_id = {"dataset_id": [], "sample_id": []}
|
|
84
|
-
|
|
85
|
-
if not (os.path.isdir(CURATION_COHORT_CACHE)):
|
|
86
|
-
os.mkdir(CURATION_COHORT_CACHE)
|
|
87
|
-
else:
|
|
88
|
-
shutil.rmtree(CURATION_COHORT_CACHE)
|
|
89
|
-
os.mkdir(CURATION_COHORT_CACHE)
|
|
90
|
-
|
|
91
|
-
self.cohort.create_cohort(
|
|
92
|
-
CURATION_COHORT_CACHE, "sample_metadata_query", "desc"
|
|
93
|
-
)
|
|
94
|
-
|
|
95
|
-
# Fetch metadata using cohorts
|
|
96
|
-
for dataset_id in dataset_ids:
|
|
97
|
-
datasets_sample_metadata = []
|
|
98
|
-
|
|
99
|
-
if not (
|
|
100
|
-
repo_name in self.cohort_constants
|
|
101
|
-
and self.cohort_constants[repo_name]["file_structure"] != "multiple"
|
|
102
|
-
):
|
|
103
|
-
# multiple mapped repo such as GEO
|
|
104
|
-
self.cohort.add_to_cohort(repo_name, dataset_id=dataset_id)
|
|
105
|
-
else:
|
|
106
|
-
# for single mapped repos such as TCGA
|
|
107
|
-
self.cohort.add_to_cohort(repo_name, dataset_id=[dataset_id])
|
|
108
|
-
|
|
109
|
-
col_metadata = self.cohort.merge_data("sample")
|
|
110
|
-
all_sample_ids = col_metadata.index.tolist()
|
|
111
|
-
|
|
112
|
-
col_metadata.loc[:, "dataset_id"] = dataset_id
|
|
113
|
-
dataset_to_sample_id["dataset_id"] += [dataset_id] * len(all_sample_ids)
|
|
114
|
-
|
|
115
|
-
col_metadata.loc[:, "sample_id"] = all_sample_ids
|
|
116
|
-
dataset_to_sample_id["sample_id"] += all_sample_ids
|
|
117
|
-
|
|
118
|
-
datasets_sample_metadata += list(col_metadata.T.to_dict().values())
|
|
119
|
-
|
|
120
|
-
if not (
|
|
121
|
-
repo_name in self.cohort_constants
|
|
122
|
-
and self.cohort_constants[repo_name]["file_structure"] != "multiple"
|
|
123
|
-
):
|
|
124
|
-
self.cohort.remove_from_cohort(dataset_id)
|
|
125
|
-
else:
|
|
126
|
-
self.cohort.remove_from_cohort([dataset_id])
|
|
127
|
-
|
|
128
|
-
sample_metadata[dataset_id] = datasets_sample_metadata
|
|
129
|
-
|
|
130
|
-
dataset_to_sample_id = pd.DataFrame.from_dict(dataset_to_sample_id)
|
|
131
|
-
|
|
132
|
-
return sample_metadata, dataset_to_sample_id
|
|
133
|
-
|
|
134
|
-
def _clinical_model_param_checks(
|
|
135
|
-
self,
|
|
136
|
-
repo_name: str,
|
|
137
|
-
dataset_ids: List[str],
|
|
138
|
-
sample_ids: Optional[List[str]] = None,
|
|
139
|
-
):
|
|
140
|
-
"""
|
|
141
|
-
Checking the parameter passed to the clinical label assigning model.
|
|
142
|
-
|
|
143
|
-
Arguments:
|
|
144
|
-
repo_name (str): repo name
|
|
145
|
-
dataset_ids (list[str]): list of dataset ids
|
|
146
|
-
|
|
147
|
-
Keyword Arguments:
|
|
148
|
-
sample_ids (list[str], optional): Optional Parameter. List of sample ids.
|
|
149
|
-
Default is 'None'.
|
|
150
|
-
|
|
151
|
-
Raises:
|
|
152
|
-
paramException
|
|
153
|
-
"""
|
|
154
|
-
if dataset_ids is None or type(dataset_ids) is not list:
|
|
155
|
-
raise paramException(
|
|
156
|
-
title="Param Exception",
|
|
157
|
-
detail="Dataset IDs should be given as a valid list of strings",
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
if sample_ids is not None and type(sample_ids) is not list:
|
|
161
|
-
raise paramException(
|
|
162
|
-
title="Param Exception",
|
|
163
|
-
detail="Sample IDs should be given as a valid list of strings",
|
|
164
|
-
)
|
|
165
|
-
|
|
166
|
-
if repo_name != "geo" and not any(
|
|
167
|
-
["GSE" in dataset_id for dataset_id in dataset_ids]
|
|
168
|
-
):
|
|
169
|
-
warnings.warn(
|
|
170
|
-
"The model is tested with GEO metadata and the labels may be wrong for other repos"
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
def _post_process_clinical_tags(
|
|
174
|
-
self,
|
|
175
|
-
clinical_tags: pd.DataFrame,
|
|
176
|
-
is_sample_tag: bool,
|
|
177
|
-
sample_ids: Optional[List[str]] = None,
|
|
178
|
-
) -> pd.DataFrame:
|
|
179
|
-
"""
|
|
180
|
-
process the response of the model (dataframe with clinical tags and samples)
|
|
181
|
-
and return relevant feilds.
|
|
182
|
-
incase no sample_ids are provided by the user, we return the dataset_ids and the clinical tags
|
|
183
|
-
incase sample_ids are also provided, then we return the dataset_ids, the sample_ids and the clincal tags.
|
|
184
|
-
|
|
185
|
-
Arguments:
|
|
186
|
-
clinical_tags (pd.DataFrame): dataframe of the sample_ids and assigned clinical tags
|
|
187
|
-
is_sample_tag (bool): if samples passed
|
|
188
|
-
|
|
189
|
-
Keyword Arguments:
|
|
190
|
-
sample_ids (list[str]): list of sample ids (default: {None})
|
|
191
|
-
|
|
192
|
-
Returns:
|
|
193
|
-
a dataframe with the the dataset_ids, sample_ids and the assigned clinical tags
|
|
194
|
-
"""
|
|
195
|
-
if is_sample_tag:
|
|
196
|
-
# if the user has provided list of samples, then we filter in just those sample ids
|
|
197
|
-
# for the dataset ids.
|
|
198
|
-
# taking only those clinical tags and samples where the sample_ids are in the sample_id list
|
|
199
|
-
# provided by the user.
|
|
200
|
-
clinical_tags = clinical_tags[
|
|
201
|
-
clinical_tags["sample_id"].isin(sample_ids)
|
|
202
|
-
].reset_index(drop=True)
|
|
203
|
-
|
|
204
|
-
# in case the sample_ids provided by the user are not present in the dataset_ids provided.
|
|
205
|
-
if clinical_tags.empty or clinical_tags.shape[0] < len(sample_ids):
|
|
206
|
-
warnings.warn(
|
|
207
|
-
"The output is empty or has missing sample ids because they are not present in given datasets."
|
|
208
|
-
)
|
|
209
|
-
|
|
210
|
-
# return sample level tags here
|
|
211
|
-
return clinical_tags
|
|
212
|
-
# if no sample_ids were passed by the user, then
|
|
213
|
-
# returning dataset level tags by removing sample id and removing duplicate columns
|
|
214
|
-
return (
|
|
215
|
-
clinical_tags.drop(columns=["sample_id"])
|
|
216
|
-
.drop_duplicates()
|
|
217
|
-
.reset_index(drop=True)
|
|
218
|
-
)
|
|
219
|
-
|
|
220
66
|
def _handle_perform_inference_api_error(self, response):
|
|
221
67
|
if response.status_code == http_codes.UNAUTHORIZED:
|
|
222
68
|
raise UnauthorizedException("User is unauthorized to access this")
|
|
@@ -482,84 +328,3 @@ class Curation:
|
|
|
482
328
|
sample_metadata["is_control"] = output["is_control"].values
|
|
483
329
|
sample_metadata["control_prob"] = output["control_prob"].values
|
|
484
330
|
return sample_metadata
|
|
485
|
-
|
|
486
|
-
@Track.track_decorator
|
|
487
|
-
def assign_clinical_labels(
|
|
488
|
-
self,
|
|
489
|
-
repo_name: str,
|
|
490
|
-
dataset_ids: List[str],
|
|
491
|
-
sample_ids: Optional[List[str]] = None,
|
|
492
|
-
) -> pd.DataFrame:
|
|
493
|
-
"""
|
|
494
|
-
Returns a list of clinical or non clinical labels for the given datasets or samples.
|
|
495
|
-
|
|
496
|
-
Arguments:
|
|
497
|
-
repo_name (str): name of the repository for fetching datasets.
|
|
498
|
-
dataset_ids (List[str]): dataset ids to be used for inference
|
|
499
|
-
|
|
500
|
-
Keyword Arguments:
|
|
501
|
-
sample_ids (List[str], optional): Optional Parameter. Sample ids if that is needed.
|
|
502
|
-
|
|
503
|
-
Raises:
|
|
504
|
-
RequestException: API response exception
|
|
505
|
-
ParamException: Invalid parameters
|
|
506
|
-
err
|
|
507
|
-
|
|
508
|
-
Returns:
|
|
509
|
-
dataframe which is a list of clinical tags for given ids
|
|
510
|
-
"""
|
|
511
|
-
warnings.formatwarning = lambda msg, *args, **kwargs: f"WARNING: {msg}\n"
|
|
512
|
-
|
|
513
|
-
try:
|
|
514
|
-
self._clinical_model_param_checks(repo_name, dataset_ids, sample_ids)
|
|
515
|
-
# evaluating the inference level based on if the user has provided sample_ids
|
|
516
|
-
is_sample_tag = sample_ids is not None
|
|
517
|
-
inference_level = "sample_id" if (is_sample_tag) else "dataset_id"
|
|
518
|
-
|
|
519
|
-
sample_metadata, dataset_to_sample_id = self._fetch_metadata_from_cohort(
|
|
520
|
-
repo_name=repo_name, dataset_ids=dataset_ids
|
|
521
|
-
)
|
|
522
|
-
|
|
523
|
-
clinical_model_predictions = []
|
|
524
|
-
|
|
525
|
-
for dataset_id in sample_metadata:
|
|
526
|
-
# Get output from model endpoint and structure output
|
|
527
|
-
payload = {
|
|
528
|
-
"sample_metadata": sample_metadata[dataset_id],
|
|
529
|
-
"sample_id_column": "sample_id",
|
|
530
|
-
"dataset_id_column": "dataset_id",
|
|
531
|
-
"is_sample_tag": is_sample_tag,
|
|
532
|
-
}
|
|
533
|
-
|
|
534
|
-
output = self._perform_inference("clinical-classifier", payload)
|
|
535
|
-
if "errors" in output:
|
|
536
|
-
title, detail = self._handle_errors(output)
|
|
537
|
-
raise RequestException(title, detail)
|
|
538
|
-
|
|
539
|
-
output = output["clinical_predictions"]
|
|
540
|
-
|
|
541
|
-
clinical_model_predictions += output
|
|
542
|
-
|
|
543
|
-
# creating dataframe with inference_level and clinical_tags with values from the clinical_model_predictions
|
|
544
|
-
clinical_tags = pd.DataFrame(
|
|
545
|
-
{
|
|
546
|
-
inference_level: [
|
|
547
|
-
tag["tag_id"] for tag in clinical_model_predictions
|
|
548
|
-
],
|
|
549
|
-
"clinical_tag": [
|
|
550
|
-
tag["clinical_tag"] for tag in clinical_model_predictions
|
|
551
|
-
],
|
|
552
|
-
}
|
|
553
|
-
)
|
|
554
|
-
|
|
555
|
-
clinical_tags = pd.merge(
|
|
556
|
-
dataset_to_sample_id, clinical_tags, on=inference_level
|
|
557
|
-
)
|
|
558
|
-
|
|
559
|
-
clinical_tags = self._post_process_clinical_tags(
|
|
560
|
-
clinical_tags, is_sample_tag, sample_ids
|
|
561
|
-
)
|
|
562
|
-
except Exception as err:
|
|
563
|
-
raise err
|
|
564
|
-
|
|
565
|
-
return clinical_tags
|
|
@@ -97,68 +97,6 @@ class InvalidDirectoryPathException(Exception):
|
|
|
97
97
|
return "This path does not represent an existing directory. Please try again."
|
|
98
98
|
|
|
99
99
|
|
|
100
|
-
class InvalidCohortPathException(Exception):
|
|
101
|
-
def __str__(self):
|
|
102
|
-
return "This path does not represent a Cohort. Please try again."
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
class InvalidCohortNameException(Exception):
|
|
106
|
-
def __str__(self, cohort_name):
|
|
107
|
-
return f"The identifier {cohort_name} does not represent a valid cohort name. Please try again."
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
class InvalidRepoException(Exception):
|
|
111
|
-
def __init__(self, repo_name):
|
|
112
|
-
self.repo_name = repo_name
|
|
113
|
-
|
|
114
|
-
def __str__(self):
|
|
115
|
-
return f"The repository : {self.repo_name} is not supported. Please contact Polly Support."
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
class InvalidDatasetException(Exception):
|
|
119
|
-
def __str__(self):
|
|
120
|
-
return "Dataset/s not added."
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
class InvalidCohortOperationException(Exception):
|
|
124
|
-
def __str__(self):
|
|
125
|
-
return "This operation is not valid as no cohort has been instantiated."
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class EmptyCohortException(Exception):
|
|
129
|
-
def __str__(self):
|
|
130
|
-
return "There are no datasets in the cohort. Please try adding datasets using add_to_cohort() function."
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
class CohortEditException(Exception):
|
|
134
|
-
def __str__(self):
|
|
135
|
-
return "No parameter specified for editing in cohort"
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
class InvalidCohortMergeOperation(Exception):
|
|
139
|
-
def __str__(self):
|
|
140
|
-
return "Incorrect or blank parameter specified for merging in cohort"
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
class InvalidCohortAddition(Exception):
|
|
144
|
-
def __str__(self):
|
|
145
|
-
return "The repository type is not compatible with the cohort due to different file structure. Please try again."
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
class OutdatedCohortVersion(Exception):
|
|
149
|
-
def __init__(self, version):
|
|
150
|
-
self.version = version
|
|
151
|
-
|
|
152
|
-
def __str__(self):
|
|
153
|
-
return f"The Cohort version is outdated. Please try again with the new version VERSION-{self.version}."
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
class TechnicalFaultException(Exception):
|
|
157
|
-
def __str__(self):
|
|
158
|
-
return "Samples not downloaded due to a technical fault. Please check \
|
|
159
|
-
the arguments passed and try again. Contact Polly Support in case of repeated failure."
|
|
160
|
-
|
|
161
|
-
|
|
162
100
|
class RequestFailureException(Exception):
|
|
163
101
|
def __str__(self):
|
|
164
102
|
return "Sorry, we're unable to fetch the metadata now. Please contact polly.support@elucidata.io"
|
|
@@ -179,7 +179,7 @@ def checkclass(cls) -> None:
|
|
|
179
179
|
print("Note : use class to get help")
|
|
180
180
|
raise TypeError(title="Use class")
|
|
181
181
|
|
|
182
|
-
if cls.__name__ not in ["Polly", "OmixAtlas", "
|
|
182
|
+
if cls.__name__ not in ["Polly", "OmixAtlas", "Workspaces"]:
|
|
183
183
|
print("Other class methods not allowed")
|
|
184
184
|
raise Exception(title="Other class are not allowed")
|
|
185
185
|
|
|
@@ -210,7 +210,6 @@ def get_line(fun: str, kind: str, txt: str, function_name: str, cls, doc: bool)
|
|
|
210
210
|
# function will return lines to print
|
|
211
211
|
# for a function or class
|
|
212
212
|
Link = {
|
|
213
|
-
"cohort": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/cohort.ipynb",
|
|
214
213
|
"omixatlas": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/omixatlas.ipynb",
|
|
215
214
|
"polly": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/polly.ipynb",
|
|
216
215
|
"workspaces": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/workspaces.ipynb",
|
|
@@ -301,7 +300,7 @@ def get_txt(
|
|
|
301
300
|
|
|
302
301
|
def example(cls, function_name: str = "") -> None:
|
|
303
302
|
"""
|
|
304
|
-
function to see examples for class - Polly, OmixAtlas, Workspaces
|
|
303
|
+
function to see examples for class - Polly, OmixAtlas, Workspaces and it's member funtions
|
|
305
304
|
|
|
306
305
|
``Args:``
|
|
307
306
|
``function_name (optional) str:`` provide function name to see examples default empty.
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import re
|
|
3
3
|
import json
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
# import logging
|
|
5
6
|
import requests
|
|
6
7
|
import urllib.request
|
|
7
8
|
from cloudpathlib import S3Client
|
|
@@ -16,16 +17,14 @@ from polly.errors import (
|
|
|
16
17
|
OperationFailedException,
|
|
17
18
|
paramException,
|
|
18
19
|
AccessDeniedError,
|
|
19
|
-
InvalidRepoException,
|
|
20
20
|
DatatypeNotFoundException,
|
|
21
21
|
RepositoryNotFoundException,
|
|
22
22
|
)
|
|
23
|
-
|
|
24
|
-
import contextlib
|
|
25
|
-
import joblib
|
|
23
|
+
|
|
26
24
|
import urllib
|
|
27
25
|
import pandas as pd
|
|
28
|
-
|
|
26
|
+
|
|
27
|
+
# import polly.http_response_codes as http_codes
|
|
29
28
|
from polly.tracking import Track
|
|
30
29
|
import polly.constants as const
|
|
31
30
|
import string
|
|
@@ -411,43 +410,6 @@ def elastic_query(index_name: str, dataset_id: str) -> dict:
|
|
|
411
410
|
return query
|
|
412
411
|
|
|
413
412
|
|
|
414
|
-
def get_cohort_constants() -> json:
|
|
415
|
-
"""
|
|
416
|
-
Returns cohort info from public assests url
|
|
417
|
-
"""
|
|
418
|
-
response = requests.get(COHORT_CONSTANTS_URL)
|
|
419
|
-
error_handler(response)
|
|
420
|
-
return json.loads(response.text)
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
def validate_datatype(datatype: str):
|
|
424
|
-
"""
|
|
425
|
-
Function to validate datatype of a dataset
|
|
426
|
-
Returns 1 in case of datatype is Single Cell, 0 otherwise
|
|
427
|
-
"""
|
|
428
|
-
if datatype == "Single cell":
|
|
429
|
-
return 1
|
|
430
|
-
return 0
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
@contextlib.contextmanager
|
|
434
|
-
def tqdm_joblib(tqdm_object):
|
|
435
|
-
"""Context manager to patch joblib to report into tqdm progress bar given as argument"""
|
|
436
|
-
|
|
437
|
-
class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
|
|
438
|
-
def __call__(self, *args, **kwargs):
|
|
439
|
-
tqdm_object.update(n=self.batch_size)
|
|
440
|
-
return super().__call__(*args, **kwargs)
|
|
441
|
-
|
|
442
|
-
old_batch_callback = joblib.parallel.BatchCompletionCallBack
|
|
443
|
-
joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
|
|
444
|
-
try:
|
|
445
|
-
yield tqdm_object
|
|
446
|
-
finally:
|
|
447
|
-
joblib.parallel.BatchCompletionCallBack = old_batch_callback
|
|
448
|
-
tqdm_object.close()
|
|
449
|
-
|
|
450
|
-
|
|
451
413
|
def check_empty(x):
|
|
452
414
|
"""
|
|
453
415
|
Function to validate if the entry is an empty list or not.
|
|
@@ -593,21 +555,6 @@ def workspaces_permission_check(self, workspace_id) -> bool:
|
|
|
593
555
|
)
|
|
594
556
|
|
|
595
557
|
|
|
596
|
-
def return_entity_type(data_source: str, cohort_info: json) -> str:
|
|
597
|
-
"""
|
|
598
|
-
Function to return entity type based on the cohort info present in public assets
|
|
599
|
-
"""
|
|
600
|
-
if data_source not in cohort_info:
|
|
601
|
-
raise InvalidRepoException(data_source)
|
|
602
|
-
for repo, dict in cohort_info.items():
|
|
603
|
-
if data_source == repo:
|
|
604
|
-
if dict["file_structure"] == "single":
|
|
605
|
-
entity_type = "dataset"
|
|
606
|
-
elif dict["file_structure"] == "multiple":
|
|
607
|
-
entity_type = "sample"
|
|
608
|
-
return entity_type
|
|
609
|
-
|
|
610
|
-
|
|
611
558
|
def get_files_in_dir(path_to_dir: str) -> list:
|
|
612
559
|
"""
|
|
613
560
|
returns the files in a given directory
|
|
@@ -722,35 +669,6 @@ def replace_original_name_field(
|
|
|
722
669
|
return replaced_metadata
|
|
723
670
|
|
|
724
671
|
|
|
725
|
-
def upload_html_file(
|
|
726
|
-
session, workspace_id: int, workspace_path: str, local_report_path: str
|
|
727
|
-
):
|
|
728
|
-
"""
|
|
729
|
-
Function to upload an html file to a workspace.
|
|
730
|
-
"""
|
|
731
|
-
upload_url = f"https://v2.api.{session.env}.elucidata.io/workspaces/{workspace_id}/upload_url"
|
|
732
|
-
params = {"file_path": workspace_path, "content_type": "text/html"}
|
|
733
|
-
# get request to get the signed url for s3
|
|
734
|
-
response = session.get(upload_url, params=params)
|
|
735
|
-
error_handler(response)
|
|
736
|
-
attributes = response.json().get("data").get("attributes")
|
|
737
|
-
try:
|
|
738
|
-
with open(local_report_path, "rb") as file_to_upload:
|
|
739
|
-
# uploading the local file to the signed url
|
|
740
|
-
files = {"file": (local_report_path, file_to_upload)}
|
|
741
|
-
upload_response = requests.post(
|
|
742
|
-
attributes["url"], data=attributes["fields"], files=files
|
|
743
|
-
)
|
|
744
|
-
error_handler(upload_response)
|
|
745
|
-
if upload_response.status_code == http_codes.CREATED:
|
|
746
|
-
logging.basicConfig(level=logging.INFO)
|
|
747
|
-
logging.info(
|
|
748
|
-
f"File uploaded successfully to workspace-id = {workspace_id} at path = {workspace_path}!"
|
|
749
|
-
)
|
|
750
|
-
except Exception as e:
|
|
751
|
-
raise e
|
|
752
|
-
|
|
753
|
-
|
|
754
672
|
def get_folder_list_from_list_of_filepaths(filenames_fullpath_list: list) -> list:
|
|
755
673
|
"""
|
|
756
674
|
gives back only the folders from a list of filepaths provided.
|