polly-python 2.5.0__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {polly_python-2.5.0/polly_python.egg-info → polly_python-3.0.0}/PKG-INFO +7 -14
- polly_python-3.0.0/polly/__init__.py +1 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/constants.py +15 -15
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/curation.py +243 -237
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/errors.py +38 -38
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/helpers.py +1 -56
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/omixatlas.py +39 -33
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/pipelines.py +119 -92
- {polly_python-2.5.0 → polly_python-3.0.0/polly_python.egg-info}/PKG-INFO +7 -14
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_python.egg-info/SOURCES.txt +2 -4
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_python.egg-info/requires.txt +5 -14
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/reporting/reporting.py +49 -27
- {polly_python-2.5.0 → polly_python-3.0.0}/setup.cfg +6 -13
- polly_python-3.0.0/tests/test_help.py +81 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_helpers.py +10 -10
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_omixatlas.py +9 -4
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_pipelines.py +34 -25
- polly_python-3.0.0/tests/test_validation.py +135 -0
- polly_python-2.5.0/polly/__init__.py +0 -1
- polly_python-2.5.0/polly/bridge_cohort.py +0 -399
- polly_python-2.5.0/polly/cohort.py +0 -433
- polly_python-2.5.0/polly/core_cohort.py +0 -721
- polly_python-2.5.0/tests/test_cohort.py +0 -216
- {polly_python-2.5.0 → polly_python-3.0.0}/LICENSE.md +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/MANIFEST.in +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/README.md +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/analyze.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/application_error_info.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/atlas.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/auth.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/data_management.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/help.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/http_response_codes.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/index_schema_level_conversion_const.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/jobs.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/omixatlas_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/s3_utils.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/session.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/threading_utils.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/tracking.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/validation.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/validation_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly/workspaces.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_interfaces/IFiles.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_interfaces/IReporting.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_interfaces/ISchema.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_interfaces/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_python.egg-info/dependency_links.txt +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_python.egg-info/top_level.txt +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/dataset.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/files/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/files/files.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/files/files_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/polly_services_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/reporting/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/reporting/reporting_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/schema/__init__.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/schema/schema.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/schema/schema_const.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/schema/schema_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/polly_services/schema/validate_schema_hlpr.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/pyproject.toml +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/setup.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_constants.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_curation.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_data_management.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_jobs.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_s3_utils.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_schema_ux.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_threading_utils.py +0 -0
- {polly_python-2.5.0 → polly_python-3.0.0}/tests/test_workspaces.py +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: polly_python
|
|
3
|
-
Version:
|
|
3
|
+
Version: 3.0.0
|
|
4
4
|
Summary: Polly SDK
|
|
5
5
|
Home-page: https://github.com/ElucidataInc/polly-python
|
|
6
6
|
Project-URL: Documentation, https://docs.elucidata.io
|
|
7
7
|
Project-URL: Tutorial Notebooks, https://github.com/ElucidataInc/polly-python
|
|
8
|
-
Requires-Python:
|
|
8
|
+
Requires-Python: <=3.11,>=3.9
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE.md
|
|
11
|
-
Requires-Dist:
|
|
11
|
+
Requires-Dist: cmapPy<=4.0.1
|
|
12
12
|
Requires-Dist: cloudpathlib>=0.15.0
|
|
13
13
|
Requires-Dist: retrying==1.3.4
|
|
14
14
|
Requires-Dist: rst2txt==1.1.0
|
|
@@ -17,22 +17,15 @@ Requires-Dist: mixpanel==4.10.0
|
|
|
17
17
|
Requires-Dist: Deprecated>=1.2.12
|
|
18
18
|
Requires-Dist: pytest>=6.2.5
|
|
19
19
|
Requires-Dist: cryptography<=38.0.0,>=37.0.1
|
|
20
|
-
Requires-Dist: plotly<5.0.0,>=4.8.1; python_version > "3.6" and python_version < "3.7"
|
|
21
20
|
Requires-Dist: plotly>=5.0.0; python_version >= "3.7"
|
|
22
|
-
Requires-Dist: pandas
|
|
23
|
-
Requires-Dist:
|
|
24
|
-
Requires-Dist: pydantic<1.10.0a1,>=1.8.2; python_version > "3.6" and python_version < "3.7"
|
|
21
|
+
Requires-Dist: pandas<=2.2.2,>=1.3.5; python_version >= "3.7"
|
|
22
|
+
Requires-Dist: numpy<=1.26.4
|
|
25
23
|
Requires-Dist: pydantic==1.10.12; python_version >= "3.7"
|
|
26
24
|
Requires-Dist: requests==2.28.1
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
Requires-Dist:
|
|
29
|
-
Requires-Dist: boto3>=1.24.0; python_version >= "3.7"
|
|
30
|
-
Requires-Dist: botocore<1.27.0,>=1.20.73; python_version > "3.6" and python_version < "3.7"
|
|
31
|
-
Requires-Dist: botocore>=1.27.0; python_version >= "3.7"
|
|
32
|
-
Requires-Dist: joblib<=1.1.0,>0.11.0; python_version > "3.6" and python_version < "3.7"
|
|
25
|
+
Requires-Dist: boto3<2.0,>=1.24.0; python_version >= "3.7"
|
|
26
|
+
Requires-Dist: botocore<2.0,>=1.27.0; python_version >= "3.7"
|
|
33
27
|
Requires-Dist: joblib>=1.2.0; python_version >= "3.7"
|
|
34
28
|
Requires-Dist: tabulate==0.9.0
|
|
35
|
-
Requires-Dist: tqdm<4.65.0,>=4.61.0; python_version > "3.6" and python_version < "3.7"
|
|
36
29
|
Requires-Dist: tqdm==4.65.0; python_version >= "3.7"
|
|
37
30
|
Provides-Extra: testing
|
|
38
31
|
Requires-Dist: black; extra == "testing"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "3.0.0"
|
|
@@ -92,22 +92,22 @@ IO_CHUNKSIZE_LARGE_FILE_SIZE = 100 * MB
|
|
|
92
92
|
EXPIRED_TOKEN = "ExpiredToken"
|
|
93
93
|
|
|
94
94
|
# cohort constants
|
|
95
|
-
COHORT_VERSION = "0.2"
|
|
96
|
-
COHORT_CONSTANTS_URL = (
|
|
97
|
-
|
|
98
|
-
)
|
|
95
|
+
# COHORT_VERSION = "0.2"
|
|
96
|
+
# COHORT_CONSTANTS_URL = (
|
|
97
|
+
# "https://elucidatainc.github.io/PublicAssets/cohort_constants.txt"
|
|
98
|
+
# )
|
|
99
99
|
|
|
100
|
-
OBSOLETE_METADATA_FIELDS = [
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
]
|
|
110
|
-
dot = "."
|
|
100
|
+
# OBSOLETE_METADATA_FIELDS = [
|
|
101
|
+
# "package",
|
|
102
|
+
# "region",
|
|
103
|
+
# "bucket",
|
|
104
|
+
# "key",
|
|
105
|
+
# "file_type",
|
|
106
|
+
# "file_location",
|
|
107
|
+
# "src_uri",
|
|
108
|
+
# "timestamp_",
|
|
109
|
+
# ]
|
|
110
|
+
# dot = "."
|
|
111
111
|
|
|
112
112
|
GETTING_UPLOAD_URLS_PAYLOAD = {"data": {"type": "files", "attributes": {"folder": ""}}}
|
|
113
113
|
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from collections import namedtuple
|
|
3
|
-
|
|
4
|
-
import
|
|
3
|
+
|
|
4
|
+
# import os
|
|
5
|
+
# import shutil
|
|
5
6
|
from typing import Dict, Optional, List
|
|
6
|
-
|
|
7
|
+
|
|
8
|
+
# import warnings
|
|
7
9
|
import pandas as pd
|
|
8
10
|
from functools import lru_cache
|
|
9
11
|
from polly.errors import (
|
|
@@ -13,15 +15,19 @@ from polly.errors import (
|
|
|
13
15
|
RequestException,
|
|
14
16
|
UnauthorizedException,
|
|
15
17
|
extract_json_api_error,
|
|
16
|
-
paramException,
|
|
18
|
+
# paramException,
|
|
17
19
|
)
|
|
18
20
|
from polly.auth import Polly
|
|
19
|
-
|
|
21
|
+
|
|
22
|
+
# from polly.cohort import Cohort
|
|
20
23
|
from polly import helpers, constants as const, application_error_info as app_err_info
|
|
21
24
|
from polly.help import example
|
|
22
25
|
import polly.http_response_codes as http_codes
|
|
23
|
-
|
|
24
|
-
from polly.
|
|
26
|
+
|
|
27
|
+
# from polly.constants import SUPPORTED_ENTITY_TYPES, CURATION_COHORT_CACHE
|
|
28
|
+
from polly.constants import SUPPORTED_ENTITY_TYPES
|
|
29
|
+
|
|
30
|
+
# from polly.helpers import get_cohort_constants
|
|
25
31
|
from polly.tracking import Track
|
|
26
32
|
|
|
27
33
|
|
|
@@ -60,162 +66,162 @@ class Curation:
|
|
|
60
66
|
f"https://api.datalake.discover.{self.session.env}.elucidata.io/elastic/v2"
|
|
61
67
|
)
|
|
62
68
|
self.inference_url = f"https://api.discover.{self.session.env}.elucidata.io/curations/inferences/"
|
|
63
|
-
self.cohort = Cohort()
|
|
64
|
-
self.cohort_constants = get_cohort_constants()
|
|
69
|
+
# self.cohort = Cohort()
|
|
70
|
+
# self.cohort_constants = get_cohort_constants()
|
|
65
71
|
|
|
66
72
|
def _handle_errors(self, response):
|
|
67
73
|
detail = response.get("errors")[0].get("detail", [])
|
|
68
74
|
title = response.get("errors")[0].get("title", [])
|
|
69
75
|
return title, detail
|
|
70
76
|
|
|
71
|
-
def _fetch_metadata_from_cohort(self, repo_name: str, dataset_ids: List[str]):
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def _clinical_model_param_checks(
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
):
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
def _post_process_clinical_tags(
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
) -> pd.DataFrame:
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
77
|
+
# def _fetch_metadata_from_cohort(self, repo_name: str, dataset_ids: List[str]):
|
|
78
|
+
# """
|
|
79
|
+
# Utility function for fetching metadata using cohorts.
|
|
80
|
+
|
|
81
|
+
# Arguments:
|
|
82
|
+
# repo_name (str) : name of the repository for fetching datasets.
|
|
83
|
+
# dataset_ids (List[str]): dataset ids to be used for inference
|
|
84
|
+
|
|
85
|
+
# Returns:
|
|
86
|
+
# Returns sample metadata, dataset and sample ids.
|
|
87
|
+
# """
|
|
88
|
+
# sample_metadata = {}
|
|
89
|
+
# dataset_to_sample_id = {"dataset_id": [], "sample_id": []}
|
|
90
|
+
|
|
91
|
+
# if not (os.path.isdir(CURATION_COHORT_CACHE)):
|
|
92
|
+
# os.mkdir(CURATION_COHORT_CACHE)
|
|
93
|
+
# else:
|
|
94
|
+
# shutil.rmtree(CURATION_COHORT_CACHE)
|
|
95
|
+
# os.mkdir(CURATION_COHORT_CACHE)
|
|
96
|
+
|
|
97
|
+
# self.cohort.create_cohort(
|
|
98
|
+
# CURATION_COHORT_CACHE, "sample_metadata_query", "desc"
|
|
99
|
+
# )
|
|
100
|
+
|
|
101
|
+
# # Fetch metadata using cohorts
|
|
102
|
+
# for dataset_id in dataset_ids:
|
|
103
|
+
# datasets_sample_metadata = []
|
|
104
|
+
|
|
105
|
+
# if not (
|
|
106
|
+
# repo_name in self.cohort_constants
|
|
107
|
+
# and self.cohort_constants[repo_name]["file_structure"] != "multiple"
|
|
108
|
+
# ):
|
|
109
|
+
# # multiple mapped repo such as GEO
|
|
110
|
+
# self.cohort.add_to_cohort(repo_name, dataset_id=dataset_id)
|
|
111
|
+
# else:
|
|
112
|
+
# # for single mapped repos such as TCGA
|
|
113
|
+
# self.cohort.add_to_cohort(repo_name, dataset_id=[dataset_id])
|
|
114
|
+
|
|
115
|
+
# col_metadata = self.cohort.merge_data("sample")
|
|
116
|
+
# all_sample_ids = col_metadata.index.tolist()
|
|
117
|
+
|
|
118
|
+
# col_metadata.loc[:, "dataset_id"] = dataset_id
|
|
119
|
+
# dataset_to_sample_id["dataset_id"] += [dataset_id] * len(all_sample_ids)
|
|
120
|
+
|
|
121
|
+
# col_metadata.loc[:, "sample_id"] = all_sample_ids
|
|
122
|
+
# dataset_to_sample_id["sample_id"] += all_sample_ids
|
|
123
|
+
|
|
124
|
+
# datasets_sample_metadata += list(col_metadata.T.to_dict().values())
|
|
125
|
+
|
|
126
|
+
# if not (
|
|
127
|
+
# repo_name in self.cohort_constants
|
|
128
|
+
# and self.cohort_constants[repo_name]["file_structure"] != "multiple"
|
|
129
|
+
# ):
|
|
130
|
+
# self.cohort.remove_from_cohort(dataset_id)
|
|
131
|
+
# else:
|
|
132
|
+
# self.cohort.remove_from_cohort([dataset_id])
|
|
133
|
+
|
|
134
|
+
# sample_metadata[dataset_id] = datasets_sample_metadata
|
|
135
|
+
|
|
136
|
+
# dataset_to_sample_id = pd.DataFrame.from_dict(dataset_to_sample_id)
|
|
137
|
+
|
|
138
|
+
# return sample_metadata, dataset_to_sample_id
|
|
139
|
+
|
|
140
|
+
# def _clinical_model_param_checks(
|
|
141
|
+
# self,
|
|
142
|
+
# repo_name: str,
|
|
143
|
+
# dataset_ids: List[str],
|
|
144
|
+
# sample_ids: Optional[List[str]] = None,
|
|
145
|
+
# ):
|
|
146
|
+
# """
|
|
147
|
+
# Checking the parameter passed to the clinical label assigning model.
|
|
148
|
+
|
|
149
|
+
# Arguments:
|
|
150
|
+
# repo_name (str): repo name
|
|
151
|
+
# dataset_ids (list[str]): list of dataset ids
|
|
152
|
+
|
|
153
|
+
# Keyword Arguments:
|
|
154
|
+
# sample_ids (list[str], optional): Optional Parameter. List of sample ids.
|
|
155
|
+
# Default is 'None'.
|
|
156
|
+
|
|
157
|
+
# Raises:
|
|
158
|
+
# paramException
|
|
159
|
+
# """
|
|
160
|
+
# if dataset_ids is None or type(dataset_ids) is not list:
|
|
161
|
+
# raise paramException(
|
|
162
|
+
# title="Param Exception",
|
|
163
|
+
# detail="Dataset IDs should be given as a valid list of strings",
|
|
164
|
+
# )
|
|
165
|
+
|
|
166
|
+
# if sample_ids is not None and type(sample_ids) is not list:
|
|
167
|
+
# raise paramException(
|
|
168
|
+
# title="Param Exception",
|
|
169
|
+
# detail="Sample IDs should be given as a valid list of strings",
|
|
170
|
+
# )
|
|
171
|
+
|
|
172
|
+
# if repo_name != "geo" and not any(
|
|
173
|
+
# ["GSE" in dataset_id for dataset_id in dataset_ids]
|
|
174
|
+
# ):
|
|
175
|
+
# warnings.warn(
|
|
176
|
+
# "The model is tested with GEO metadata and the labels may be wrong for other repos"
|
|
177
|
+
# )
|
|
178
|
+
|
|
179
|
+
# def _post_process_clinical_tags(
|
|
180
|
+
# self,
|
|
181
|
+
# clinical_tags: pd.DataFrame,
|
|
182
|
+
# is_sample_tag: bool,
|
|
183
|
+
# sample_ids: Optional[List[str]] = None,
|
|
184
|
+
# ) -> pd.DataFrame:
|
|
185
|
+
# """
|
|
186
|
+
# process the response of the model (dataframe with clinical tags and samples)
|
|
187
|
+
# and return relevant feilds.
|
|
188
|
+
# incase no sample_ids are provided by the user, we return the dataset_ids and the clinical tags
|
|
189
|
+
# incase sample_ids are also provided, then we return the dataset_ids, the sample_ids and the clincal tags.
|
|
190
|
+
|
|
191
|
+
# Arguments:
|
|
192
|
+
# clinical_tags (pd.DataFrame): dataframe of the sample_ids and assigned clinical tags
|
|
193
|
+
# is_sample_tag (bool): if samples passed
|
|
194
|
+
|
|
195
|
+
# Keyword Arguments:
|
|
196
|
+
# sample_ids (list[str]): list of sample ids (default: {None})
|
|
197
|
+
|
|
198
|
+
# Returns:
|
|
199
|
+
# a dataframe with the the dataset_ids, sample_ids and the assigned clinical tags
|
|
200
|
+
# """
|
|
201
|
+
# if is_sample_tag:
|
|
202
|
+
# # if the user has provided list of samples, then we filter in just those sample ids
|
|
203
|
+
# # for the dataset ids.
|
|
204
|
+
# # taking only those clinical tags and samples where the sample_ids are in the sample_id list
|
|
205
|
+
# # provided by the user.
|
|
206
|
+
# clinical_tags = clinical_tags[
|
|
207
|
+
# clinical_tags["sample_id"].isin(sample_ids)
|
|
208
|
+
# ].reset_index(drop=True)
|
|
209
|
+
|
|
210
|
+
# # in case the sample_ids provided by the user are not present in the dataset_ids provided.
|
|
211
|
+
# if clinical_tags.empty or clinical_tags.shape[0] < len(sample_ids):
|
|
212
|
+
# warnings.warn(
|
|
213
|
+
# "The output is empty or has missing sample ids because they are not present in given datasets."
|
|
214
|
+
# )
|
|
215
|
+
|
|
216
|
+
# # return sample level tags here
|
|
217
|
+
# return clinical_tags
|
|
218
|
+
# # if no sample_ids were passed by the user, then
|
|
219
|
+
# # returning dataset level tags by removing sample id and removing duplicate columns
|
|
220
|
+
# return (
|
|
221
|
+
# clinical_tags.drop(columns=["sample_id"])
|
|
222
|
+
# .drop_duplicates()
|
|
223
|
+
# .reset_index(drop=True)
|
|
224
|
+
# )
|
|
219
225
|
|
|
220
226
|
def _handle_perform_inference_api_error(self, response):
|
|
221
227
|
if response.status_code == http_codes.UNAUTHORIZED:
|
|
@@ -483,83 +489,83 @@ class Curation:
|
|
|
483
489
|
sample_metadata["control_prob"] = output["control_prob"].values
|
|
484
490
|
return sample_metadata
|
|
485
491
|
|
|
486
|
-
@Track.track_decorator
|
|
487
|
-
def assign_clinical_labels(
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
) -> pd.DataFrame:
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
492
|
+
# @Track.track_decorator
|
|
493
|
+
# def assign_clinical_labels(
|
|
494
|
+
# self,
|
|
495
|
+
# repo_name: str,
|
|
496
|
+
# dataset_ids: List[str],
|
|
497
|
+
# sample_ids: Optional[List[str]] = None,
|
|
498
|
+
# ) -> pd.DataFrame:
|
|
499
|
+
# """
|
|
500
|
+
# Returns a list of clinical or non clinical labels for the given datasets or samples.
|
|
501
|
+
|
|
502
|
+
# Arguments:
|
|
503
|
+
# repo_name (str): name of the repository for fetching datasets.
|
|
504
|
+
# dataset_ids (List[str]): dataset ids to be used for inference
|
|
505
|
+
|
|
506
|
+
# Keyword Arguments:
|
|
507
|
+
# sample_ids (List[str], optional): Optional Parameter. Sample ids if that is needed.
|
|
508
|
+
|
|
509
|
+
# Raises:
|
|
510
|
+
# RequestException: API response exception
|
|
511
|
+
# ParamException: Invalid parameters
|
|
512
|
+
# err
|
|
513
|
+
|
|
514
|
+
# Returns:
|
|
515
|
+
# dataframe which is a list of clinical tags for given ids
|
|
516
|
+
# """
|
|
517
|
+
# warnings.formatwarning = lambda msg, *args, **kwargs: f"WARNING: {msg}\n"
|
|
518
|
+
|
|
519
|
+
# try:
|
|
520
|
+
# self._clinical_model_param_checks(repo_name, dataset_ids, sample_ids)
|
|
521
|
+
# # evaluating the inference level based on if the user has provided sample_ids
|
|
522
|
+
# is_sample_tag = sample_ids is not None
|
|
523
|
+
# inference_level = "sample_id" if (is_sample_tag) else "dataset_id"
|
|
524
|
+
|
|
525
|
+
# sample_metadata, dataset_to_sample_id = self._fetch_metadata_from_cohort(
|
|
526
|
+
# repo_name=repo_name, dataset_ids=dataset_ids
|
|
527
|
+
# )
|
|
528
|
+
|
|
529
|
+
# clinical_model_predictions = []
|
|
530
|
+
|
|
531
|
+
# for dataset_id in sample_metadata:
|
|
532
|
+
# # Get output from model endpoint and structure output
|
|
533
|
+
# payload = {
|
|
534
|
+
# "sample_metadata": sample_metadata[dataset_id],
|
|
535
|
+
# "sample_id_column": "sample_id",
|
|
536
|
+
# "dataset_id_column": "dataset_id",
|
|
537
|
+
# "is_sample_tag": is_sample_tag,
|
|
538
|
+
# }
|
|
539
|
+
|
|
540
|
+
# output = self._perform_inference("clinical-classifier", payload)
|
|
541
|
+
# if "errors" in output:
|
|
542
|
+
# title, detail = self._handle_errors(output)
|
|
543
|
+
# raise RequestException(title, detail)
|
|
544
|
+
|
|
545
|
+
# output = output["clinical_predictions"]
|
|
546
|
+
|
|
547
|
+
# clinical_model_predictions += output
|
|
548
|
+
|
|
549
|
+
# # creating dataframe with inference_level and clinical_tags with values from the clinical_model_predictions
|
|
550
|
+
# clinical_tags = pd.DataFrame(
|
|
551
|
+
# {
|
|
552
|
+
# inference_level: [
|
|
553
|
+
# tag["tag_id"] for tag in clinical_model_predictions
|
|
554
|
+
# ],
|
|
555
|
+
# "clinical_tag": [
|
|
556
|
+
# tag["clinical_tag"] for tag in clinical_model_predictions
|
|
557
|
+
# ],
|
|
558
|
+
# }
|
|
559
|
+
# )
|
|
560
|
+
|
|
561
|
+
# clinical_tags = pd.merge(
|
|
562
|
+
# dataset_to_sample_id, clinical_tags, on=inference_level
|
|
563
|
+
# )
|
|
564
|
+
|
|
565
|
+
# clinical_tags = self._post_process_clinical_tags(
|
|
566
|
+
# clinical_tags, is_sample_tag, sample_ids
|
|
567
|
+
# )
|
|
568
|
+
# except Exception as err:
|
|
569
|
+
# raise err
|
|
570
|
+
|
|
571
|
+
# return clinical_tags
|