polly-python 2.5.0__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {polly_python-2.5.0/polly_python.egg-info → polly_python-3.1.0}/PKG-INFO +8 -15
  2. polly_python-3.1.0/polly/__init__.py +1 -0
  3. {polly_python-2.5.0 → polly_python-3.1.0}/polly/constants.py +0 -18
  4. {polly_python-2.5.0 → polly_python-3.1.0}/polly/curation.py +6 -241
  5. {polly_python-2.5.0 → polly_python-3.1.0}/polly/errors.py +0 -62
  6. {polly_python-2.5.0 → polly_python-3.1.0}/polly/help.py +2 -3
  7. {polly_python-2.5.0 → polly_python-3.1.0}/polly/helpers.py +5 -87
  8. {polly_python-2.5.0 → polly_python-3.1.0}/polly/omixatlas.py +39 -33
  9. {polly_python-2.5.0 → polly_python-3.1.0}/polly/pipelines.py +119 -92
  10. polly_python-3.1.0/polly/polly_kg.py +212 -0
  11. {polly_python-2.5.0 → polly_python-3.1.0}/polly/session.py +1 -1
  12. {polly_python-2.5.0 → polly_python-3.1.0}/polly/tracking.py +6 -2
  13. {polly_python-2.5.0 → polly_python-3.1.0/polly_python.egg-info}/PKG-INFO +8 -15
  14. {polly_python-2.5.0 → polly_python-3.1.0}/polly_python.egg-info/SOURCES.txt +4 -4
  15. {polly_python-2.5.0 → polly_python-3.1.0}/polly_python.egg-info/requires.txt +5 -14
  16. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/reporting/reporting.py +49 -27
  17. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/reporting/reporting_hlpr.py +1 -1
  18. {polly_python-2.5.0 → polly_python-3.1.0}/setup.cfg +6 -13
  19. polly_python-3.1.0/tests/test_help.py +75 -0
  20. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_helpers.py +0 -18
  21. polly_python-3.1.0/tests/test_kg.py +201 -0
  22. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_omixatlas.py +9 -4
  23. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_pipelines.py +34 -25
  24. polly_python-3.1.0/tests/test_validation.py +135 -0
  25. polly_python-2.5.0/polly/__init__.py +0 -1
  26. polly_python-2.5.0/polly/bridge_cohort.py +0 -399
  27. polly_python-2.5.0/polly/cohort.py +0 -433
  28. polly_python-2.5.0/polly/core_cohort.py +0 -721
  29. polly_python-2.5.0/tests/test_cohort.py +0 -216
  30. {polly_python-2.5.0 → polly_python-3.1.0}/LICENSE.md +0 -0
  31. {polly_python-2.5.0 → polly_python-3.1.0}/MANIFEST.in +0 -0
  32. {polly_python-2.5.0 → polly_python-3.1.0}/README.md +0 -0
  33. {polly_python-2.5.0 → polly_python-3.1.0}/polly/analyze.py +0 -0
  34. {polly_python-2.5.0 → polly_python-3.1.0}/polly/application_error_info.py +0 -0
  35. {polly_python-2.5.0 → polly_python-3.1.0}/polly/atlas.py +0 -0
  36. {polly_python-2.5.0 → polly_python-3.1.0}/polly/auth.py +0 -0
  37. {polly_python-2.5.0 → polly_python-3.1.0}/polly/data_management.py +0 -0
  38. {polly_python-2.5.0 → polly_python-3.1.0}/polly/http_response_codes.py +0 -0
  39. {polly_python-2.5.0 → polly_python-3.1.0}/polly/index_schema_level_conversion_const.py +0 -0
  40. {polly_python-2.5.0 → polly_python-3.1.0}/polly/jobs.py +0 -0
  41. {polly_python-2.5.0 → polly_python-3.1.0}/polly/omixatlas_hlpr.py +0 -0
  42. {polly_python-2.5.0 → polly_python-3.1.0}/polly/s3_utils.py +0 -0
  43. {polly_python-2.5.0 → polly_python-3.1.0}/polly/threading_utils.py +0 -0
  44. {polly_python-2.5.0 → polly_python-3.1.0}/polly/validation.py +0 -0
  45. {polly_python-2.5.0 → polly_python-3.1.0}/polly/validation_hlpr.py +0 -0
  46. {polly_python-2.5.0 → polly_python-3.1.0}/polly/workspaces.py +0 -0
  47. {polly_python-2.5.0 → polly_python-3.1.0}/polly_interfaces/IFiles.py +0 -0
  48. {polly_python-2.5.0 → polly_python-3.1.0}/polly_interfaces/IReporting.py +0 -0
  49. {polly_python-2.5.0 → polly_python-3.1.0}/polly_interfaces/ISchema.py +0 -0
  50. {polly_python-2.5.0 → polly_python-3.1.0}/polly_interfaces/__init__.py +0 -0
  51. {polly_python-2.5.0 → polly_python-3.1.0}/polly_python.egg-info/dependency_links.txt +0 -0
  52. {polly_python-2.5.0 → polly_python-3.1.0}/polly_python.egg-info/top_level.txt +0 -0
  53. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/__init__.py +0 -0
  54. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/dataset.py +0 -0
  55. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/files/__init__.py +0 -0
  56. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/files/files.py +0 -0
  57. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/files/files_hlpr.py +0 -0
  58. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/polly_services_hlpr.py +0 -0
  59. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/reporting/__init__.py +0 -0
  60. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/__init__.py +0 -0
  61. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/schema.py +0 -0
  62. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/schema_const.py +0 -0
  63. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/schema_hlpr.py +0 -0
  64. {polly_python-2.5.0 → polly_python-3.1.0}/polly_services/schema/validate_schema_hlpr.py +0 -0
  65. {polly_python-2.5.0 → polly_python-3.1.0}/pyproject.toml +0 -0
  66. {polly_python-2.5.0 → polly_python-3.1.0}/setup.py +0 -0
  67. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_constants.py +0 -0
  68. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_curation.py +0 -0
  69. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_data_management.py +0 -0
  70. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_jobs.py +0 -0
  71. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_s3_utils.py +0 -0
  72. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_schema_ux.py +0 -0
  73. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_threading_utils.py +0 -0
  74. {polly_python-2.5.0 → polly_python-3.1.0}/tests/test_workspaces.py +0 -0
@@ -1,14 +1,14 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: polly_python
3
- Version: 2.5.0
3
+ Version: 3.1.0
4
4
  Summary: Polly SDK
5
5
  Home-page: https://github.com/ElucidataInc/polly-python
6
6
  Project-URL: Documentation, https://docs.elucidata.io
7
7
  Project-URL: Tutorial Notebooks, https://github.com/ElucidataInc/polly-python
8
- Requires-Python: >3.8
8
+ Requires-Python: <3.12,>=3.9
9
9
  Description-Content-Type: text/markdown
10
10
  License-File: LICENSE.md
11
- Requires-Dist: elucidatacmapPy==3.3.4
11
+ Requires-Dist: cmapPy<=4.0.1
12
12
  Requires-Dist: cloudpathlib>=0.15.0
13
13
  Requires-Dist: retrying==1.3.4
14
14
  Requires-Dist: rst2txt==1.1.0
@@ -17,22 +17,15 @@ Requires-Dist: mixpanel==4.10.0
17
17
  Requires-Dist: Deprecated>=1.2.12
18
18
  Requires-Dist: pytest>=6.2.5
19
19
  Requires-Dist: cryptography<=38.0.0,>=37.0.1
20
- Requires-Dist: plotly<5.0.0,>=4.8.1; python_version > "3.6" and python_version < "3.7"
21
20
  Requires-Dist: plotly>=5.0.0; python_version >= "3.7"
22
- Requires-Dist: pandas<1.2.0,>=1.1.0; python_version > "3.6" and python_version < "3.7"
23
- Requires-Dist: pandas>=1.3.5; python_version >= "3.7"
24
- Requires-Dist: pydantic<1.10.0a1,>=1.8.2; python_version > "3.6" and python_version < "3.7"
21
+ Requires-Dist: pandas<=2.2.2,>=1.3.5; python_version >= "3.7"
22
+ Requires-Dist: numpy<=1.26.4
25
23
  Requires-Dist: pydantic==1.10.12; python_version >= "3.7"
26
24
  Requires-Dist: requests==2.28.1
27
- Requires-Dist: numpy==1.26.4
28
- Requires-Dist: boto3<1.24.0,>=1.17.73; python_version > "3.6" and python_version < "3.7"
29
- Requires-Dist: boto3>=1.24.0; python_version >= "3.7"
30
- Requires-Dist: botocore<1.27.0,>=1.20.73; python_version > "3.6" and python_version < "3.7"
31
- Requires-Dist: botocore>=1.27.0; python_version >= "3.7"
32
- Requires-Dist: joblib<=1.1.0,>0.11.0; python_version > "3.6" and python_version < "3.7"
25
+ Requires-Dist: boto3<2.0,>=1.24.0; python_version >= "3.7"
26
+ Requires-Dist: botocore<2.0,>=1.27.0; python_version >= "3.7"
33
27
  Requires-Dist: joblib>=1.2.0; python_version >= "3.7"
34
28
  Requires-Dist: tabulate==0.9.0
35
- Requires-Dist: tqdm<4.65.0,>=4.61.0; python_version > "3.6" and python_version < "3.7"
36
29
  Requires-Dist: tqdm==4.65.0; python_version >= "3.7"
37
30
  Provides-Extra: testing
38
31
  Requires-Dist: black; extra == "testing"
@@ -0,0 +1 @@
1
+ __version__ = "3.1.0"
@@ -91,24 +91,6 @@ IO_CHUNKSIZE_LARGE_FILE_SIZE = 100 * MB
91
91
  # S3 Exceptions
92
92
  EXPIRED_TOKEN = "ExpiredToken"
93
93
 
94
- # cohort constants
95
- COHORT_VERSION = "0.2"
96
- COHORT_CONSTANTS_URL = (
97
- "https://elucidatainc.github.io/PublicAssets/cohort_constants.txt"
98
- )
99
-
100
- OBSOLETE_METADATA_FIELDS = [
101
- "package",
102
- "region",
103
- "bucket",
104
- "key",
105
- "file_type",
106
- "file_location",
107
- "src_uri",
108
- "timestamp_",
109
- ]
110
- dot = "."
111
-
112
94
  GETTING_UPLOAD_URLS_PAYLOAD = {"data": {"type": "files", "attributes": {"folder": ""}}}
113
95
 
114
96
  INGESTION_LEVEL_METADATA = {
@@ -1,9 +1,8 @@
1
1
  import json
2
2
  from collections import namedtuple
3
- import os
4
- import shutil
3
+
5
4
  from typing import Dict, Optional, List
6
- import warnings
5
+
7
6
  import pandas as pd
8
7
  from functools import lru_cache
9
8
  from polly.errors import (
@@ -13,15 +12,15 @@ from polly.errors import (
13
12
  RequestException,
14
13
  UnauthorizedException,
15
14
  extract_json_api_error,
16
- paramException,
17
15
  )
18
16
  from polly.auth import Polly
19
- from polly.cohort import Cohort
17
+
20
18
  from polly import helpers, constants as const, application_error_info as app_err_info
21
19
  from polly.help import example
22
20
  import polly.http_response_codes as http_codes
23
- from polly.constants import SUPPORTED_ENTITY_TYPES, CURATION_COHORT_CACHE
24
- from polly.helpers import get_cohort_constants
21
+
22
+ from polly.constants import SUPPORTED_ENTITY_TYPES
23
+
25
24
  from polly.tracking import Track
26
25
 
27
26
 
@@ -48,8 +47,6 @@ class Curation:
48
47
  env="",
49
48
  default_env="polly",
50
49
  ) -> None:
51
- # check if COMPUTE_ENV_VARIABLE present or not
52
- # if COMPUTE_ENV_VARIABLE, give priority
53
50
  env = helpers.get_platform_value_from_env(
54
51
  const.COMPUTE_ENV_VARIABLE, default_env, env
55
52
  )
@@ -60,163 +57,12 @@ class Curation:
60
57
  f"https://api.datalake.discover.{self.session.env}.elucidata.io/elastic/v2"
61
58
  )
62
59
  self.inference_url = f"https://api.discover.{self.session.env}.elucidata.io/curations/inferences/"
63
- self.cohort = Cohort()
64
- self.cohort_constants = get_cohort_constants()
65
60
 
66
61
  def _handle_errors(self, response):
67
62
  detail = response.get("errors")[0].get("detail", [])
68
63
  title = response.get("errors")[0].get("title", [])
69
64
  return title, detail
70
65
 
71
- def _fetch_metadata_from_cohort(self, repo_name: str, dataset_ids: List[str]):
72
- """
73
- Utility function for fetching metadata using cohorts.
74
-
75
- Arguments:
76
- repo_name (str) : name of the repository for fetching datasets.
77
- dataset_ids (List[str]): dataset ids to be used for inference
78
-
79
- Returns:
80
- Returns sample metadata, dataset and sample ids.
81
- """
82
- sample_metadata = {}
83
- dataset_to_sample_id = {"dataset_id": [], "sample_id": []}
84
-
85
- if not (os.path.isdir(CURATION_COHORT_CACHE)):
86
- os.mkdir(CURATION_COHORT_CACHE)
87
- else:
88
- shutil.rmtree(CURATION_COHORT_CACHE)
89
- os.mkdir(CURATION_COHORT_CACHE)
90
-
91
- self.cohort.create_cohort(
92
- CURATION_COHORT_CACHE, "sample_metadata_query", "desc"
93
- )
94
-
95
- # Fetch metadata using cohorts
96
- for dataset_id in dataset_ids:
97
- datasets_sample_metadata = []
98
-
99
- if not (
100
- repo_name in self.cohort_constants
101
- and self.cohort_constants[repo_name]["file_structure"] != "multiple"
102
- ):
103
- # multiple mapped repo such as GEO
104
- self.cohort.add_to_cohort(repo_name, dataset_id=dataset_id)
105
- else:
106
- # for single mapped repos such as TCGA
107
- self.cohort.add_to_cohort(repo_name, dataset_id=[dataset_id])
108
-
109
- col_metadata = self.cohort.merge_data("sample")
110
- all_sample_ids = col_metadata.index.tolist()
111
-
112
- col_metadata.loc[:, "dataset_id"] = dataset_id
113
- dataset_to_sample_id["dataset_id"] += [dataset_id] * len(all_sample_ids)
114
-
115
- col_metadata.loc[:, "sample_id"] = all_sample_ids
116
- dataset_to_sample_id["sample_id"] += all_sample_ids
117
-
118
- datasets_sample_metadata += list(col_metadata.T.to_dict().values())
119
-
120
- if not (
121
- repo_name in self.cohort_constants
122
- and self.cohort_constants[repo_name]["file_structure"] != "multiple"
123
- ):
124
- self.cohort.remove_from_cohort(dataset_id)
125
- else:
126
- self.cohort.remove_from_cohort([dataset_id])
127
-
128
- sample_metadata[dataset_id] = datasets_sample_metadata
129
-
130
- dataset_to_sample_id = pd.DataFrame.from_dict(dataset_to_sample_id)
131
-
132
- return sample_metadata, dataset_to_sample_id
133
-
134
- def _clinical_model_param_checks(
135
- self,
136
- repo_name: str,
137
- dataset_ids: List[str],
138
- sample_ids: Optional[List[str]] = None,
139
- ):
140
- """
141
- Checking the parameter passed to the clinical label assigning model.
142
-
143
- Arguments:
144
- repo_name (str): repo name
145
- dataset_ids (list[str]): list of dataset ids
146
-
147
- Keyword Arguments:
148
- sample_ids (list[str], optional): Optional Parameter. List of sample ids.
149
- Default is 'None'.
150
-
151
- Raises:
152
- paramException
153
- """
154
- if dataset_ids is None or type(dataset_ids) is not list:
155
- raise paramException(
156
- title="Param Exception",
157
- detail="Dataset IDs should be given as a valid list of strings",
158
- )
159
-
160
- if sample_ids is not None and type(sample_ids) is not list:
161
- raise paramException(
162
- title="Param Exception",
163
- detail="Sample IDs should be given as a valid list of strings",
164
- )
165
-
166
- if repo_name != "geo" and not any(
167
- ["GSE" in dataset_id for dataset_id in dataset_ids]
168
- ):
169
- warnings.warn(
170
- "The model is tested with GEO metadata and the labels may be wrong for other repos"
171
- )
172
-
173
- def _post_process_clinical_tags(
174
- self,
175
- clinical_tags: pd.DataFrame,
176
- is_sample_tag: bool,
177
- sample_ids: Optional[List[str]] = None,
178
- ) -> pd.DataFrame:
179
- """
180
- process the response of the model (dataframe with clinical tags and samples)
181
- and return relevant feilds.
182
- incase no sample_ids are provided by the user, we return the dataset_ids and the clinical tags
183
- incase sample_ids are also provided, then we return the dataset_ids, the sample_ids and the clincal tags.
184
-
185
- Arguments:
186
- clinical_tags (pd.DataFrame): dataframe of the sample_ids and assigned clinical tags
187
- is_sample_tag (bool): if samples passed
188
-
189
- Keyword Arguments:
190
- sample_ids (list[str]): list of sample ids (default: {None})
191
-
192
- Returns:
193
- a dataframe with the the dataset_ids, sample_ids and the assigned clinical tags
194
- """
195
- if is_sample_tag:
196
- # if the user has provided list of samples, then we filter in just those sample ids
197
- # for the dataset ids.
198
- # taking only those clinical tags and samples where the sample_ids are in the sample_id list
199
- # provided by the user.
200
- clinical_tags = clinical_tags[
201
- clinical_tags["sample_id"].isin(sample_ids)
202
- ].reset_index(drop=True)
203
-
204
- # in case the sample_ids provided by the user are not present in the dataset_ids provided.
205
- if clinical_tags.empty or clinical_tags.shape[0] < len(sample_ids):
206
- warnings.warn(
207
- "The output is empty or has missing sample ids because they are not present in given datasets."
208
- )
209
-
210
- # return sample level tags here
211
- return clinical_tags
212
- # if no sample_ids were passed by the user, then
213
- # returning dataset level tags by removing sample id and removing duplicate columns
214
- return (
215
- clinical_tags.drop(columns=["sample_id"])
216
- .drop_duplicates()
217
- .reset_index(drop=True)
218
- )
219
-
220
66
  def _handle_perform_inference_api_error(self, response):
221
67
  if response.status_code == http_codes.UNAUTHORIZED:
222
68
  raise UnauthorizedException("User is unauthorized to access this")
@@ -482,84 +328,3 @@ class Curation:
482
328
  sample_metadata["is_control"] = output["is_control"].values
483
329
  sample_metadata["control_prob"] = output["control_prob"].values
484
330
  return sample_metadata
485
-
486
- @Track.track_decorator
487
- def assign_clinical_labels(
488
- self,
489
- repo_name: str,
490
- dataset_ids: List[str],
491
- sample_ids: Optional[List[str]] = None,
492
- ) -> pd.DataFrame:
493
- """
494
- Returns a list of clinical or non clinical labels for the given datasets or samples.
495
-
496
- Arguments:
497
- repo_name (str): name of the repository for fetching datasets.
498
- dataset_ids (List[str]): dataset ids to be used for inference
499
-
500
- Keyword Arguments:
501
- sample_ids (List[str], optional): Optional Parameter. Sample ids if that is needed.
502
-
503
- Raises:
504
- RequestException: API response exception
505
- ParamException: Invalid parameters
506
- err
507
-
508
- Returns:
509
- dataframe which is a list of clinical tags for given ids
510
- """
511
- warnings.formatwarning = lambda msg, *args, **kwargs: f"WARNING: {msg}\n"
512
-
513
- try:
514
- self._clinical_model_param_checks(repo_name, dataset_ids, sample_ids)
515
- # evaluating the inference level based on if the user has provided sample_ids
516
- is_sample_tag = sample_ids is not None
517
- inference_level = "sample_id" if (is_sample_tag) else "dataset_id"
518
-
519
- sample_metadata, dataset_to_sample_id = self._fetch_metadata_from_cohort(
520
- repo_name=repo_name, dataset_ids=dataset_ids
521
- )
522
-
523
- clinical_model_predictions = []
524
-
525
- for dataset_id in sample_metadata:
526
- # Get output from model endpoint and structure output
527
- payload = {
528
- "sample_metadata": sample_metadata[dataset_id],
529
- "sample_id_column": "sample_id",
530
- "dataset_id_column": "dataset_id",
531
- "is_sample_tag": is_sample_tag,
532
- }
533
-
534
- output = self._perform_inference("clinical-classifier", payload)
535
- if "errors" in output:
536
- title, detail = self._handle_errors(output)
537
- raise RequestException(title, detail)
538
-
539
- output = output["clinical_predictions"]
540
-
541
- clinical_model_predictions += output
542
-
543
- # creating dataframe with inference_level and clinical_tags with values from the clinical_model_predictions
544
- clinical_tags = pd.DataFrame(
545
- {
546
- inference_level: [
547
- tag["tag_id"] for tag in clinical_model_predictions
548
- ],
549
- "clinical_tag": [
550
- tag["clinical_tag"] for tag in clinical_model_predictions
551
- ],
552
- }
553
- )
554
-
555
- clinical_tags = pd.merge(
556
- dataset_to_sample_id, clinical_tags, on=inference_level
557
- )
558
-
559
- clinical_tags = self._post_process_clinical_tags(
560
- clinical_tags, is_sample_tag, sample_ids
561
- )
562
- except Exception as err:
563
- raise err
564
-
565
- return clinical_tags
@@ -97,68 +97,6 @@ class InvalidDirectoryPathException(Exception):
97
97
  return "This path does not represent an existing directory. Please try again."
98
98
 
99
99
 
100
- class InvalidCohortPathException(Exception):
101
- def __str__(self):
102
- return "This path does not represent a Cohort. Please try again."
103
-
104
-
105
- class InvalidCohortNameException(Exception):
106
- def __str__(self, cohort_name):
107
- return f"The identifier {cohort_name} does not represent a valid cohort name. Please try again."
108
-
109
-
110
- class InvalidRepoException(Exception):
111
- def __init__(self, repo_name):
112
- self.repo_name = repo_name
113
-
114
- def __str__(self):
115
- return f"The repository : {self.repo_name} is not supported. Please contact Polly Support."
116
-
117
-
118
- class InvalidDatasetException(Exception):
119
- def __str__(self):
120
- return "Dataset/s not added."
121
-
122
-
123
- class InvalidCohortOperationException(Exception):
124
- def __str__(self):
125
- return "This operation is not valid as no cohort has been instantiated."
126
-
127
-
128
- class EmptyCohortException(Exception):
129
- def __str__(self):
130
- return "There are no datasets in the cohort. Please try adding datasets using add_to_cohort() function."
131
-
132
-
133
- class CohortEditException(Exception):
134
- def __str__(self):
135
- return "No parameter specified for editing in cohort"
136
-
137
-
138
- class InvalidCohortMergeOperation(Exception):
139
- def __str__(self):
140
- return "Incorrect or blank parameter specified for merging in cohort"
141
-
142
-
143
- class InvalidCohortAddition(Exception):
144
- def __str__(self):
145
- return "The repository type is not compatible with the cohort due to different file structure. Please try again."
146
-
147
-
148
- class OutdatedCohortVersion(Exception):
149
- def __init__(self, version):
150
- self.version = version
151
-
152
- def __str__(self):
153
- return f"The Cohort version is outdated. Please try again with the new version VERSION-{self.version}."
154
-
155
-
156
- class TechnicalFaultException(Exception):
157
- def __str__(self):
158
- return "Samples not downloaded due to a technical fault. Please check \
159
- the arguments passed and try again. Contact Polly Support in case of repeated failure."
160
-
161
-
162
100
  class RequestFailureException(Exception):
163
101
  def __str__(self):
164
102
  return "Sorry, we're unable to fetch the metadata now. Please contact polly.support@elucidata.io"
@@ -179,7 +179,7 @@ def checkclass(cls) -> None:
179
179
  print("Note : use class to get help")
180
180
  raise TypeError(title="Use class")
181
181
 
182
- if cls.__name__ not in ["Polly", "OmixAtlas", "Cohort", "Workspaces"]:
182
+ if cls.__name__ not in ["Polly", "OmixAtlas", "Workspaces"]:
183
183
  print("Other class methods not allowed")
184
184
  raise Exception(title="Other class are not allowed")
185
185
 
@@ -210,7 +210,6 @@ def get_line(fun: str, kind: str, txt: str, function_name: str, cls, doc: bool)
210
210
  # function will return lines to print
211
211
  # for a function or class
212
212
  Link = {
213
- "cohort": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/cohort.ipynb",
214
213
  "omixatlas": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/omixatlas.ipynb",
215
214
  "polly": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/polly.ipynb",
216
215
  "workspaces": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/workspaces.ipynb",
@@ -301,7 +300,7 @@ def get_txt(
301
300
 
302
301
  def example(cls, function_name: str = "") -> None:
303
302
  """
304
- function to see examples for class - Polly, OmixAtlas, Workspaces, Cohort and it's member funtions
303
+ function to see examples for class - Polly, OmixAtlas, Workspaces and it's member funtions
305
304
 
306
305
  ``Args:``
307
306
  ``function_name (optional) str:`` provide function name to see examples default empty.
@@ -1,7 +1,8 @@
1
1
  import os
2
2
  import re
3
3
  import json
4
- import logging
4
+
5
+ # import logging
5
6
  import requests
6
7
  import urllib.request
7
8
  from cloudpathlib import S3Client
@@ -16,16 +17,14 @@ from polly.errors import (
16
17
  OperationFailedException,
17
18
  paramException,
18
19
  AccessDeniedError,
19
- InvalidRepoException,
20
20
  DatatypeNotFoundException,
21
21
  RepositoryNotFoundException,
22
22
  )
23
- from polly.constants import COHORT_CONSTANTS_URL
24
- import contextlib
25
- import joblib
23
+
26
24
  import urllib
27
25
  import pandas as pd
28
- import polly.http_response_codes as http_codes
26
+
27
+ # import polly.http_response_codes as http_codes
29
28
  from polly.tracking import Track
30
29
  import polly.constants as const
31
30
  import string
@@ -411,43 +410,6 @@ def elastic_query(index_name: str, dataset_id: str) -> dict:
411
410
  return query
412
411
 
413
412
 
414
- def get_cohort_constants() -> json:
415
- """
416
- Returns cohort info from public assests url
417
- """
418
- response = requests.get(COHORT_CONSTANTS_URL)
419
- error_handler(response)
420
- return json.loads(response.text)
421
-
422
-
423
- def validate_datatype(datatype: str):
424
- """
425
- Function to validate datatype of a dataset
426
- Returns 1 in case of datatype is Single Cell, 0 otherwise
427
- """
428
- if datatype == "Single cell":
429
- return 1
430
- return 0
431
-
432
-
433
- @contextlib.contextmanager
434
- def tqdm_joblib(tqdm_object):
435
- """Context manager to patch joblib to report into tqdm progress bar given as argument"""
436
-
437
- class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
438
- def __call__(self, *args, **kwargs):
439
- tqdm_object.update(n=self.batch_size)
440
- return super().__call__(*args, **kwargs)
441
-
442
- old_batch_callback = joblib.parallel.BatchCompletionCallBack
443
- joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
444
- try:
445
- yield tqdm_object
446
- finally:
447
- joblib.parallel.BatchCompletionCallBack = old_batch_callback
448
- tqdm_object.close()
449
-
450
-
451
413
  def check_empty(x):
452
414
  """
453
415
  Function to validate if the entry is an empty list or not.
@@ -593,21 +555,6 @@ def workspaces_permission_check(self, workspace_id) -> bool:
593
555
  )
594
556
 
595
557
 
596
- def return_entity_type(data_source: str, cohort_info: json) -> str:
597
- """
598
- Function to return entity type based on the cohort info present in public assets
599
- """
600
- if data_source not in cohort_info:
601
- raise InvalidRepoException(data_source)
602
- for repo, dict in cohort_info.items():
603
- if data_source == repo:
604
- if dict["file_structure"] == "single":
605
- entity_type = "dataset"
606
- elif dict["file_structure"] == "multiple":
607
- entity_type = "sample"
608
- return entity_type
609
-
610
-
611
558
  def get_files_in_dir(path_to_dir: str) -> list:
612
559
  """
613
560
  returns the files in a given directory
@@ -722,35 +669,6 @@ def replace_original_name_field(
722
669
  return replaced_metadata
723
670
 
724
671
 
725
- def upload_html_file(
726
- session, workspace_id: int, workspace_path: str, local_report_path: str
727
- ):
728
- """
729
- Function to upload an html file to a workspace.
730
- """
731
- upload_url = f"https://v2.api.{session.env}.elucidata.io/workspaces/{workspace_id}/upload_url"
732
- params = {"file_path": workspace_path, "content_type": "text/html"}
733
- # get request to get the signed url for s3
734
- response = session.get(upload_url, params=params)
735
- error_handler(response)
736
- attributes = response.json().get("data").get("attributes")
737
- try:
738
- with open(local_report_path, "rb") as file_to_upload:
739
- # uploading the local file to the signed url
740
- files = {"file": (local_report_path, file_to_upload)}
741
- upload_response = requests.post(
742
- attributes["url"], data=attributes["fields"], files=files
743
- )
744
- error_handler(upload_response)
745
- if upload_response.status_code == http_codes.CREATED:
746
- logging.basicConfig(level=logging.INFO)
747
- logging.info(
748
- f"File uploaded successfully to workspace-id = {workspace_id} at path = {workspace_path}!"
749
- )
750
- except Exception as e:
751
- raise e
752
-
753
-
754
672
  def get_folder_list_from_list_of_filepaths(filenames_fullpath_list: list) -> list:
755
673
  """
756
674
  gives back only the folders from a list of filepaths provided.