PyPI - polly-python - Versions diffs - 2.5.0__tar.gz → 3.1.0__tar.gz - Mend

polly-python 2.5.0__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

{polly_python-2.5.0/polly_python.egg-info → polly_python-3.1.0}/PKG-INFO RENAMED Viewed

@@ -1,14 +1,14 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: polly_python
-Version: 2.5.0
+Version: 3.1.0
 Summary: Polly SDK
 Home-page: https://github.com/ElucidataInc/polly-python
 Project-URL: Documentation, https://docs.elucidata.io
 Project-URL: Tutorial Notebooks, https://github.com/ElucidataInc/polly-python
-Requires-Python: >3.8
+Requires-Python: <3.12,>=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
-Requires-Dist: elucidatacmapPy==3.3.4
+Requires-Dist: cmapPy<=4.0.1
 Requires-Dist: cloudpathlib>=0.15.0
 Requires-Dist: retrying==1.3.4
 Requires-Dist: rst2txt==1.1.0
@@ -17,22 +17,15 @@ Requires-Dist: mixpanel==4.10.0
 Requires-Dist: Deprecated>=1.2.12
 Requires-Dist: pytest>=6.2.5
 Requires-Dist: cryptography<=38.0.0,>=37.0.1
-Requires-Dist: plotly<5.0.0,>=4.8.1; python_version > "3.6" and python_version < "3.7"
 Requires-Dist: plotly>=5.0.0; python_version >= "3.7"
-Requires-Dist: pandas<1.2.0,>=1.1.0; python_version > "3.6" and python_version < "3.7"
-Requires-Dist: pandas>=1.3.5; python_version >= "3.7"
-Requires-Dist: pydantic<1.10.0a1,>=1.8.2; python_version > "3.6" and python_version < "3.7"
+Requires-Dist: pandas<=2.2.2,>=1.3.5; python_version >= "3.7"
+Requires-Dist: numpy<=1.26.4
 Requires-Dist: pydantic==1.10.12; python_version >= "3.7"
 Requires-Dist: requests==2.28.1
-Requires-Dist: numpy==1.26.4
-Requires-Dist: boto3<1.24.0,>=1.17.73; python_version > "3.6" and python_version < "3.7"
-Requires-Dist: boto3>=1.24.0; python_version >= "3.7"
-Requires-Dist: botocore<1.27.0,>=1.20.73; python_version > "3.6" and python_version < "3.7"
-Requires-Dist: botocore>=1.27.0; python_version >= "3.7"
-Requires-Dist: joblib<=1.1.0,>0.11.0; python_version > "3.6" and python_version < "3.7"
+Requires-Dist: boto3<2.0,>=1.24.0; python_version >= "3.7"
+Requires-Dist: botocore<2.0,>=1.27.0; python_version >= "3.7"
 Requires-Dist: joblib>=1.2.0; python_version >= "3.7"
 Requires-Dist: tabulate==0.9.0
-Requires-Dist: tqdm<4.65.0,>=4.61.0; python_version > "3.6" and python_version < "3.7"
 Requires-Dist: tqdm==4.65.0; python_version >= "3.7"
 Provides-Extra: testing
 Requires-Dist: black; extra == "testing"

polly_python-3.1.0/polly/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "3.1.0"

{polly_python-2.5.0 → polly_python-3.1.0}/polly/constants.py RENAMED Viewed

@@ -91,24 +91,6 @@ IO_CHUNKSIZE_LARGE_FILE_SIZE = 100 * MB
 # S3 Exceptions
 EXPIRED_TOKEN = "ExpiredToken"
-# cohort constants
-COHORT_VERSION = "0.2"
-COHORT_CONSTANTS_URL = (
-    "https://elucidatainc.github.io/PublicAssets/cohort_constants.txt"
-)
-OBSOLETE_METADATA_FIELDS = [
-    "package",
-    "region",
-    "bucket",
-    "key",
-    "file_type",
-    "file_location",
-    "src_uri",
-    "timestamp_",
-]
-dot = "."
 GETTING_UPLOAD_URLS_PAYLOAD = {"data": {"type": "files", "attributes": {"folder": ""}}}
 INGESTION_LEVEL_METADATA = {

{polly_python-2.5.0 → polly_python-3.1.0}/polly/curation.py RENAMED Viewed

@@ -1,9 +1,8 @@
 import json
 from collections import namedtuple
-import os
-import shutil
 from typing import Dict, Optional, List
-import warnings
 import pandas as pd
 from functools import lru_cache
 from polly.errors import (
@@ -13,15 +12,15 @@ from polly.errors import (
     RequestException,
     UnauthorizedException,
     extract_json_api_error,
-    paramException,
 )
 from polly.auth import Polly
-from polly.cohort import Cohort
 from polly import helpers, constants as const, application_error_info as app_err_info
 from polly.help import example
 import polly.http_response_codes as http_codes
-from polly.constants import SUPPORTED_ENTITY_TYPES, CURATION_COHORT_CACHE
-from polly.helpers import get_cohort_constants
+from polly.constants import SUPPORTED_ENTITY_TYPES
 from polly.tracking import Track
@@ -48,8 +47,6 @@ class Curation:
         env="",
         default_env="polly",
     ) -> None:
-        # check if COMPUTE_ENV_VARIABLE present or not
-        # if COMPUTE_ENV_VARIABLE, give priority
         env = helpers.get_platform_value_from_env(
             const.COMPUTE_ENV_VARIABLE, default_env, env
         )
@@ -60,163 +57,12 @@ class Curation:
             f"https://api.datalake.discover.{self.session.env}.elucidata.io/elastic/v2"
         )
         self.inference_url = f"https://api.discover.{self.session.env}.elucidata.io/curations/inferences/"
-        self.cohort = Cohort()
-        self.cohort_constants = get_cohort_constants()
     def _handle_errors(self, response):
         detail = response.get("errors")[0].get("detail", [])
         title = response.get("errors")[0].get("title", [])
         return title, detail
-    def _fetch_metadata_from_cohort(self, repo_name: str, dataset_ids: List[str]):
-        """
-        Utility function for fetching metadata using cohorts.
-        Arguments:
-            repo_name (str) : name of the repository for fetching datasets.
-            dataset_ids (List[str]): dataset ids to be used for inference
-        Returns:
-            Returns sample metadata, dataset and sample ids.
-        """
-        sample_metadata = {}
-        dataset_to_sample_id = {"dataset_id": [], "sample_id": []}
-        if not (os.path.isdir(CURATION_COHORT_CACHE)):
-            os.mkdir(CURATION_COHORT_CACHE)
-        else:
-            shutil.rmtree(CURATION_COHORT_CACHE)
-            os.mkdir(CURATION_COHORT_CACHE)
-        self.cohort.create_cohort(
-            CURATION_COHORT_CACHE, "sample_metadata_query", "desc"
-        )
-        # Fetch metadata using cohorts
-        for dataset_id in dataset_ids:
-            datasets_sample_metadata = []
-            if not (
-                repo_name in self.cohort_constants
-                and self.cohort_constants[repo_name]["file_structure"] != "multiple"
-            ):
-                # multiple mapped repo such as GEO
-                self.cohort.add_to_cohort(repo_name, dataset_id=dataset_id)
-            else:
-                # for single mapped repos such as TCGA
-                self.cohort.add_to_cohort(repo_name, dataset_id=[dataset_id])
-            col_metadata = self.cohort.merge_data("sample")
-            all_sample_ids = col_metadata.index.tolist()
-            col_metadata.loc[:, "dataset_id"] = dataset_id
-            dataset_to_sample_id["dataset_id"] += [dataset_id] * len(all_sample_ids)
-            col_metadata.loc[:, "sample_id"] = all_sample_ids
-            dataset_to_sample_id["sample_id"] += all_sample_ids
-            datasets_sample_metadata += list(col_metadata.T.to_dict().values())
-            if not (
-                repo_name in self.cohort_constants
-                and self.cohort_constants[repo_name]["file_structure"] != "multiple"
-            ):
-                self.cohort.remove_from_cohort(dataset_id)
-            else:
-                self.cohort.remove_from_cohort([dataset_id])
-            sample_metadata[dataset_id] = datasets_sample_metadata
-        dataset_to_sample_id = pd.DataFrame.from_dict(dataset_to_sample_id)
-        return sample_metadata, dataset_to_sample_id
-    def _clinical_model_param_checks(
-        self,
-        repo_name: str,
-        dataset_ids: List[str],
-        sample_ids: Optional[List[str]] = None,
-    ):
-        """
-        Checking the parameter passed to the clinical label assigning model.
-        Arguments:
-            repo_name (str): repo name
-            dataset_ids (list[str]): list of dataset ids
-        Keyword Arguments:
-            sample_ids (list[str], optional): Optional Parameter. List of sample ids.
-            Default is 'None'.
-        Raises:
-            paramException
-        """
-        if dataset_ids is None or type(dataset_ids) is not list:
-            raise paramException(
-                title="Param Exception",
-                detail="Dataset IDs should be given as a valid list of strings",
-            )
-        if sample_ids is not None and type(sample_ids) is not list:
-            raise paramException(
-                title="Param Exception",
-                detail="Sample IDs should be given as a valid list of strings",
-            )
-        if repo_name != "geo" and not any(
-            ["GSE" in dataset_id for dataset_id in dataset_ids]
-        ):
-            warnings.warn(
-                "The model is tested with GEO metadata and the labels may be wrong for other repos"
-            )
-    def _post_process_clinical_tags(
-        self,
-        clinical_tags: pd.DataFrame,
-        is_sample_tag: bool,
-        sample_ids: Optional[List[str]] = None,
-    ) -> pd.DataFrame:
-        """
-        process the response of the model (dataframe with clinical tags and samples)
-        and return relevant feilds.
-        incase no sample_ids are provided by the user, we return the dataset_ids and the clinical tags
-        incase sample_ids are also provided, then we return the dataset_ids, the sample_ids and the clincal tags.
-        Arguments:
-            clinical_tags (pd.DataFrame): dataframe of the sample_ids and assigned clinical tags
-            is_sample_tag (bool): if samples passed
-        Keyword Arguments:
-            sample_ids (list[str]): list of sample ids (default: {None})
-        Returns:
-            a dataframe with the the dataset_ids, sample_ids and the assigned clinical tags
-        """
-        if is_sample_tag:
-            # if the user has provided list of samples, then we filter in just those sample ids
-            # for the dataset ids.
-            # taking only those clinical tags and samples where the sample_ids are in the sample_id list
-            # provided by the user.
-            clinical_tags = clinical_tags[
-                clinical_tags["sample_id"].isin(sample_ids)
-            ].reset_index(drop=True)
-            # in case the sample_ids provided by the user are not present in the dataset_ids provided.
-            if clinical_tags.empty or clinical_tags.shape[0] < len(sample_ids):
-                warnings.warn(
-                    "The output is empty or has missing sample ids because they are not present in given datasets."
-                )
-            # return sample level tags here
-            return clinical_tags
-        # if no sample_ids were passed by the user, then
-        # returning dataset level tags by removing sample id and removing duplicate columns
-        return (
-            clinical_tags.drop(columns=["sample_id"])
-            .drop_duplicates()
-            .reset_index(drop=True)
-        )
     def _handle_perform_inference_api_error(self, response):
         if response.status_code == http_codes.UNAUTHORIZED:
             raise UnauthorizedException("User is unauthorized to access this")
@@ -482,84 +328,3 @@ class Curation:
         sample_metadata["is_control"] = output["is_control"].values
         sample_metadata["control_prob"] = output["control_prob"].values
         return sample_metadata
-    @Track.track_decorator
-    def assign_clinical_labels(
-        self,
-        repo_name: str,
-        dataset_ids: List[str],
-        sample_ids: Optional[List[str]] = None,
-    ) -> pd.DataFrame:
-        """
-        Returns a list of clinical or non clinical labels for the given datasets or samples.
-        Arguments:
-            repo_name (str): name of the repository for fetching datasets.
-            dataset_ids (List[str]): dataset ids to be used for inference
-        Keyword Arguments:
-            sample_ids (List[str], optional): Optional Parameter. Sample ids if that is needed.
-        Raises:
-            RequestException: API response exception
-            ParamException: Invalid parameters
-            err
-        Returns:
-            dataframe which is a list of clinical tags for given ids
-        """
-        warnings.formatwarning = lambda msg, *args, **kwargs: f"WARNING: {msg}\n"
-        try:
-            self._clinical_model_param_checks(repo_name, dataset_ids, sample_ids)
-            # evaluating the inference level based on if the user has provided sample_ids
-            is_sample_tag = sample_ids is not None
-            inference_level = "sample_id" if (is_sample_tag) else "dataset_id"
-            sample_metadata, dataset_to_sample_id = self._fetch_metadata_from_cohort(
-                repo_name=repo_name, dataset_ids=dataset_ids
-            )
-            clinical_model_predictions = []
-            for dataset_id in sample_metadata:
-                # Get output from model endpoint and structure output
-                payload = {
-                    "sample_metadata": sample_metadata[dataset_id],
-                    "sample_id_column": "sample_id",
-                    "dataset_id_column": "dataset_id",
-                    "is_sample_tag": is_sample_tag,
-                }
-                output = self._perform_inference("clinical-classifier", payload)
-                if "errors" in output:
-                    title, detail = self._handle_errors(output)
-                    raise RequestException(title, detail)
-                output = output["clinical_predictions"]
-                clinical_model_predictions += output
-            # creating dataframe with inference_level and clinical_tags with values from the clinical_model_predictions
-            clinical_tags = pd.DataFrame(
-                {
-                    inference_level: [
-                        tag["tag_id"] for tag in clinical_model_predictions
-                    ],
-                    "clinical_tag": [
-                        tag["clinical_tag"] for tag in clinical_model_predictions
-                    ],
-                }
-            )
-            clinical_tags = pd.merge(
-                dataset_to_sample_id, clinical_tags, on=inference_level
-            )
-            clinical_tags = self._post_process_clinical_tags(
-                clinical_tags, is_sample_tag, sample_ids
-            )
-        except Exception as err:
-            raise err
-        return clinical_tags

{polly_python-2.5.0 → polly_python-3.1.0}/polly/errors.py RENAMED Viewed

@@ -97,68 +97,6 @@ class InvalidDirectoryPathException(Exception):
         return "This path does not represent an existing directory. Please try again."
-class InvalidCohortPathException(Exception):
-    def __str__(self):
-        return "This path does not represent a Cohort. Please try again."
-class InvalidCohortNameException(Exception):
-    def __str__(self, cohort_name):
-        return f"The identifier {cohort_name} does not represent a valid cohort name. Please try again."
-class InvalidRepoException(Exception):
-    def __init__(self, repo_name):
-        self.repo_name = repo_name
-    def __str__(self):
-        return f"The repository : {self.repo_name} is not supported. Please contact Polly Support."
-class InvalidDatasetException(Exception):
-    def __str__(self):
-        return "Dataset/s not added."
-class InvalidCohortOperationException(Exception):
-    def __str__(self):
-        return "This operation is not valid as no cohort has been instantiated."
-class EmptyCohortException(Exception):
-    def __str__(self):
-        return "There are no datasets in the cohort. Please try adding datasets using add_to_cohort() function."
-class CohortEditException(Exception):
-    def __str__(self):
-        return "No parameter specified for editing in cohort"
-class InvalidCohortMergeOperation(Exception):
-    def __str__(self):
-        return "Incorrect or blank parameter specified for merging in cohort"
-class InvalidCohortAddition(Exception):
-    def __str__(self):
-        return "The repository type is not compatible with the cohort due to different file structure. Please try again."
-class OutdatedCohortVersion(Exception):
-    def __init__(self, version):
-        self.version = version
-    def __str__(self):
-        return f"The Cohort version is outdated. Please try again with the new version VERSION-{self.version}."
-class TechnicalFaultException(Exception):
-    def __str__(self):
-        return "Samples not downloaded due to a technical fault. Please check \
-the arguments passed and try again. Contact Polly Support in case of repeated failure."
 class RequestFailureException(Exception):
     def __str__(self):
         return "Sorry, we're unable to fetch the metadata now. Please contact polly.support@elucidata.io"

{polly_python-2.5.0 → polly_python-3.1.0}/polly/help.py RENAMED Viewed

@@ -179,7 +179,7 @@ def checkclass(cls) -> None:
         print("Note : use class to get help")
         raise TypeError(title="Use class")
-    if cls.__name__ not in ["Polly", "OmixAtlas", "Cohort", "Workspaces"]:
+    if cls.__name__ not in ["Polly", "OmixAtlas", "Workspaces"]:
         print("Other class methods not allowed")
         raise Exception(title="Other class are not allowed")
@@ -210,7 +210,6 @@ def get_line(fun: str, kind: str, txt: str, function_name: str, cls, doc: bool)
     # function will return lines to print
     # for a function or class
     Link = {
-        "cohort": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/cohort.ipynb",
         "omixatlas": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/omixatlas.ipynb",
         "polly": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/polly.ipynb",
         "workspaces": "https://github.com/ElucidataInc/PublicAssets/blob/master/polly-python/example/workspaces.ipynb",
@@ -301,7 +300,7 @@ def get_txt(
 def example(cls, function_name: str = "") -> None:
     """
-    function to see examples for class - Polly, OmixAtlas, Workspaces, Cohort and it's member funtions
+    function to see examples for class - Polly, OmixAtlas, Workspaces and it's member funtions
     ``Args:``
         ``function_name (optional) str:`` provide function name to see examples default empty.

{polly_python-2.5.0 → polly_python-3.1.0}/polly/helpers.py RENAMED Viewed

@@ -1,7 +1,8 @@
 import os
 import re
 import json
-import logging
+# import logging
 import requests
 import urllib.request
 from cloudpathlib import S3Client
@@ -16,16 +17,14 @@ from polly.errors import (
     OperationFailedException,
     paramException,
     AccessDeniedError,
-    InvalidRepoException,
     DatatypeNotFoundException,
     RepositoryNotFoundException,
 )
-from polly.constants import COHORT_CONSTANTS_URL
-import contextlib
-import joblib
 import urllib
 import pandas as pd
-import polly.http_response_codes as http_codes
+# import polly.http_response_codes as http_codes
 from polly.tracking import Track
 import polly.constants as const
 import string
@@ -411,43 +410,6 @@ def elastic_query(index_name: str, dataset_id: str) -> dict:
     return query
-def get_cohort_constants() -> json:
-    """
-    Returns cohort info from public assests url
-    """
-    response = requests.get(COHORT_CONSTANTS_URL)
-    error_handler(response)
-    return json.loads(response.text)
-def validate_datatype(datatype: str):
-    """
-    Function to validate datatype of a dataset
-    Returns 1 in case of datatype is Single Cell, 0 otherwise
-    """
-    if datatype == "Single cell":
-        return 1
-    return 0
-@contextlib.contextmanager
-def tqdm_joblib(tqdm_object):
-    """Context manager to patch joblib to report into tqdm progress bar given as argument"""
-    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
-        def __call__(self, *args, **kwargs):
-            tqdm_object.update(n=self.batch_size)
-            return super().__call__(*args, **kwargs)
-    old_batch_callback = joblib.parallel.BatchCompletionCallBack
-    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
-    try:
-        yield tqdm_object
-    finally:
-        joblib.parallel.BatchCompletionCallBack = old_batch_callback
-        tqdm_object.close()
 def check_empty(x):
     """
     Function to validate if the entry is an empty list or not.
@@ -593,21 +555,6 @@ def workspaces_permission_check(self, workspace_id) -> bool:
         )
-def return_entity_type(data_source: str, cohort_info: json) -> str:
-    """
-    Function to return entity type based on the cohort info present in public assets
-    """
-    if data_source not in cohort_info:
-        raise InvalidRepoException(data_source)
-    for repo, dict in cohort_info.items():
-        if data_source == repo:
-            if dict["file_structure"] == "single":
-                entity_type = "dataset"
-            elif dict["file_structure"] == "multiple":
-                entity_type = "sample"
-    return entity_type
 def get_files_in_dir(path_to_dir: str) -> list:
     """
     returns the files in a given directory
@@ -722,35 +669,6 @@ def replace_original_name_field(
     return replaced_metadata
-def upload_html_file(
-    session, workspace_id: int, workspace_path: str, local_report_path: str
-):
-    """
-    Function to upload an html file to a workspace.
-    """
-    upload_url = f"https://v2.api.{session.env}.elucidata.io/workspaces/{workspace_id}/upload_url"
-    params = {"file_path": workspace_path, "content_type": "text/html"}
-    # get request to get the signed url for s3
-    response = session.get(upload_url, params=params)
-    error_handler(response)
-    attributes = response.json().get("data").get("attributes")
-    try:
-        with open(local_report_path, "rb") as file_to_upload:
-            # uploading the local file to the signed url
-            files = {"file": (local_report_path, file_to_upload)}
-            upload_response = requests.post(
-                attributes["url"], data=attributes["fields"], files=files
-            )
-            error_handler(upload_response)
-            if upload_response.status_code == http_codes.CREATED:
-                logging.basicConfig(level=logging.INFO)
-                logging.info(
-                    f"File uploaded successfully to workspace-id = {workspace_id} at path = {workspace_path}!"
-                )
-    except Exception as e:
-        raise e
 def get_folder_list_from_list_of_filepaths(filenames_fullpath_list: list) -> list:
     """
     gives back only the folders from a list of filepaths provided.

polly-python 2.5.0__tar.gz → 3.1.0__tar.gz

polly-python 2.5.0__tar.gz → 3.1.0__tar.gz