PyPI - scdataloader - Versions diffs - 2.0.6__py3-none-any.whl → 2.0.8__py3-none-any.whl - Mend

scdataloader 2.0.6__py3-none-any.whl → 2.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

scdataloader/data.py +2 -2
scdataloader/datamodule.py +10 -10
scdataloader/preprocess.py +27 -8
scdataloader/utils.py +34 -25
{scdataloader-2.0.6.dist-info → scdataloader-2.0.8.dist-info}/METADATA +115 -23
scdataloader-2.0.8.dist-info/RECORD +15 -0
scdataloader/data.json +0 -384
scdataloader-2.0.6.dist-info/RECORD +0 -16
{scdataloader-2.0.6.dist-info → scdataloader-2.0.8.dist-info}/WHEEL +0 -0
{scdataloader-2.0.6.dist-info → scdataloader-2.0.8.dist-info}/entry_points.txt +0 -0
{scdataloader-2.0.6.dist-info → scdataloader-2.0.8.dist-info}/licenses/LICENSE +0 -0

scdataloader/data.py CHANGED Viewed

@@ -200,7 +200,7 @@ class Dataset(torchDataset):
     def get_label_cats(
         self,
         obs_keys: Union[str, List[str]],
-    ):
+    ) -> np.ndarray:
         """
         Get combined categorical codes for one or more label columns.
@@ -226,7 +226,7 @@ class Dataset(torchDataset):
                 labels = concat_categorical_codes([labels, labels_to_str])
         return np.array(labels.codes)
-    def get_unseen_mapped_dataset_elements(self, idx: int):
+    def get_unseen_mapped_dataset_elements(self, idx: int) -> list[str]:
         """
         Get genes marked as unseen for a specific sample.

scdataloader/datamodule.py CHANGED Viewed

@@ -136,7 +136,7 @@ class DataModule(L.LightningDataModule):
                 increases sampling weight balance over epochs. Defaults to 0.
             start_at (int, optional): Starting index for resuming inference. Requires same
                 number of GPUs as previous run. Defaults to 0.
-            **kwargs: Additional arguments passed to PyTorch DataLoader (e.g., batch_size,
+            **kwargs: dict[str, Any]: Additional arguments passed to PyTorch DataLoader (e.g., batch_size,
                 num_workers, pin_memory).
         Attributes:
@@ -256,7 +256,7 @@ class DataModule(L.LightningDataModule):
         )
     @property
-    def decoders(self):
+    def decoders(self) -> dict[str, dict[int, str]]:
         """
         decoders the decoders for any labels that would have been encoded
@@ -269,7 +269,7 @@ class DataModule(L.LightningDataModule):
         return decoders
     @property
-    def labels_hierarchy(self):
+    def labels_hierarchy(self) -> dict[str, dict[str, str]]:
         """
         labels_hierarchy the hierarchy of labels for any cls that would have a hierarchy
@@ -287,7 +287,7 @@ class DataModule(L.LightningDataModule):
         return labels_hierarchy
     @property
-    def genes(self):
+    def genes(self) -> list:
         """
         genes the genes used in this datamodule
@@ -343,7 +343,7 @@ class DataModule(L.LightningDataModule):
     def num_datasets(self):
         return len(self.dataset.mapped_dataset.storages)
-    def setup(self, stage=None):
+    def setup(self, stage: Optional[str] = None) -> list[str]:
         """
         Prepare data splits for training, validation, and testing.
@@ -512,7 +512,7 @@ class DataModule(L.LightningDataModule):
         print(f"done setup, took {time.time() - start_time:.2f} seconds")
         return self.test_datasets
-    def train_dataloader(self, **kwargs):
+    def train_dataloader(self, **kwargs) -> DataLoader:
         """
         Create the training DataLoader with weighted random sampling.
@@ -521,7 +521,7 @@ class DataModule(L.LightningDataModule):
         distributed training without weighting.
         Args:
-            **kwargs: Additional arguments passed to DataLoader, overriding defaults.
+            **kwargs: dict[str, Any]: Additional arguments passed to DataLoader, overriding defaults.
         Returns:
             DataLoader: Training DataLoader instance.
@@ -560,7 +560,7 @@ class DataModule(L.LightningDataModule):
             **current_loader_kwargs,
         )
-    def val_dataloader(self):
+    def val_dataloader(self) -> Union[DataLoader, list]:
         """
         Create the validation DataLoader.
@@ -576,7 +576,7 @@ class DataModule(L.LightningDataModule):
             else []
         )
-    def test_dataloader(self):
+    def test_dataloader(self) -> Union[DataLoader, list]:
         """
         Create the test DataLoader with sequential sampling.
@@ -591,7 +591,7 @@ class DataModule(L.LightningDataModule):
             else []
         )
-    def predict_dataloader(self):
+    def predict_dataloader(self) -> DataLoader:
         """
         Create a DataLoader for prediction over all training data.

scdataloader/preprocess.py CHANGED Viewed

@@ -87,10 +87,12 @@ class Preprocessor:
                 If int, filters cells with counts. Defaults to False.
             normalize_sum (float or bool, optional): Determines whether to normalize the total counts of each cell to a specific value.
                 Defaults to 1e4.
-            log1p (bool, optional): Determines whether to apply log1p transform to the normalized data.
-                Defaults to True.
             n_hvg_for_postp (int or bool, optional): Determines whether to subset to highly variable genes for the PCA.
                 Defaults to False.
+            use_layer (str, optional): The layer to use for preprocessing.
+                Defaults to None.
+            is_symbol (bool, optional): Whether genes are provided as symbols instead of Ensembl IDs.
+                Defaults to False.
             hvg_flavor (str, optional): Specifies the flavor of highly variable genes selection.
                 See :func:`scanpy.pp.highly_variable_genes` for more details. Defaults to "seurat_v3".
             binning (int, optional): Determines whether to bin the data into discrete values of number of bins provided.
@@ -112,10 +114,20 @@ class Preprocessor:
                 Defaults to 5.
             pct_mt_outlier (int, optional): The maximum percentage of mitochondrial genes outlier.
                 Defaults to 8.
-            batch_key (str, optional): The key of :class:`~anndata.AnnData.obs` to use for batch information.
+            batch_keys (List[str], optional): The keys of :class:`~anndata.AnnData.obs` to use for batch information.
                 This arg is used in the highly variable gene selection step.
             skip_validate (bool, optional): Determines whether to skip the validation step.
                 Defaults to False.
+            additional_preprocess (Callable, optional): Additional preprocessing function.
+                Defaults to None.
+            additional_postprocess (Callable, optional): Additional postprocessing function.
+                Defaults to None.
+            do_postp (bool, optional): Whether to perform postprocessing.
+                Defaults to True.
+            organisms (List[str], optional): List of organisms to support.
+                Defaults to ["NCBITaxon:9606", "NCBITaxon:10090"].
+            use_raw (bool, optional): Whether to use raw counts.
+                Defaults to True.
             keepdata (bool, optional): Determines whether to keep the data in the AnnData object.
                 Defaults to False.
             drop_non_primary (bool, optional): Determines whether to drop non-primary cells.
@@ -483,13 +495,20 @@ class LaminPreprocessor(Preprocessor):
         version: str = "2",
     ):
         """
-        format controls the different input value wrapping, including categorical
-        binned style, fixed-sum normalized counts, log1p fixed-sum normalized counts, etc.
+        Process data with format controlling different input value wrapping.
+        Includes support for categorical binned style, fixed-sum normalized counts,
+        log1p fixed-sum normalized counts, etc.
         Args:
-            adata (AnnData): The AnnData object to preprocess.
-            batch_key (str, optional): The key of AnnData.obs to use for batch information. This arg
-                is used in the highly variable gene selection step.
+            data (Union[ln.Collection, AnnData]): The AnnData object or Collection to preprocess.
+            name (str, optional): Name for the preprocessed dataset. Defaults to "preprocessed dataset".
+            description (str, optional): Description for the preprocessed dataset.
+                Defaults to "preprocessed dataset using scprint".
+            start_at (int, optional): Starting index for resuming preprocessing.
+                Defaults to 0.
+            version (str, optional): Version string for the dataset.
+                Defaults to "2".
         """
         files = []
         all_ready_processed_keys = set()

scdataloader/utils.py CHANGED Viewed

@@ -28,10 +28,11 @@ def fileToList(filename: str, strconv: callable = lambda x: x) -> list:
     loads an input file with a\\n b\\n.. into a list [a,b,..]
     Args:
-        input_str (str): The input string to be completed.
+        filename (str): The filename to load from.
+        strconv (callable): A function to convert each line. Defaults to identity function.
     Returns:
-        str: The completed string with 'complete' appended.
+        list: The list of converted elements from the file.
     """
     with open(filename) as f:
         return [strconv(val[:-1]) for val in f.readlines()]
@@ -44,7 +45,7 @@ def listToFile(
     listToFile loads a list with [a,b,..] into an input file a\\n b\\n..
     Args:
-        l (list): The list of elements to be written to the file.
+        li (list): The list of elements to be written to the file.
         filename (str): The name of the file where the list will be written.
         strconv (callable, optional): A function to convert each element of the list to a string. Defaults to str.
@@ -124,7 +125,7 @@ def getBiomartTable(
     attributes: List[str] = [],
     bypass_attributes: bool = False,
     database: str = "hsapiens_gene_ensembl",
-):
+) -> pd.DataFrame:
     """generate a genelist dataframe from ensembl's biomart
     Args:
@@ -175,14 +176,14 @@ def getBiomartTable(
     return res
-def validate(adata: AnnData, organism: str, need_all=False):
+def validate(adata: AnnData, organism: str, need_all: bool = False) -> bool:
     """
     validate checks if the adata object is valid for lamindb
     Args:
-        adata (anndata): the anndata object
-        lb (lamindb): the lamindb instance
-        organism (str): the organism
+        adata (AnnData): the anndata object
+        organism (str): the organism ontology ID
+        need_all (bool, optional): whether all columns should be present. Defaults to False.
     Raises:
         ValueError: if the adata object is not valid
@@ -298,7 +299,7 @@ def get_descendants(val, df):
     return r_onto | ontos
-def get_ancestry_mapping(all_elem: List[str], onto_df: pd.DataFrame):
+def get_ancestry_mapping(all_elem: List[str], onto_df: pd.DataFrame) -> dict:
     """
     This function generates a mapping of all elements to their ancestors in the ontology dataframe.
@@ -339,13 +340,12 @@ def load_dataset_local(
     description: str,
     use_cache: bool = True,
     only: Optional[List[int]] = None,
-):
+) -> ln.Dataset:
     """
     This function loads a remote lamindb dataset to local.
     Args:
-        lb (lamindb): The lamindb instance.
-        remote_dataset (lamindb.Dataset): The remote Dataset.
+        remote_dataset (lamindb.Collection): The remote Collection.
         download_folder (str): The path to the download folder.
         name (str): The name of the dataset.
         description (str): The description of the dataset.
@@ -396,7 +396,7 @@ def load_dataset_local(
 def load_genes(
     organisms: Union[str, List[str]] = "NCBITaxon:9606",
-):  # "NCBITaxon:10090",
+) -> pd.DataFrame:  # "NCBITaxon:10090",
     """
     Loads genes from the given organisms.
@@ -454,9 +454,18 @@ def _adding_scbasecamp_genes(
     if len(species) == 0:
         species = set(
             bt.Organism.using("laminlabs/arc-virtual-cell-atlas").df().ontology_id
-        )
-        -set(["NCBITaxon:10090", "NCBITaxon:9606"])
+        ) - set(["NCBITaxon:10090", "NCBITaxon:9606"])
     species = list(species)
+    for i in set(
+        bt.Organism.using("laminlabs/arc-virtual-cell-atlas").df().ontology_id
+    ) - set(bt.Organism.filter().df().ontology_id):
+        print(i)
+        rec = (
+            bt.Organism.using("laminlabs/arc-virtual-cell-atlas")
+            .filter(ontology_id=i)
+            .first()
+        )
+        rec.save()
     if len(bt.Organism.filter(ontology_id="NCBITaxon:9593")) == 0:
         bt.Organism(
             name="gorilla gorilla",
@@ -655,7 +664,7 @@ def populate_my_ontology(
             ln.save(records)
-def random_str(stringLength=6, stype="all", withdigits=True):
+def random_str(stringLength=6, stype="all", withdigits=True) -> str:
     """
     Generate a random string of letters and digits
@@ -664,7 +673,7 @@ def random_str(stringLength=6, stype="all", withdigits=True):
         stype (str, optional): one of lowercase, uppercase, all. Defaults to 'all'.
         withdigits (bool, optional): digits allowed in the string? Defaults to True.
-        Returns:
+    Returns:
         str: random string
     """
     if stype == "lowercase":
@@ -678,12 +687,12 @@ def random_str(stringLength=6, stype="all", withdigits=True):
     return "".join(random.choice(lettersAndDigits) for i in range(stringLength))
-def is_outlier(adata: AnnData, metric: str, nmads: int):
+def is_outlier(adata: AnnData, metric: str, nmads: int) -> pd.Series:
     """
     is_outlier detects outliers in adata.obs[metric]
     Args:
-        adata (annData): the anndata object
+        adata (AnnData): the anndata object
         metric (str): the metric column to use
         nmads (int): the number of median absolute deviations to use as a threshold
@@ -697,16 +706,16 @@ def is_outlier(adata: AnnData, metric: str, nmads: int):
     return outlier
-def length_normalize(adata: AnnData, gene_lengths: list):
+def length_normalize(adata: AnnData, gene_lengths: list) -> AnnData:
     """
     length_normalize normalizes the counts by the gene length
     Args:
-        adata (anndata): the anndata object
+        adata (AnnData): the anndata object
         gene_lengths (list): the gene lengths
     Returns:
-        anndata: the anndata object
+        AnnData: the normalized anndata object
     """
     adata.X = csr_matrix((adata.X.T / gene_lengths).T)
     return adata
@@ -714,13 +723,13 @@ def length_normalize(adata: AnnData, gene_lengths: list):
 def translate(
     val: Union[str, list, set, Counter, dict], t: str = "cell_type_ontology_term_id"
-):
+) -> dict:
     """
     translate translates the ontology term id to the name
     Args:
-        val (str, dict, set, list, dict): the object to translate
-        t (flat, optional): the type of ontology terms.
+        val (Union[str, dict, set, list]): the object to translate
+        t (str, optional): the type of ontology terms.
             one of cell_type_ontology_term_id, assay_ontology_term_id, tissue_ontology_term_id.
             Defaults to "cell_type_ontology_term_id".

{scdataloader-2.0.6.dist-info → scdataloader-2.0.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scdataloader
-Version: 2.0.6
+Version: 2.0.8
 Summary: a dataloader for single cell data in lamindb
 Project-URL: repository, https://github.com/jkobject/scDataLoader
 Author-email: jkobject <jkobject@gmail.com>
@@ -52,9 +52,10 @@ Description-Content-Type: text/markdown
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![DOI](https://zenodo.org/badge/731248665.svg)](https://doi.org/10.5281/zenodo.10573143)
-<img src="scdataloader.png" width="600">
+<img src="./docs/scdataloader.png" width="600">
-This single cell pytorch dataloader / lighting datamodule is designed to be used with:
+This single cell pytorch dataloader / lightning datamodule is designed to be used
+with:
 - [lamindb](https://lamin.ai/)
@@ -66,11 +67,13 @@ and:
 It allows you to:
 1. load thousands of datasets containing millions of cells in a few seconds.
-2. preprocess the data per dataset and download it locally (normalization, filtering, etc.)
+2. preprocess the data per dataset and download it locally (normalization,
+   filtering, etc.)
 3. create a more complex single cell dataset
 4. extend it to your need
-built on top of `lamindb` and the `.mapped()` function by Sergei: https://github.com/Koncopd
+built on top of `lamindb` and the `.mapped()` function by Sergei:
+https://github.com/Koncopd
 ```
 Portions of the mapped.py file are derived from Lamin Labs
@@ -81,11 +84,17 @@ Please see https://github.com/laminlabs/lamindb/blob/main/lamindb/core/_mapped_c
 for the original implementation
 ```
-The package has been designed together with the [scPRINT paper](https://doi.org/10.1101/2024.07.29.605556) and [model](https://github.com/cantinilab/scPRINT).
+The package has been designed together with the
+[scPRINT paper](https://doi.org/10.1101/2024.07.29.605556) and
+[model](https://github.com/cantinilab/scPRINT).
 ## More
-I needed to create this Data Loader for my PhD project. I am using it to load & preprocess thousands of datasets containing millions of cells in a few seconds. I believed that individuals employing AI for single-cell RNA sequencing and other sequencing datasets would eagerly utilize and desire such a tool, which presently does not exist.
+I needed to create this Data Loader for my PhD project. I am using it to load &
+preprocess thousands of datasets containing millions of cells in a few seconds.
+I believed that individuals employing AI for single-cell RNA sequencing and
+other sequencing datasets would eagerly utilize and desire such a tool, which
+presently does not exist.
 ![scdataloader.drawio.png](docs/scdataloader.drawio.png)
@@ -99,12 +108,14 @@ pip install scDataLoader[dev] # for dev dependencies
 lamin init --storage ./testdb --name test --schema bionty
 ```
-if you start with lamin and had to do a `lamin init`, you will also need to populate your ontologies. This is because scPRINT is using ontologies to define its cell types, diseases, sexes, ethnicities, etc.
+if you start with lamin and had to do a `lamin init`, you will also need to
+populate your ontologies. This is because scPRINT is using ontologies to define
+its cell types, diseases, sexes, ethnicities, etc.
 you can do it manually or with our function:
 ```python
-from scdataloader.utils import populate_my_ontology
+from scdataloader.utils import populate_my_ontology, _adding_scbasecamp_genes
 populate_my_ontology() #to populate everything (recommended) (can take 2-10mns)
@@ -118,11 +129,14 @@ organisms: List[str] = ["NCBITaxon:10090", "NCBITaxon:9606"],
     diseases = None,
     dev_stages = None,
 )
+# if you want to load the gene names and species for the arc scbasecount species, also add this:
+_adding_scbasecamp_genes()
 ```
 ### Dev install
-If you want to use the latest version of scDataLoader and work on the code yourself use `git clone` and `pip -e` instead of `pip install`.
+If you want to use the latest version of scDataLoader and work on the code
+yourself use `git clone` and `pip -e` instead of `pip install`.
 ```bash
 git clone https://github.com/jkobject/scDataLoader.git
@@ -161,6 +175,12 @@ datamodule = DataModule(
 )
 ```
+see the notebooks in [docs](https://www.jkobject.com/scDataLoader/) to learn
+more
+1. [load a dataset](https://www.jkobject.com/scDataLoader/notebooks/1_download_and_preprocess/)
+2. [create a dataset](https://www.jkobject.com/scDataLoader/notebooks/2_create_dataloader/)
 ### lightning-free usage (Dataset+Collator+DataLoader)
 ```python
@@ -211,7 +231,17 @@ for batch in tqdm(dataloader):
     )
 ```
-### Usage on all of cellxgene
+## Gathering a pre-training database
+Here I will explain how to gather and preprocess all of cellxgene (scPRINT-1
+pretraining database) with scDataLoader, and the scPRINT-2 corpus (scPRINT-2
+pretraining database).
+### Getting all of cellxgene
+Here is an example of how to download and preprocess all of cellxgene with
+scDataLoader as a script (a notebook version is also available in
+[./notebooks/update_lamin_or_cellxgene.ipynb](https://github.com/jkobject/scdataloader/blob/main/notebooks/update_lamin_or_cellxgene.ipynb)).
 ```python
 # initialize a local lamin database
@@ -226,11 +256,25 @@ DESCRIPTION='preprocessed by scDataLoader'
 cx_dataset = ln.Collection.using(instance="laminlabs/cellxgene").filter(name="cellxgene-census", version='2023-12-15').one()
 cx_dataset, len(cx_dataset.artifacts.all())
+# (OPTIONAL) if you want to do your preprocessing on a slurm cluster without an internet connection,
+# you can first do this:
+load_dataset_local(
+    cx_dataset,
+    download_folder="/my_download_folder",
+    name="cached-cellxgene-census",
+    description="all of it topreprocess",
+)
+# preprocessing
 do_preprocess = LaminPreprocessor(additional_postprocess=additional_postprocess, additional_preprocess=additional_preprocess, skip_validate=True, subset_hvg=0)
 preprocessed_dataset = do_preprocess(cx_dataset, name=DESCRIPTION, description=DESCRIPTION, start_at=6, version="2")
+```
+After this you can use the preprocessed dataset with the DataModule below.
+```python
 # create dataloaders
 from scdataloader import DataModule
 import tqdm
@@ -252,27 +296,52 @@ for i in tqdm.tqdm(datamodule.train_dataloader()):
 # with lightning:
 # Trainer(model, datamodule)
+```
+You can use the command line to preprocess a large database of datasets, as shown
+here for cellxgene. This allows parallelizing and easier usage.
+```bash
+scdataloader --instance "laminlabs/cellxgene" --name "cellxgene-census" --version "2023-12-15" --description "preprocessed for scprint" --new_name "scprint main" --start_at 10 >> scdataloader.out
 ```
-see the notebooks in [docs](https://www.jkobject.com/scDataLoader/):
+### Getting the rest of the scPRINT-2 corpus
-1. [load a dataset](https://www.jkobject.com/scDataLoader/notebooks/1_download_and_preprocess/)
-2. [create a dataset](https://www.jkobject.com/scDataLoader/notebooks/2_create_dataloader/)
+By now, using the commands / scripts above, you should be able to get all of
+cellxgene (and preprocess it). laminlabs now also hosts the rest of the
+scPRINT-2 corpus in `laminlabs/arc-virtual-cell-atlas`, and it can be
+downloaded and preprocessed the same way as cellxgene above. Be careful, however:
+there is no metadata for these datasets.
-### command line preprocessing
+You can have a look at my notebooks:
+[./notebooks/adding_tahoe.ipynb](https://github.com/jkobject/scdataloader/blob/main/notebooks/adding_tahoe.ipynb)
+and
+[./notebooks/adding_scbasecount.ipynb](https://github.com/jkobject/scdataloader/blob/main/notebooks/adding_scbasecount.ipynb)
+where I create some remapping to retrieve metadata that can be used by
+scdataloader and lamindb from these datasets.
-You can use the command line to preprocess a large database of datasets like here for cellxgene. this allows parallelizing and easier usage.
+If for some reason you do not have access to these datasets, please contact
+laminlabs. Another solution is to download them from the original sources,
+add them one by one to your instance, and then do the same preprocessing, but
+this time using `your_account/your_instance` instead of
+`laminlabs/arc-virtual-cell-atlas`.
-```bash
-scdataloader --instance "laminlabs/cellxgene" --name "cellxgene-census" --version "2023-12-15" --description "preprocessed for scprint" --new_name "scprint main" --start_at 10 >> scdataloader.out
-```
+This is actually what I did in my own instance to create the full scPRINT-2
+corpus and you can see some of it in the notebooks above.
+### Getting even more
+They also host a perturbation atlas in `laminlabs/pertdata` that can be
+downloaded the same way.
-### command line usage
+### command line usage to train a model
 The main way to use
-> please refer to the [scPRINT documentation](https://www.jkobject.com/scPRINT/) and [lightning documentation](https://lightning.ai/docs/pytorch/stable/cli/lightning_cli_intermediate.html) for more information on command line usage
+> please refer to the [scPRINT documentation](https://www.jkobject.com/scPRINT/)
+> and
+> [lightning documentation](https://lightning.ai/docs/pytorch/stable/cli/lightning_cli_intermediate.html)
+> for more information on command line usage
 ## FAQ
@@ -295,13 +364,36 @@ from scdataloader import utils
 utils.populate_ontologies() # this might take from 5-20mins
 ```
+### how to move my lamin instance to another folder?
+You cannot just move your folder from one place to another because lamin
+uses absolute paths. You need to do 3 things:
+1. move your folder to the new place
+2. update your lamin config file (usually in `~/.lamin/my_env.yml`) to point to
+   the new place
+3. update the absolute paths in your lamin database. You can do it like this:
+```python
+import lamin as ln
+ln.Storage.df()
+# find your current storage uid in the output above and use it below (GZgLW1TI is just an example)
+ln.Storage.filter(uid="GZgLW1TI").update(
+    root=Path("your_new_locations").as_posix().rstrip("/")
+)
+```
 ## Development
-Read the [CONTRIBUTING.md](CONTRIBUTING.md) file.
+Read the
+[CONTRIBUTING.md](https://github.com/jkobject/scdataloader/blob/main/CONTRIBUTING.md)
+file.
 ## License
-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+This project is licensed under the MIT License - see the
+[LICENSE](https://github.com/jkobject/scdataloader/blob/main/LICENSE) file for
+details.
 ## Acknowledgments

scdataloader-2.0.8.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+scdataloader/__init__.py,sha256=Z5HURehoWw1GrecImmTXIkv4ih8Q5RxNQWPm8zjjXOA,226
+scdataloader/__main__.py,sha256=xPOtrEpQQQZUGTnm8KTvsQcA_jR45oMG_VHqd0Ny7_M,8677
+scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
+scdataloader/collator.py,sha256=VcFJcVAIeKvYkG1DPRXzoBaw2wQ6D_0lsv5Mcv-9USI,17419
+scdataloader/config.py,sha256=wGlCR3tWyEVa69ajovJKYc86CTCJR8e1xC7BTlUOJQE,34582
+scdataloader/data.py,sha256=tXvONJNgcdMQIRh2KlAq9KCsf-Sz2L4GUlcGyf1OMhw,25160
+scdataloader/datamodule.py,sha256=pFBGUOHl3ibi8QhiV8x5ukjzVjnJMsZWNw3Ekk3P83Y,43810
+scdataloader/mapped.py,sha256=h9YKQ8SG9tyZL8c6_Wu5Xov5ODGK6FzVuFopz58xwN4,29887
+scdataloader/preprocess.py,sha256=VFmyJluk4drR4fcH5qBAcJLf0cJg26ElA0HDuHOK68s,40730
+scdataloader/utils.py,sha256=B81iwnR6aJs9lzkOCSRF85RszAdwS-dvPZPXA7yoMg4,27734
+scdataloader-2.0.8.dist-info/METADATA,sha256=g7UW_1EeYX1P0LoJsL9MENZ9f0Q7JjLu8opWbV7CnDo,13448
+scdataloader-2.0.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+scdataloader-2.0.8.dist-info/entry_points.txt,sha256=VXAN1m_CjbdLJ6SKYR0sBLGDV4wvv31ri7fWWuwbpno,60
+scdataloader-2.0.8.dist-info/licenses/LICENSE,sha256=rGy_eYmnxtbOvKs7qt5V0czSWxJwgX_MlgMyTZwDHbc,1073
+scdataloader-2.0.8.dist-info/RECORD,,

scdataloader/data.json DELETED Viewed

@@ -1,384 +0,0 @@
-'lung': 42877488
-'blood': 34180713
-'brain': 29530595
-'colon': 25830811
-'unknown': 23810521
-'pancreas': 21597602
-'embryo': 16976623
-'skin': 12513695
-'liver': 10683313
-'breast': 10539762
-'marrow': 9543512
-'kidney': 8213043
-'heart': 4800567
-'immune system': 4687291
-'eye': 4677504
-# 'other (310 tissues)': 87200000
-'UBERON:0002106': 4559701
-'UBERON:0000029': 4314740
-'UBERON:0000945': 4244624
-'UBERON:0001295': 3485757
-'UBERON:0002049': 3350893
-'UBERON:0002108': 3226581
-'UBERON:0001255': 2961628
-'UBERON:0001434': 2823653
-'UBERON:0001004': 2588530
-'UBERON:0002037': 2101623
-'UBERON:0001015': 1920181
-'UBERON:0000995': 1864451
-'UBERON:0001013': 1853702
-'UBERON:0001017': 1818861
-'UBERON:0002240': 1704269
-'UBERON:0009834': 1631471
-'UBERON:0000002': 1596082
-'UBERON:0001893': 1565863
-'UBERON:0001851': 1513857
-'UBERON:0002771': 1317262
-'UBERON:0002367': 1301074
-'UBERON:0002369': 1279963
-'UBERON:0000956': 1252422
-'UBERON:0013682': 1195809
-'UBERON:0000160': 1156931
-'UBERON:0001987': 1127465
-'UBERON:0001043': 1015278
-'UBERON:0001032': 1011915
-'UBERON:0000992': 948464
-'UBERON:0000010': 897312
-'UBERON:0000473': 838235
-'UBERON:0002368': 814761
-'UBERON:0002084': 761793
-'UBERON:0001870': 730928
-'UBERON:0000344': 655220
-'UBERON:0001384': 625538
-'UBERON:0000966': 598023
-'UBERON:0002421': 578257
-'UBERON:0001225': 542410
-'UBERON:0000991': 507126
-'UBERON:0002116': 506392
-'UBERON:8440012': 494325
-'UBERON:0001898': 483451
-'UBERON:0000990': 440483
-'UBERON:0002370': 405221
-'UBERON:0002436': 399661
-'UBERON:0001965': 387483
-'UBERON:0000006': 362898
-'UBERON:0001005': 358584
-'UBERON:0010225': 343320
-'UBERON:0002102': 335470
-'UBERON:0008946': 334444
-'UBERON:0000053': 310875
-'UBERON:0008933': 310653
-'UBERON:0001117': 308212
-'UBERON:0001007': 307511
-'UBERON:0000059': 288232
-'UBERON:0002080': 286423
-'UBERON:0002094': 284283
-'UBERON:0000362': 283305
-'UBERON:0002365': 282382
-'UBERON:0002103': 264447
-'UBERON:0001891': 249305
-'UBERON:0001894': 249065
-'UBERON:0000411': 236082
-'UBERON:0002728': 221808
-'UBERON:0000451': 219655
-'UBERON:0001161': 214938
-'UBERON:0000030': 207230
-'UBERON:0009835': 206197
-'UBERON:0000988': 204168
-'UBERON:0001707': 198868
-'UBERON:0016538': 192147
-'UBERON:0002450': 189973
-'UBERON:0016540': 187526
-'UBERON:0000977': 185185
-'UBERON:0001913': 183332
-'UBERON:0001786': 179309
-'UBERON:0034751': 145757
-'UBERON:0001040': 144260
-'UBERON:0016530': 141537
-'UBERON:0001238': 136529
-'UBERON:0003889': 132985
-'UBERON:0000453': 125672
-'UBERON:0001723': 125523
-'UBERON:0002098': 120306
-'UBERON:0016525': 118039
-'UBERON:0000004': 114936
-'UBERON:0002686': 110752
-'UBERON:0022352': 110671
-'UBERON:0002351': 107573
-'UBERON:0001828': 105079
-'UBERON:0001003': 104237
-'UBERON:0002067': 100247
-'UBERON:0002190': 99711
-'UBERON:0005290': 99110
-'UBERON:0002811': 98536
-'UBERON:0001871': 97537
-'UBERON:0003688': 92733
-'UBERON:0010410': 91248
-'UBERON:0000403': 90975
-'UBERON:0000175': 90741
-'UBERON:0001976': 87947
-'UBERON:0002822': 87214
-'UBERON:0001890': 86604
-'UBERON:0014454': 86134
-'UBERON:0002810': 85525
-'UBERON:0002079': 82676
-'UBERON:0010414': 80085
-'UBERON:0003126': 79986
-'UBERON:0003027': 75807
-'UBERON:0008345': 73634
-'UBERON:0002352': 73045
-'UBERON:8410025': 72957
-'UBERON:0001393': 72549
-'UBERON:0010412': 71752
-'UBERON:0001159': 69025
-'UBERON:0014918': 65710
-'UBERON:0002078': 64486
-'UBERON:0007650': 64093
-'UBERON:0002808': 63227
-'UBERON:0007644': 62647
-'UBERON:8410010': 61264
-'UBERON:0001157': 60907
-'UBERON:0002185': 59377
-'UBERON:0002114': 59363
-'UBERON:0000916': 59001
-'UBERON:0014455': 58536
-'UBERON:0002661': 58226
-'UBERON:0001873': 57571
-'UBERON:0001769': 57422
-'UBERON:0008953': 56919
-'UBERON:0002372': 56899
-'UBERON:0002509': 55694
-'UBERON:0000397': 55666
-'UBERON:0001156': 54735
-'UBERON:0023787': 54666
-'UBERON:0002299': 54085
-'UBERON:0034893': 53956
-'UBERON:0001872': 53269
-'UBERON:0007177': 49990
-'UBERON:0004026': 48185
-'UBERON:0012648': 48068
-'UBERON:0001630': 47291
-'UBERON:0002803': 47176
-'UBERON:0016632': 45130
-'UBERON:0008803': 43943
-'UBERON:0001049': 43485
-'UBERON:0016475': 42323
-'UBERON:0002363': 42319
-'UBERON:0001874': 41277
-'UBERON:0000964': 40990
-'UBERON:0011189': 40231
-'UBERON:0036288': 36574
-'UBERON:0008954': 35284
-'UBERON:0018131': 34687
-'UBERON:0004648': 34173
-'UBERON:0034891': 34153
-'UBERON:0001775': 34132
-'UBERON:0018707': 33610
-'UBERON:0003661': 32722
-'UBERON:0003403': 32138
-'UBERON:0035328': 31696
-'UBERON:0001728': 31325
-'UBERON:0001388': 30877
-'UBERON:0008952': 29895
-'UBERON:0000080': 29606
-'UBERON:0004024': 29064
-'UBERON:0035886': 28873
-'UBERON:0004023': 28857
-'UBERON:0013473': 28621
-'UBERON:0018105': 28367
-'UBERON:0005969': 27736
-'UBERON:0012168': 27154
-'UBERON:0001886': 27092
-'UBERON:0000400': 27087
-'UBERON:0001911': 26952
-'UBERON:0000088': 26853
-'UBERON:0001153': 25865
-'UBERON:0001471': 24982
-'UBERON:0001085': 24807
-'UBERON:0000057': 24700
-'UBERON:0006761': 24573
-'UBERON:0002809': 24445
-'UBERON:0001158': 23887
-'UBERON:0008972': 23110
-'UBERON:0002807': 22796
-'UBERON:0010506': 22652
-'UBERON:0001459': 21633
-'UBERON:8410000': 21592
-'UBERON:0001831': 21003
-'UBERON:0003544': 20212
-'UBERON:0002110': 19880
-'UBERON:0014614': 19650
-'UBERON:8300000': 19408
-'UBERON:0035895': 18814
-'UBERON:0035213': 18775
-'UBERON:0001162': 18404
-'UBERON:0000056': 18354
-'UBERON:0001228': 17958
-'UBERON:0008971': 17831
-'UBERON:0013756': 17625
-'UBERON:0001052': 16913
-'UBERON:0012474': 16607
-'UBERON:0039167': 16527
-'UBERON:0002317': 15963
-'UBERON:0002115': 15762
-'UBERON:0014648': 15580
-'UBERON:8480028': 15307
-'UBERON:0000014': 15215
-'UBERON:0002489': 15127
-'UBERON:0001836': 14502
-'UBERON:0005343': 14336
-'UBERON:8410026': 14090
-'UBERON:0002132': 13953
-'UBERON:0000965': 13900
-'UBERON:0010415': 12330
-'UBERON:0000017': 11977
-'UBERON:0018303': 11937
-'UBERON:0002382': 11898
-'UBERON:0002046': 11840
-'UBERON:0001087': 11702
-'UBERON:0009958': 11377
-'UBERON:0005616': 11243
-'UBERON:8480009': 10533
-'UBERON:0013535': 9915
-'UBERON:0007106': 9898
-'UBERON:0001513': 9887
-'UBERON:0015790': 9816
-'UBERON:0001068': 9773
-'UBERON:0035894': 9667
-'UBERON:0015476': 9656
-'UBERON:0001637': 9652
-'UBERON:0002129': 9649
-'UBERON:0010033': 9467
-'UBERON:0000947': 9290
-'UBERON:0001511': 9288
-'UBERON:0004946': 9195
-'UBERON:0016435': 9097
-'UBERON:0002420': 9000
-'UBERON:0001868': 8799
-'UBERON:0002021': 8799
-'UBERON:0001542': 8683
-'UBERON:0002081': 8279
-'UBERON:0004070': 8033
-'UBERON:0008612': 7825
-'UBERON:0001901': 7228
-'UBERON:0004929': 7133
-'UBERON:0003528': 6670
-'UBERON:0002427': 6529
-'UBERON:0004025': 6448
-'UBERON:0009472': 6348
-'UBERON:0002756': 6279
-'UBERON:0001832': 6196
-'UBERON:0002378': 6142
-'UBERON:0004167': 5874
-'UBERON:0002228': 5725
-'UBERON:0003968': 5625
-'UBERON:0001154': 5515
-'UBERON:0001046': 5420
-'UBERON:0010032': 5367
-'UBERON:0004339': 4969
-'UBERON:0002385': 4881
-'UBERON:0001621': 4867
-'UBERON:0001416': 4808
-'UBERON:0001638': 4395
-'UBERON:0002429': 4355
-'UBERON:0001165': 4028
-'UBERON:0008989': 3997
-'UBERON:0001902': 3883
-'UBERON:0003532': 3443
-'UBERON:0003428': 3406
-'UBERON:0002082': 3226
-'UBERON:0001296': 3025
-'UBERON:0015143': 3014
-'UBERON:0000074': 3011
-'UBERON:0002245': 2971
-'UBERON:0001293': 2693
-'UBERON:0007625': 2348
-'UBERON:0003547': 2344
-'UBERON:0022277': 2333
-'UBERON:0001554': 2272
-'UBERON:0001348': 2223
-'UBERON:0005406': 2121
-'UBERON:0001811': 2084
-'UBERON:0013531': 2055
-'UBERON:0008934': 1866
-'UBERON:0001103': 1858
-'UBERON:0005636': 1651
-'UBERON:0007225': 1594
-'UBERON:0007224': 1564
-'UBERON:0000016': 1520
-'UBERON:8440075': 1482
-'UBERON:0004264': 1475
-'UBERON:0001773': 1431
-'UBERON:0013706': 1150
-'UBERON:0023852': 1143
-'UBERON:0001294': 849
-'UBERON:0001134': 835
-'UBERON:0003902': 675
-'UBERON:0001224': 569
-'UBERON:0005564': 399
-'UBERON:0001817': 248
-'UBERON:0002802': 163
-'UBERON:0003072': 146
-'UBERON:0000926': 139
-'UBERON:0000416': 82
-'UBERON:0003517': 48
-'UBERON:0001483': 37
-'UBERON:1000021': 37
-'UBERON:0002023': 1
-human: 230822337
-mouse: 102753838
-# other 14070000
-macaque: 3161179
-zebrafish: 3117237
-pig: 1641179
-thale cress: 1564711
-drosophila: 1564460
-chicken: 606680
-nake mole rat: 441042
-rabbit: 417091
-cow: 390675
-corn: 336166
-chimpanzee: 293472
-c. elegans: 251759
-sheep: 177871
-marmoset: 115584
-10x 3': 146909525
-10x 3' v2: 12021763
-10x 3' v3: 121241920
-# 10x 3': 280173208
-10x 5' v1: 4716686
-10x 5' v2: 3104399
-10x 5': 28919386
-# 10x 5': 36740471
-10x multiome: 6942314
-10x (vdj): 1620181
-10x (CITE): 1580477
-# 10x multiome: 10142972
-sciRNA-seq: 14924076
-slide-seq: 1867286
-scalebio: 685024
-microwell: 599459
-sciPlex: 581480
-drop-seq: 517799
-# other (17 assays): 1423506
-EFO:0010961: 286087
-EFO:0008931: 192101
-EFO:0700003: 146278
-EFO:0700010: 133430
-EFO:0700016: 128855
-EFO:0030007: 105584
-EFO:0008919: 87181
-EFO:0009901: 76022
-EFO:0008796: 68645
-EFO:0009919: 68544
-EFO:0700011: 58981
-EFO:0030019: 31775
-EFO:0008780: 25652
-EFO:0010010: 5231
-EFO:0008953: 4693
-EFO:0008720: 2768
-EFO:0008930: 1679

scdataloader-2.0.6.dist-info/RECORD DELETED Viewed

@@ -1,16 +0,0 @@
-scdataloader/__init__.py,sha256=Z5HURehoWw1GrecImmTXIkv4ih8Q5RxNQWPm8zjjXOA,226
-scdataloader/__main__.py,sha256=xPOtrEpQQQZUGTnm8KTvsQcA_jR45oMG_VHqd0Ny7_M,8677
-scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
-scdataloader/collator.py,sha256=VcFJcVAIeKvYkG1DPRXzoBaw2wQ6D_0lsv5Mcv-9USI,17419
-scdataloader/config.py,sha256=wGlCR3tWyEVa69ajovJKYc86CTCJR8e1xC7BTlUOJQE,34582
-scdataloader/data.json,sha256=Zb8c27yk3rwMgtAU8kkiWWAyUwYBrlCqKUyEtaAx9i8,8785
-scdataloader/data.py,sha256=fMW1OgllPCz87si3DpkzOSoqnufgKlh8aW5rEVmeC_c,25133
-scdataloader/datamodule.py,sha256=ojX0zr2cpGLoKGjWE1S_bHAEdwbFg0Ljl55hqTagW1k,43600
-scdataloader/mapped.py,sha256=h9YKQ8SG9tyZL8c6_Wu5Xov5ODGK6FzVuFopz58xwN4,29887
-scdataloader/preprocess.py,sha256=oAGMilgdIgggyp9B9c9627kdo6SCco2tnFhhIHY4-yc,39642
-scdataloader/utils.py,sha256=2zIgmQHPVKHOFWqLX56Ihqtqci3_rOfCcOs642CPnX4,27183
-scdataloader-2.0.6.dist-info/METADATA,sha256=lnOF9PLih91AxcSI3L2OsD-0FH2kh3Qt2G7p6h3JESk,10314
-scdataloader-2.0.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-scdataloader-2.0.6.dist-info/entry_points.txt,sha256=VXAN1m_CjbdLJ6SKYR0sBLGDV4wvv31ri7fWWuwbpno,60
-scdataloader-2.0.6.dist-info/licenses/LICENSE,sha256=rGy_eYmnxtbOvKs7qt5V0czSWxJwgX_MlgMyTZwDHbc,1073
-scdataloader-2.0.6.dist-info/RECORD,,

{scdataloader-2.0.6.dist-info → scdataloader-2.0.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{scdataloader-2.0.6.dist-info → scdataloader-2.0.8.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{scdataloader-2.0.6.dist-info → scdataloader-2.0.8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

scdataloader 2.0.6__py3-none-any.whl → 2.0.8__py3-none-any.whl

scdataloader 2.0.6py3-none-any.whl → 2.0.8py3-none-any.whl