scdataloader 0.0.3__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scdataloader/VERSION +1 -1
- scdataloader/__init__.py +1 -1
- scdataloader/__main__.py +66 -42
- scdataloader/collator.py +136 -67
- scdataloader/config.py +112 -0
- scdataloader/data.py +160 -169
- scdataloader/datamodule.py +403 -0
- scdataloader/mapped.py +285 -109
- scdataloader/preprocess.py +240 -109
- scdataloader/utils.py +162 -70
- {scdataloader-0.0.3.dist-info → scdataloader-1.0.1.dist-info}/METADATA +87 -18
- scdataloader-1.0.1.dist-info/RECORD +16 -0
- scdataloader/dataloader.py +0 -318
- scdataloader-0.0.3.dist-info/RECORD +0 -15
- {scdataloader-0.0.3.dist-info → scdataloader-1.0.1.dist-info}/LICENSE +0 -0
- {scdataloader-0.0.3.dist-info → scdataloader-1.0.1.dist-info}/WHEEL +0 -0
- {scdataloader-0.0.3.dist-info → scdataloader-1.0.1.dist-info}/entry_points.txt +0 -0
scdataloader/utils.py
CHANGED
@@ -11,9 +11,50 @@ from django.db import IntegrityError
 from scipy.sparse import csr_matrix
 from scipy.stats import median_abs_deviation
 from functools import lru_cache
+from collections import Counter
+from torch import Tensor
+import torch
 
+from typing import Union, List, Optional
 
-def createFoldersFor(filepath):
+from anndata import AnnData
+
+
+def downsample_profile(mat: Tensor, dropout: float):
+    """
+    This function downsamples the expression profile of a given single cell RNA matrix.
+
+    The noise is applied based on the renoise parameter,
+    the total counts of the matrix, and the number of genes. The function first calculates the noise
+    threshold (scaler) based on the renoise parameter. It then generates an initial matrix count by
+    applying a Poisson distribution to a random tensor scaled by the total counts and the number of genes.
+    The function then models the sampling zeros by applying a Poisson distribution to a random tensor
+    scaled by the noise threshold, the total counts, and the number of genes. The function also models
+    the technical zeros by generating a random tensor and comparing it to the noise threshold. The final
+    matrix count is calculated by subtracting the sampling zeros from the initial matrix count and
+    multiplying by the technical zeros. The function ensures that the final matrix count is not less
+    than zero by taking the maximum of the final matrix count and a tensor of zeros. The function
+    returns the final matrix count.
+
+    Args:
+        mat (torch.Tensor): The input matrix.
+        dropout (float): The renoise parameter.
+
+    Returns:
+        torch.Tensor: The matrix count after applying noise.
+    """
+    batch = mat.shape[0]
+    ngenes = mat.shape[1]
+    dropout = dropout * 1.1
+    # we model the sampling zeros (dropping 30% of the reads)
+    res = torch.poisson((mat * (dropout / 2))).int()
+    # we model the technical zeros (dropping 50% of the genes)
+    notdrop = (torch.rand((batch, ngenes), device=mat.device) >= (dropout / 2)).int()
+    mat = (mat - res) * notdrop
+    return torch.maximum(mat, torch.zeros((1, 1), device=mat.device, dtype=torch.int))
+
+
+def createFoldersFor(filepath: str):
     """
     will recursively create folders if needed until having all the folders required to save the file in this filepath
     """
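For orientation, here is a minimal usage sketch (not part of the package) of the new `downsample_profile` helper added above, assuming it is imported from `scdataloader.utils`; it only illustrates the expected shapes and the fact that the downsampling can only reduce counts.

```python
import torch

from scdataloader.utils import downsample_profile

# hypothetical toy input: a batch of 8 cells x 300 genes of float counts
counts = torch.poisson(torch.full((8, 300), 2.0))

# drop roughly 30% of the signal as modelled by the function above
noisy = downsample_profile(counts, dropout=0.3)

assert noisy.shape == counts.shape
assert bool((noisy <= counts).all())  # downsampling never adds counts
```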
@@ -24,19 +65,22 @@ def createFoldersFor(filepath):
             os.mkdir(prevval)
 
 
-def _fetchFromServer(ensemble_server, attributes):
+def _fetchFromServer(
+    ensemble_server: str, attributes: list, database: str = "hsapiens_gene_ensembl"
+):
     """
     Fetches data from the specified ensemble server.
 
     Args:
         ensemble_server (str): The URL of the ensemble server to fetch data from.
         attributes (list): The list of attributes to fetch from the server.
+        database (str): The database to fetch data from.
 
     Returns:
         pd.DataFrame: A pandas DataFrame containing the fetched data.
     """
     server = BiomartServer(ensemble_server)
-    ensmbl = server.datasets[
+    ensmbl = server.datasets[database]
     print(attributes)
     res = pd.read_csv(
         io.StringIO(
@@ -48,11 +92,12 @@ def _fetchFromServer(ensemble_server, attributes):
 
 
 def getBiomartTable(
-    ensemble_server="http://jul2023.archive.ensembl.org/biomart",
-    useCache=False,
-    cache_folder="/tmp/biomart/",
-    attributes=[],
-    bypass_attributes=False,
+    ensemble_server: str = "http://jul2023.archive.ensembl.org/biomart",
+    useCache: bool = False,
+    cache_folder: str = "/tmp/biomart/",
+    attributes: List[str] = [],
+    bypass_attributes: bool = False,
+    database: str = "hsapiens_gene_ensembl",
 ):
     """generate a genelist dataframe from ensembl's biomart
 
@@ -60,6 +105,9 @@ def getBiomartTable(
         ensemble_server (str, optional): the biomart server. Defaults to "http://jul2023.archive.ensembl.org/biomart".
         useCache (bool, optional): whether to use the cache or not. Defaults to False.
         cache_folder (str, optional): the cache folder. Defaults to "/tmp/biomart/".
+        attributes (List[str], optional): the attributes to fetch. Defaults to [].
+        bypass_attributes (bool, optional): whether to bypass the attributes or not. Defaults to False.
+        database (str, optional): the database to fetch from. Defaults to "hsapiens_gene_ensembl".
 
     Raises:
         ValueError: should be a dataframe (when the result from the server is something else)
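As an aside, a hedged sketch of how the widened `getBiomartTable` signature might be called now that it accepts a `database` argument. The mouse dataset name below is an assumption, and the call needs network access to the Ensembl biomart server.

```python
from scdataloader.utils import getBiomartTable

# hypothetical call: fetch a mouse gene table instead of the default
# "hsapiens_gene_ensembl", requesting two extra biomart attributes
genedf = getBiomartTable(
    attributes=["start_position", "chromosome_name"],
    database="mmusculus_gene_ensembl",  # assumption: valid Ensembl dataset name
    useCache=False,
)
print(genedf.head())
```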
@@ -88,21 +136,22 @@ def getBiomartTable(
     else:
         print("downloading gene names from biomart")
 
-        res = _fetchFromServer(ensemble_server, attr + attributes)
+        res = _fetchFromServer(ensemble_server, attr + attributes, database=database)
         res.to_csv(cachefile, index=False)
 
     res.columns = attr + attributes
     if type(res) is not type(pd.DataFrame()):
         raise ValueError("should be a dataframe")
-    res = res[~(res["ensembl_gene_id"].isna()
-
-    res
-
-
+    res = res[~(res["ensembl_gene_id"].isna())]
+    if "hgnc_symbol" in res.columns:
+        res = res[res["hgnc_symbol"].isna()]
+        res.loc[res[res.hgnc_symbol.isna()].index, "hgnc_symbol"] = res[
+            res.hgnc_symbol.isna()
+        ]["ensembl_gene_id"]
     return res
 
 
-def validate(adata, organism):
+def validate(adata: AnnData, organism: str):
     """
     validate checks if the adata object is valid for lamindb
 
@@ -144,9 +193,6 @@ def validate(adata, organism):
         raise ValueError(
             f"Column '{val}' is missing in the provided anndata object."
         )
-    bionty_source = bt.PublicSource.filter(
-        entity="DevelopmentalStage", organism=organism
-    ).one()
 
     if not bt.Ethnicity.validate(
         adata.obs["self_reported_ethnicity_ontology_term_id"],
@@ -169,14 +215,10 @@ def validate(adata, organism):
         adata.obs["cell_type_ontology_term_id"], field="ontology_id"
     ).all():
         raise ValueError("Invalid cell type ontology term id found")
-    if (
-
-
-
-            field="ontology_id",
-        )
-        .all()
-    ):
+    if not bt.DevelopmentalStage.validate(
+        adata.obs["development_stage_ontology_term_id"],
+        field="ontology_id",
+    ).all():
         raise ValueError("Invalid dev stage ontology term id found")
     if not bt.Tissue.validate(
         adata.obs["tissue_ontology_term_id"], field="ontology_id"
@@ -186,18 +228,16 @@ def validate(adata, organism):
         adata.obs["assay_ontology_term_id"], field="ontology_id"
     ).all():
         raise ValueError("Invalid assay ontology term id found")
-    if (
-
-
-        .all()
-    ):
+    if not bt.Gene.validate(
+        adata.var.index, field="ensembl_gene_id", organism=organism
+    ).all():
         raise ValueError("Invalid gene ensembl id found")
     return True
 
 
 # setting a cache of 200 elements
 # @lru_cache(maxsize=200)
-def get_all_ancestors(val, df):
+def get_all_ancestors(val: str, df: pd.DataFrame):
     if val not in df.index:
         return set()
     parents = df.loc[val].parents__ontology_id
@@ -207,7 +247,17 @@ def get_all_ancestors(val, df):
     return set.union(set(parents), *[get_all_ancestors(val, df) for val in parents])
 
 
-def get_ancestry_mapping(all_elem, onto_df):
+# setting a cache of 200 elements
+# @lru_cache(maxsize=200)
+def get_descendants(val, df):
+    ontos = set(df[df.parents__ontology_id.str.contains(val)].index.tolist())
+    r_onto = set()
+    for onto in ontos:
+        r_onto |= get_descendants(onto, df)
+    return r_onto | ontos
+
+
+def get_ancestry_mapping(all_elem: list, onto_df: pd.DataFrame):
     """
     This function generates a mapping of all elements to their ancestors in the ontology dataframe.
 
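To make the ontology helpers above concrete, a small self-contained sketch of `get_all_ancestors` on a toy dataframe (`get_descendants`, added in the same hunk, walks the same column in the opposite direction). The unchanged middle of the function, not shown in this hunk, is assumed to return an empty set when a term has no parents.

```python
import pandas as pd

from scdataloader.utils import get_all_ancestors

# hypothetical three-term ontology: CL:0000003 -> CL:0000002 -> CL:0000001
toy = pd.DataFrame(
    {"parents__ontology_id": [[], ["CL:0000001"], ["CL:0000002"]]},
    index=["CL:0000001", "CL:0000002", "CL:0000003"],
)

# walks parents recursively; expected result: {"CL:0000002", "CL:0000001"}
print(get_all_ancestors("CL:0000003", toy))
```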
@@ -242,12 +292,12 @@ def get_ancestry_mapping(all_elem, onto_df):
 
 
 def load_dataset_local(
-    remote_dataset,
-    download_folder,
-    name,
-    description,
-    use_cache=True,
-    only=None,
+    remote_dataset: ln.Collection,
+    download_folder: str,
+    name: str,
+    description: str,
+    use_cache: bool = True,
+    only: Optional[List[int]] = None,
 ):
     """
     This function loads a remote lamindb dataset to local.
@@ -303,7 +353,7 @@ def load_dataset_local(
     return dataset
 
 
-def load_genes(organisms):
+def load_genes(organisms: Union[str, list] = "NCBITaxon:9606"):  # "NCBITaxon:10090",
     organismdf = []
     if type(organisms) == str:
         organisms = [organisms]
@@ -313,7 +363,7 @@ def load_genes(organisms):
         ).df()
         genesdf = genesdf[~genesdf["public_source_id"].isna()]
         genesdf = genesdf.drop_duplicates(subset="ensembl_gene_id")
-        genesdf = genesdf.set_index("ensembl_gene_id")
+        genesdf = genesdf.set_index("ensembl_gene_id").sort_index()
         # mitochondrial genes
         genesdf["mt"] = genesdf.symbol.astype(str).str.startswith("MT-")
         # ribosomal genes
@@ -326,14 +376,14 @@ def load_genes(organisms):
 
 
 def populate_my_ontology(
-    organisms=["NCBITaxon:10090", "NCBITaxon:9606"],
-    sex=["PATO:0000384", "PATO:0000383"],
-    celltypes=[],
-    ethnicities=[],
-    assays=[],
-    tissues=[],
-    diseases=[],
-    dev_stages=[],
+    organisms: List[str] = ["NCBITaxon:10090", "NCBITaxon:9606"],
+    sex: List[str] = ["PATO:0000384", "PATO:0000383"],
+    celltypes: List[str] = [],
+    ethnicities: List[str] = [],
+    assays: List[str] = [],
+    tissues: List[str] = [],
+    diseases: List[str] = [],
+    dev_stages: List[str] = [],
 ):
     """
     creates a local version of the lamin ontologies and add the required missing values in base ontologies
@@ -360,20 +410,20 @@ def populate_my_ontology(
         dev_stages (list, optional): List of developmental stages. Defaults to [].
     """
 
-    names = bt.CellType.
+    names = bt.CellType.public().df().index if not celltypes else celltypes
     records = bt.CellType.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(celltypes))
     bt.CellType(name="unknown", ontology_id="unknown").save()
     # Organism
-    names = bt.Organism.
+    names = bt.Organism.public().df().index if not organisms else organisms
     records = [
         i[0] if type(i) is list else i
         for i in [bt.Organism.from_public(ontology_id=i) for i in names]
     ]
-    ln.save(records)
+    ln.save(records, parents=bool(organisms))
     bt.Organism(name="unknown", ontology_id="unknown").save()
     # Phenotype
-    names = bt.Phenotype.
+    names = bt.Phenotype.public().df().index if not sex else sex
     records = [
         bt.Phenotype.from_public(
             ontology_id=i,
@@ -383,38 +433,47 @@ def populate_my_ontology(
         )
         for i in names
     ]
-    ln.save(records)
+    ln.save(records, parents=bool(sex))
     bt.Phenotype(name="unknown", ontology_id="unknown").save()
     # ethnicity
-    names = bt.Ethnicity.
+    names = bt.Ethnicity.public().df().index if not ethnicities else ethnicities
     records = bt.Ethnicity.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(ethnicities))
     bt.Ethnicity(
         name="unknown", ontology_id="unknown"
     ).save()  # multi ethnic will have to get renamed
     # ExperimentalFactor
-    names = bt.ExperimentalFactor.
+    names = bt.ExperimentalFactor.public().df().index if not assays else assays
     records = bt.ExperimentalFactor.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(assays))
     bt.ExperimentalFactor(name="unknown", ontology_id="unknown").save()
     # lookup = bt.ExperimentalFactor.lookup()
     # lookup.smart_seq_v4.parents.add(lookup.smart_like)
     # Tissue
-    names = bt.Tissue.
+    names = bt.Tissue.public().df().index if not tissues else tissues
     records = bt.Tissue.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(tissues))
     bt.Tissue(name="unknown", ontology_id="unknown").save()
     # DevelopmentalStage
-    names = (
-        bt.DevelopmentalStage.from_public().df().index if not dev_stages else dev_stages
-    )
+    names = bt.DevelopmentalStage.public().df().index if not dev_stages else dev_stages
     records = bt.DevelopmentalStage.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(dev_stages))
     bt.DevelopmentalStage(name="unknown", ontology_id="unknown").save()
+
+    names = bt.DevelopmentalStage.public(organism="mouse").df().name
+    bionty_source = bt.PublicSource.filter(
+        entity="DevelopmentalStage", organism="mouse"
+    ).one()
+    records = [
+        bt.DevelopmentalStage.from_public(name=i, public_source=bionty_source)
+        for i in names.tolist()
+    ]
+    records[-4] = records[-4][0]
+    ln.save(records)
     # Disease
-    names = bt.Disease.
+    names = bt.Disease.public().df().index if not diseases else diseases
     records = bt.Disease.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(diseases))
     bt.Disease(name="normal", ontology_id="PATO:0000461").save()
     bt.Disease(name="unknown", ontology_id="unknown").save()
     # genes
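For reference, a hedged sketch of calling `populate_my_ontology` with the kind of arguments shown in the new defaults above. It assumes an initialized lamindb instance with the bionty schema; non-empty lists mean "only these terms", while empty lists fall back to the full public ontology.

```python
from scdataloader.utils import populate_my_ontology

# hypothetical setup call: restrict organisms and sex terms, load everything
# else from the public ontologies (empty list -> full public table)
populate_my_ontology(
    organisms=["NCBITaxon:10090", "NCBITaxon:9606"],
    sex=["PATO:0000384", "PATO:0000383"],
    celltypes=[],
    tissues=[],
)
```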
@@ -430,7 +489,7 @@ def populate_my_ontology(
     ln.save(records)
 
 
-def is_outlier(adata, metric: str, nmads: int):
+def is_outlier(adata: AnnData, metric: str, nmads: int):
     """
     is_outlier detects outliers in adata.obs[metric]
 
@@ -449,7 +508,7 @@ def is_outlier(adata, metric: str, nmads: int):
     return outlier
 
 
-def length_normalize(adata, gene_lengths):
+def length_normalize(adata: AnnData, gene_lengths: list):
     """
     length_normalize normalizes the counts by the gene length
 
@@ -464,7 +523,7 @@ def length_normalize(adata, gene_lengths):
     return adata
 
 
-def pd_load_cached(url, loc="/tmp/", cache=True, **kwargs):
+def pd_load_cached(url: str, loc: str = "/tmp/", cache: bool = True, **kwargs):
     """
     pd_load_cached downloads a file from a url and loads it as a pandas dataframe
 
@@ -482,3 +541,36 @@ def pd_load_cached(url, loc="/tmp/", cache=True, **kwargs):
         urllib.request.urlretrieve(url, loc)
     # Load the data from the file
     return pd.read_csv(loc, **kwargs)
+
+
+def translate(
+    val: Union[str, list, set, Counter, dict], t: str = "cell_type_ontology_term_id"
+):
+    """
+    translate translates the ontology term id to the name
+
+    Args:
+        val (str, dict, set, list, dict): the object to translate
+        t (flat, optional): the type of ontology terms.
+            one of cell_type_ontology_term_id, assay_ontology_term_id, tissue_ontology_term_id.
+            Defaults to "cell_type_ontology_term_id".
+
+    Returns:
+        dict: the mapping for the translation
+    """
+    if t == "cell_type_ontology_term_id":
+        obj = bt.CellType.public(organism="all")
+    elif t == "assay_ontology_term_id":
+        obj = bt.ExperimentalFactor.public()
+    elif t == "tissue_ontology_term_id":
+        obj = bt.Tissue.public()
+    else:
+        return None
+    if type(val) is str:
+        return {val: obj.search(val, field=obj.ontology_id).name.iloc[0]}
+    elif type(val) is list or type(val) is set:
+        return {i: obj.search(i, field=obj.ontology_id).name.iloc[0] for i in set(val)}
+    elif type(val) is dict or type(val) is Counter:
+        return {
+            obj.search(k, field=obj.ontology_id).name.iloc[0]: v for k, v in val.items()
+        }
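And a short sketch of the new `translate` helper; the ontology ids and the readable names shown below are only an example, and the call assumes a configured bionty instance so that `bt.CellType.public()` is available.

```python
from collections import Counter

from scdataloader.utils import translate

# hypothetical cell-type tally keyed by ontology term id
counts = Counter({"CL:0000236": 120, "CL:0000057": 48})

# maps names onto the tallied values, e.g. {"B cell": 120, "fibroblast": 48}
print(translate(counts, t="cell_type_ontology_term_id"))
```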
{scdataloader-0.0.3.dist-info → scdataloader-1.0.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scdataloader
-Version: 0.0.3
+Version: 1.0.1
 Summary: a dataloader for single cell data in lamindb
 Home-page: https://github.com/jkobject/scDataLoader
 License: GPL3
@@ -34,12 +34,16 @@ Description-Content-Type: text/markdown
 
 [](https://codecov.io/gh/jkobject/scDataLoader)
 [](https://github.com/jkobject/scDataLoader/actions/workflows/main.yml)
+[](https://badge.fury.io/py/scDataLoader)
+[](https://scDataLoader.readthedocs.io/en/latest/?badge=latest)
+[](https://pepy.tech/project/scDataLoader)
+[](https://pepy.tech/project/scDataLoader)
+[](https://pepy.tech/project/scDataLoader)
+[](https://img.shields.io/github/issues/jkobject/scDataLoader)
+[](https://github.com/psf/black)
+[](https://doi.org/10.1101/2024.07.29.605556)
 
-
-
-built on top of `lamindb` and the `.mapped()` function by Sergey: https://github.com/Koncopd
-
-This data loader is designed to be used with:
+This single cell pytorch dataloader / lighting datamodule is designed to be used with:
 
 - [lamindb](https://lamin.ai/)
 
@@ -55,18 +59,13 @@ It allows you to:
 3. create a more complex single cell dataset
 4. extend it to your need
 
-
-
-the idea is to use it to train models like scGPT / GeneFormer (and soon, scPrint ;)). It is:
+built on top of `lamindb` and the `.mapped()` function by Sergey: https://github.com/Koncopd
 
-
-2. doing some dataset specific preprocessing if needed
-3. creating a dataset object on top of .mapped() (that is needed for mapping genes, cell labels etc..)
-4. passing it to a dataloader object that can work with it correctly
+## More
 
-
+I needed to create this Data Loader for my PhD project. I am using it to load & preprocess thousands of datasets containing millions of cells in a few seconds. I believed that individuals employing AI for single-cell RNA sequencing and other sequencing datasets would eagerly utilize and desire such a tool, which presently does not exist.
 
-
+
 
 ## Install it from PyPI
 
@@ -85,15 +84,85 @@ then run the notebooks with the poetry installed environment
 
 ## Usage
 
-
+### Direct Usage
+
+```python
+# initialize a local lamin database
+# !lamin init --storage ~/scdataloader --schema bionty
+
+from scdataloader import utils
+from scdataloader.preprocess import LaminPreprocessor, additional_postprocess, additional_preprocess
+
+# preprocess datasets
+DESCRIPTION='preprocessed by scDataLoader'
+
+cx_dataset = ln.Collection.using(instance="laminlabs/cellxgene").filter(name="cellxgene-census", version='2023-12-15').one()
+cx_dataset, len(cx_dataset.artifacts.all())
 
-
-
+
+do_preprocess = LaminPreprocessor(additional_postprocess=additional_postprocess, additional_preprocess=additional_preprocess, skip_validate=True, subset_hvg=0)
+
+preprocessed_dataset = do_preprocess(cx_dataset, name=DESCRIPTION, description=DESCRIPTION, start_at=6, version="2")
+
+# create dataloaders
+from scdataloader import DataModule
+import tqdm
+
+datamodule = DataModule(
+    collection_name="preprocessed dataset",
+    organisms=["NCBITaxon:9606"], #organism that we will work on
+    how="most expr", # for the collator (most expr genes only will be selected)
+    max_len=1000, # only the 1000 most expressed
+    batch_size=64,
+    num_workers=1,
+    validation_split=0.1,
+    test_split=0)
+
+for i in tqdm.tqdm(datamodule.train_dataloader()):
+    # pass #or do pass
+    print(i)
+    break
+
+# with lightning:
+# Trainer(model, datamodule)
+
+```
+
+see the notebooks in [docs](https://www.jkobject.com/scDataLoader/):
+
+1. [load a dataset](https://www.jkobject.com/scDataLoader/notebooks/1_download_and_preprocess/)
+2. [create a dataset](https://www.jkobject.com/scDataLoader/notebooks/2_create_dataloader/)
+
+### command line preprocessing
+
+You can use the command line to preprocess a large database of datasets like here for cellxgene. this allows parallelizing and easier usage.
+
+```bash
+scdataloader --instance "laminlabs/cellxgene" --name "cellxgene-census" --version "2023-12-15" --description "preprocessed for scprint" --new_name "scprint main" --start_at 10 >> scdataloader.out
+```
+
+### command line usage
+
+The main way to use
+
+> please refer to the [scPRINT documentation](https://www.jkobject.com/scPRINT/) and [lightning documentation](https://lightning.ai/docs/pytorch/stable/cli/lightning_cli_intermediate.html) for more information on command line usage
 
 ## Development
 
 Read the [CONTRIBUTING.md](CONTRIBUTING.md) file.
 
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
+## Acknowledgments
+
+- [lamin.ai](https://lamin.ai/)
+- [scanpy](https://scanpy.readthedocs.io/en/stable/)
+- [anndata](https://anndata.readthedocs.io/en/latest/)
+- [scprint](https://www.jkobject.com/scPRINT/)
+
+Awesome single cell dataloader created by @jkobject
 GNU GENERAL PUBLIC LICENSE
 Version 3, 29 June 2007
 
scdataloader-1.0.1.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+scdataloader/VERSION,sha256=WYVJhIUxBN9cNT4vaBoV_HkkdC-aLkaMKa8kjc5FzgM,6
+scdataloader/__init__.py,sha256=NIlE4oTUPRZ3uSW_maozoEHp470I7PV1vMOJ4XpSmL4,122
+scdataloader/__main__.py,sha256=db_upDq3tNEtcDH17mPIczToAqGkSKfLy0Qbj6B4YmE,6385
+scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
+scdataloader/collator.py,sha256=zkFdxirTDub1dJ1OJXO0p48kvd2r2ncKMdevAKIdTTc,13447
+scdataloader/config.py,sha256=rrW2DZxG4J2_pmpDbXXsaKJkpNC57w5dIlItiFbANYw,2905
+scdataloader/data.py,sha256=VugtHo9T9PqoJSv3lkJJAB89KD-fRwdVw1D76gnCc9c,12584
+scdataloader/datamodule.py,sha256=WLEWcDMcC1G3VD5tORfhfqRRHcTscpI0EzPikg3udbI,16881
+scdataloader/mapped.py,sha256=yF9l3obuRWbQjW8QZGRSKhc50fizXTWf3Pe1m542fW8,19481
+scdataloader/preprocess.py,sha256=noynYWuy9clhFu9UnN-vSvAHJHwakDttkI5aj1e_T98,29055
+scdataloader/utils.py,sha256=xyDsWaqkjhzlVBP8FiYdBUWHsel3twcVWmI53PhKqTM,21888
+scdataloader-1.0.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+scdataloader-1.0.1.dist-info/METADATA,sha256=2Xd8M1dq_JmvmFjmrrzn-1U4eOtwU6L51Y_7MCkGxvY,41327
+scdataloader-1.0.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+scdataloader-1.0.1.dist-info/entry_points.txt,sha256=nLqucZaa5wiF7-1FCgMXO916WDQ9Qm0TcxQp0f1DwE4,59
+scdataloader-1.0.1.dist-info/RECORD,,