scdataloader 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- scdataloader/VERSION +1 -1
- scdataloader/__init__.py +1 -1
- scdataloader/__main__.py +63 -42
- scdataloader/collator.py +87 -43
- scdataloader/config.py +106 -0
- scdataloader/data.py +78 -98
- scdataloader/datamodule.py +375 -0
- scdataloader/mapped.py +22 -7
- scdataloader/preprocess.py +444 -109
- scdataloader/utils.py +106 -63
- {scdataloader-0.0.3.dist-info → scdataloader-0.0.4.dist-info}/METADATA +46 -2
- scdataloader-0.0.4.dist-info/RECORD +16 -0
- scdataloader/dataloader.py +0 -318
- scdataloader-0.0.3.dist-info/RECORD +0 -15
- {scdataloader-0.0.3.dist-info → scdataloader-0.0.4.dist-info}/LICENSE +0 -0
- {scdataloader-0.0.3.dist-info → scdataloader-0.0.4.dist-info}/WHEEL +0 -0
- {scdataloader-0.0.3.dist-info → scdataloader-0.0.4.dist-info}/entry_points.txt +0 -0
scdataloader/utils.py CHANGED

````diff
@@ -11,9 +11,14 @@ from django.db import IntegrityError
 from scipy.sparse import csr_matrix
 from scipy.stats import median_abs_deviation
 from functools import lru_cache
+from collections import Counter
 
+from typing import Union, List, Optional
 
-def createFoldersFor(filepath):
+from anndata import AnnData
+
+
+def createFoldersFor(filepath: str):
     """
     will recursively create folders if needed until having all the folders required to save the file in this filepath
     """
@@ -24,7 +29,9 @@ def createFoldersFor(filepath)
         os.mkdir(prevval)
 
 
-def _fetchFromServer(ensemble_server, attributes):
+def _fetchFromServer(
+    ensemble_server: str, attributes: list, database: str = "hsapiens_gene_ensembl"
+):
     """
     Fetches data from the specified ensemble server.
 
@@ -36,7 +43,7 @@ def _fetchFromServer(ensemble_server, attributes)
         pd.DataFrame: A pandas DataFrame containing the fetched data.
     """
     server = BiomartServer(ensemble_server)
-    ensmbl = server.datasets[
+    ensmbl = server.datasets[database]
     print(attributes)
     res = pd.read_csv(
         io.StringIO(
@@ -48,11 +55,12 @@ def _fetchFromServer(ensemble_server, attributes)
 
 
 def getBiomartTable(
-    ensemble_server="http://jul2023.archive.ensembl.org/biomart",
-    useCache=False,
-    cache_folder="/tmp/biomart/",
-    attributes=[],
-    bypass_attributes=False,
+    ensemble_server: str = "http://jul2023.archive.ensembl.org/biomart",
+    useCache: bool = False,
+    cache_folder: str = "/tmp/biomart/",
+    attributes: List[str] = [],
+    bypass_attributes: bool = False,
+    database: str = "hsapiens_gene_ensembl",
 ):
     """generate a genelist dataframe from ensembl's biomart
 
@@ -88,7 +96,7 @@ def getBiomartTable(
     else:
         print("downloading gene names from biomart")
 
-        res = _fetchFromServer(ensemble_server, attr + attributes)
+        res = _fetchFromServer(ensemble_server, attr + attributes, database=database)
         res.to_csv(cachefile, index=False)
 
     res.columns = attr + attributes
````
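The practical effect of the new `database` parameter: `getBiomartTable` is no longer hard-wired to the human dataset and threads the choice down into `_fetchFromServer`. A minimal sketch of how a caller might use it; the mouse dataset name `mmusculus_gene_ensembl` and the `mgi_symbol`/`hgnc_symbol` attributes follow Biomart's standard naming and are assumptions, not something this diff pins down:

```python
from scdataloader.utils import getBiomartTable

# Default behaviour is unchanged: the human dataset is queried.
human_genes = getBiomartTable(attributes=["hgnc_symbol"])

# New in 0.0.4: point the same helper at another Biomart dataset.
mouse_genes = getBiomartTable(
    attributes=["mgi_symbol"],          # Biomart's mouse gene symbol attribute
    database="mmusculus_gene_ensembl",  # assumed standard Biomart dataset name
)
print(mouse_genes.head())
```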
````diff
@@ -102,7 +110,7 @@ def getBiomartTable(
     return res
 
 
-def validate(adata, organism):
+def validate(adata: AnnData, organism: str):
     """
     validate checks if the adata object is valid for lamindb
 
@@ -144,9 +152,6 @@ def validate(adata, organism)
             raise ValueError(
                 f"Column '{val}' is missing in the provided anndata object."
             )
-    bionty_source = bt.PublicSource.filter(
-        entity="DevelopmentalStage", organism=organism
-    ).one()
 
     if not bt.Ethnicity.validate(
         adata.obs["self_reported_ethnicity_ontology_term_id"],
@@ -169,14 +174,10 @@ def validate(adata, organism)
         adata.obs["cell_type_ontology_term_id"], field="ontology_id"
     ).all():
         raise ValueError("Invalid cell type ontology term id found")
-    if (
-
-
-
-        field="ontology_id",
-    )
-    .all()
-    ):
+    if not bt.DevelopmentalStage.validate(
+        adata.obs["development_stage_ontology_term_id"],
+        field="ontology_id",
+    ).all():
         raise ValueError("Invalid dev stage ontology term id found")
     if not bt.Tissue.validate(
         adata.obs["tissue_ontology_term_id"], field="ontology_id"
@@ -186,18 +187,16 @@ def validate(adata, organism)
         adata.obs["assay_ontology_term_id"], field="ontology_id"
     ).all():
         raise ValueError("Invalid assay ontology term id found")
-    if (
-
-
-        .all()
-    ):
+    if not bt.Gene.validate(
+        adata.var.index, field="ensembl_gene_id", organism=organism
+    ).all():
         raise ValueError("Invalid gene ensembl id found")
     return True
 
 
 # setting a cache of 200 elements
 # @lru_cache(maxsize=200)
-def get_all_ancestors(val, df):
+def get_all_ancestors(val: str, df: pd.DataFrame):
     if val not in df.index:
         return set()
     parents = df.loc[val].parents__ontology_id
````
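With the `PublicSource` lookup removed, `validate` now checks every ontology column directly against the local bionty registries, and gene IDs are validated per organism. A sketch of the intended call, assuming an AnnData with CELLxGENE-style ontology-term columns and that `organism` takes a bionty organism name such as `"human"` (the file name is hypothetical):

```python
import anndata as ad
from scdataloader.utils import validate

adata = ad.read_h5ad("my_dataset.h5ad")  # hypothetical input file
# Raises ValueError on the first missing column or invalid ontology term;
# returns True when every check passes.
assert validate(adata, organism="human")
```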
````diff
@@ -207,7 +206,7 @@ def get_all_ancestors(val, df)
     return set.union(set(parents), *[get_all_ancestors(val, df) for val in parents])
 
 
-def get_ancestry_mapping(all_elem, onto_df):
+def get_ancestry_mapping(all_elem: list, onto_df: pd.DataFrame):
     """
     This function generates a mapping of all elements to their ancestors in the ontology dataframe.
 
@@ -242,12 +241,12 @@ def get_ancestry_mapping(all_elem, onto_df)
 
 
 def load_dataset_local(
-    remote_dataset,
-    download_folder,
-    name,
-    description,
-    use_cache=True,
-    only=None,
+    remote_dataset: ln.Collection,
+    download_folder: str,
+    name: str,
+    description: str,
+    use_cache: bool = True,
+    only: Optional[List[int]] = None,
 ):
     """
     This function loads a remote lamindb dataset to local.
````
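The new annotations document the contract: `remote_dataset` is a lamindb `Collection` and `only` an optional list of artifact indices. A hedged sketch of a call; the collection lookup mirrors the README snippet further down, while the folder, names, and the meaning of `only` as positional indices are assumptions:

```python
import lamindb as ln
from scdataloader.utils import load_dataset_local

remote = ln.Collection.using(instance="laminlabs/cellxgene").filter(
    name="cellxgene-census", version="2023-12-15"
).one()

dataset = load_dataset_local(
    remote_dataset=remote,
    download_folder="/tmp/scdataloader",  # hypothetical local folder
    name="census-local",
    description="local copy of two census artifacts",
    only=[0, 1],  # assumption: positional indices of the artifacts to fetch
)
```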
````diff
@@ -303,7 +302,7 @@ def load_dataset_local(
     return dataset
 
 
-def load_genes(organisms):
+def load_genes(organisms: Union[str, list] = "NCBITaxon:9606"):  # "NCBITaxon:10090",
     organismdf = []
     if type(organisms) == str:
         organisms = [organisms]
@@ -313,7 +312,7 @@ def load_genes(organisms)
     ).df()
     genesdf = genesdf[~genesdf["public_source_id"].isna()]
     genesdf = genesdf.drop_duplicates(subset="ensembl_gene_id")
-    genesdf = genesdf.set_index("ensembl_gene_id")
+    genesdf = genesdf.set_index("ensembl_gene_id").sort_index()
     # mitochondrial genes
     genesdf["mt"] = genesdf.symbol.astype(str).str.startswith("MT-")
     # ribosomal genes
````
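The added `.sort_index()` is small but consequential for a dataloader: the gene table now comes back in deterministic Ensembl-ID order, so any positional gene encoding derived from it is stable across runs. For example:

```python
from scdataloader.utils import load_genes

genesdf = load_genes()  # human by default; pass a list for several organisms
assert genesdf.index.is_monotonic_increasing  # guaranteed by .sort_index()
print(genesdf["mt"].sum(), "mitochondrial genes flagged")
```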
````diff
@@ -326,14 +325,14 @@ def load_genes(organisms)
 
 
 def populate_my_ontology(
-    organisms=["NCBITaxon:10090", "NCBITaxon:9606"],
-    sex=["PATO:0000384", "PATO:0000383"],
-    celltypes=[],
-    ethnicities=[],
-    assays=[],
-    tissues=[],
-    diseases=[],
-    dev_stages=[],
+    organisms: List[str] = ["NCBITaxon:10090", "NCBITaxon:9606"],
+    sex: List[str] = ["PATO:0000384", "PATO:0000383"],
+    celltypes: List[str] = [],
+    ethnicities: List[str] = [],
+    assays: List[str] = [],
+    tissues: List[str] = [],
+    diseases: List[str] = [],
+    dev_stages: List[str] = [],
 ):
     """
     creates a local version of the lamin ontologies and add the required missing values in base ontologies
@@ -360,20 +359,20 @@ def populate_my_ontology(
         dev_stages (list, optional): List of developmental stages. Defaults to [].
     """
 
-    names = bt.CellType.
+    names = bt.CellType.public().df().index if not celltypes else celltypes
     records = bt.CellType.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(celltypes))
     bt.CellType(name="unknown", ontology_id="unknown").save()
     # Organism
-    names = bt.Organism.
+    names = bt.Organism.public().df().index if not organisms else organisms
     records = [
         i[0] if type(i) is list else i
         for i in [bt.Organism.from_public(ontology_id=i) for i in names]
     ]
-    ln.save(records)
+    ln.save(records, parents=bool(organisms))
     bt.Organism(name="unknown", ontology_id="unknown").save()
     # Phenotype
-    names = bt.Phenotype.
+    names = bt.Phenotype.public().df().index if not sex else sex
     records = [
         bt.Phenotype.from_public(
             ontology_id=i,
@@ -383,38 +382,49 @@ def populate_my_ontology(
         )
         for i in names
     ]
-    ln.save(records)
+    ln.save(records, parents=bool(sex))
     bt.Phenotype(name="unknown", ontology_id="unknown").save()
     # ethnicity
-    names = bt.Ethnicity.
+    names = bt.Ethnicity.public().df().index if not ethnicities else ethnicities
     records = bt.Ethnicity.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(ethnicities))
     bt.Ethnicity(
         name="unknown", ontology_id="unknown"
     ).save()  # multi ethnic will have to get renamed
     # ExperimentalFactor
-    names = bt.ExperimentalFactor.
+    names = bt.ExperimentalFactor.public().df().index if not assays else assays
     records = bt.ExperimentalFactor.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(assays))
     bt.ExperimentalFactor(name="unknown", ontology_id="unknown").save()
     # lookup = bt.ExperimentalFactor.lookup()
     # lookup.smart_seq_v4.parents.add(lookup.smart_like)
     # Tissue
-    names = bt.Tissue.
+    names = bt.Tissue.public().df().index if not tissues else tissues
     records = bt.Tissue.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(tissues))
     bt.Tissue(name="unknown", ontology_id="unknown").save()
     # DevelopmentalStage
     names = (
-        bt.DevelopmentalStage.
+        bt.DevelopmentalStage.public().df().index if not dev_stages else dev_stages
     )
     records = bt.DevelopmentalStage.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(dev_stages))
     bt.DevelopmentalStage(name="unknown", ontology_id="unknown").save()
+
+    names = bt.DevelopmentalStage.public(organism="mouse").df().name
+    bionty_source = bt.PublicSource.filter(
+        entity="DevelopmentalStage", organism="mouse"
+    ).one()
+    records = [
+        bt.DevelopmentalStage.from_public(name=i, public_source=bionty_source)
+        for i in names.tolist()
+    ]
+    records[-4] = records[-4][0]
+    ln.save(records)
     # Disease
-    names = bt.Disease.
+    names = bt.Disease.public().df().index if not diseases else diseases
     records = bt.Disease.from_values(names, field="ontology_id")
-    ln.save(records)
+    ln.save(records, parents=bool(diseases))
     bt.Disease(name="normal", ontology_id="PATO:0000461").save()
     bt.Disease(name="unknown", ontology_id="unknown").save()
     # genes
````
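Two behavioral changes stand out here: every save now passes `parents=bool(...)`, so ancestor records are only resolved when the caller supplies an explicit subset of terms (the empty-list defaults import whole registries without the extra parent resolution), and mouse developmental stages are now imported by name from their dedicated bionty source. A sketch of both modes; the cell-type ID is just an illustrative example:

```python
from scdataloader.utils import populate_my_ontology

# Full import: the empty-list defaults (celltypes, assays, tissues, ...)
# pull every public term and save with parents=False.
populate_my_ontology()

# Targeted import: explicit IDs are saved with parents=True, so their
# ancestors are registered too. CL:0000057 is fibroblast.
populate_my_ontology(
    organisms=["NCBITaxon:9606"],
    celltypes=["CL:0000057"],
)
```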
````diff
@@ -430,7 +440,7 @@ def populate_my_ontology(
     ln.save(records)
 
 
-def is_outlier(adata, metric: str, nmads: int):
+def is_outlier(adata: AnnData, metric: str, nmads: int):
     """
     is_outlier detects outliers in adata.obs[metric]
 
@@ -449,7 +459,7 @@ def is_outlier(adata, metric: str, nmads: int)
     return outlier
 
 
-def length_normalize(adata, gene_lengths):
+def length_normalize(adata: AnnData, gene_lengths: list):
     """
     length_normalize normalizes the counts by the gene length
 
@@ -464,7 +474,7 @@ def length_normalize(adata, gene_lengths)
     return adata
 
 
-def pd_load_cached(url, loc="/tmp/", cache=True, **kwargs):
+def pd_load_cached(url: str, loc: str = "/tmp/", cache: bool = True, **kwargs):
     """
     pd_load_cached downloads a file from a url and loads it as a pandas dataframe
 
````
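These three hunks are annotation-only and change no behavior. For reference, a typical QC pattern built on `is_outlier`; the `total_counts` column and the pbmc3k dataset come from scanpy and are assumptions of this sketch, not part of the diff:

```python
import scanpy as sc
from scdataloader.utils import is_outlier

adata = sc.datasets.pbmc3k()
sc.pp.calculate_qc_metrics(adata, inplace=True)  # adds obs["total_counts"]

# Flag cells whose library size sits more than 5 MADs from the median.
adata.obs["outlier"] = is_outlier(adata, metric="total_counts", nmads=5)
print(adata.obs["outlier"].sum(), "cells flagged")
```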
````diff
@@ -482,3 +492,36 @@ def pd_load_cached(url, loc="/tmp/", cache=True, **kwargs)
         urllib.request.urlretrieve(url, loc)
     # Load the data from the file
     return pd.read_csv(loc, **kwargs)
+
+
+def translate(
+    val: Union[str, list, set, Counter, dict], t: str = "cell_type_ontology_term_id"
+):
+    """
+    translate translates the ontology term id to the name
+
+    Args:
+        val (str, dict, set, list, dict): the object to translate
+        t (flat, optional): the type of ontology terms.
+            one of cell_type_ontology_term_id, assay_ontology_term_id, tissue_ontology_term_id.
+            Defaults to "cell_type_ontology_term_id".
+
+    Returns:
+        dict: the mapping for the translation
+    """
+    if t == "cell_type_ontology_term_id":
+        obj = bt.CellType.public(organism="all")
+    elif t == "assay_ontology_term_id":
+        obj = bt.ExperimentalFactor.public()
+    elif t == "tissue_ontology_term_id":
+        obj = bt.Tissue.public()
+    else:
+        return None
+    if type(val) is str:
+        return {val: obj.search(val, field=obj.ontology_id).name.iloc[0]}
+    elif type(val) is list or type(val) is set:
+        return {i: obj.search(i, field=obj.ontology_id).name.iloc[0] for i in set(val)}
+    elif type(val) is dict or type(val) is Counter:
+        return {
+            obj.search(k, field=obj.ontology_id).name.iloc[0]: v for k, v in val.items()
+        }
````
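The new `translate` helper is the main functional addition to utils: it maps ontology term IDs to human-readable names through the bionty public registries, and accepts a single ID, a list/set of IDs, or a Counter/dict of ID to count. A short sketch of each form; the example CL/UBERON IDs are standard ontology terms used here as assumptions:

```python
from collections import Counter
from scdataloader.utils import translate

# single id -> {id: name}, e.g. {"CL:0000057": "fibroblast"}
print(translate("CL:0000057"))

# list or set -> one entry per unique id
print(translate({"UBERON:0000955", "UBERON:0002107"}, t="tissue_ontology_term_id"))

# Counter or dict -> resolved names become the keys, counts stay as values
print(translate(Counter({"CL:0000057": 12, "CL:0000236": 3})))
```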
{scdataloader-0.0.3.dist-info → scdataloader-0.0.4.dist-info}/METADATA CHANGED

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scdataloader
-Version: 0.0.3
+Version: 0.0.4
 Summary: a dataloader for single cell data in lamindb
 Home-page: https://github.com/jkobject/scDataLoader
 License: GPL3
@@ -34,6 +34,8 @@ Description-Content-Type: text/markdown
 
 [](https://codecov.io/gh/jkobject/scDataLoader)
 [](https://github.com/jkobject/scDataLoader/actions/workflows/main.yml)
+[](https://zenodo.org/doi/10.5281/zenodo.10573143)
+
 
 Awesome single cell dataloader created by @jkobject
 
@@ -66,7 +68,7 @@ the idea is to use it to train models like scGPT / GeneFormer (and soon, scPrint
 
 Currently one would have to use the preprocess function to make the dataset fit for different tools like scGPT / Geneformer. But I would want to enable it through different Collators. This is still missing and a WIP... (please do contribute!)
 
-
+
 
 ## Install it from PyPI
 
@@ -85,6 +87,48 @@ then run the notebooks with the poetry installed environment
 
 ## Usage
 
+```python
+# initialize a local lamin database
+# !lamin init --storage ~/scdataloader --schema bionty
+
+from scdataloader import utils
+from scdataloader.preprocess import LaminPreprocessor, additional_postprocess, additional_preprocess
+
+# preprocess datasets
+DESCRIPTION='preprocessed by scDataLoader'
+
+cx_dataset = ln.Collection.using(instance="laminlabs/cellxgene").filter(name="cellxgene-census", version='2023-12-15').one()
+cx_dataset, len(cx_dataset.artifacts.all())
+
+
+do_preprocess = LaminPreprocessor(additional_postprocess=additional_postprocess, additional_preprocess=additional_preprocess, skip_validate=True, subset_hvg=0)
+
+preprocessed_dataset = do_preprocess(cx_dataset, name=DESCRIPTION, description=DESCRIPTION, start_at=6, version="2")
+
+# create dataloaders
+from scdataloader import DataModule
+import tqdm
+
+datamodule = DataModule(
+    collection_name="preprocessed dataset",
+    organisms=["NCBITaxon:9606"], #organism that we will work on
+    how="most expr", # for the collator (most expr genes only will be selected)
+    max_len=1000, # only the 1000 most expressed
+    batch_size=64,
+    num_workers=1,
+    validation_split=0.1,
+    test_split=0)
+
+for i in tqdm.tqdm(datamodule.train_dataloader()):
+    # pass #or do pass
+    print(i)
+    break
+
+# with lightning:
+# Trainer(model, datamodule)
+
+```
+
 see the notebooks in [docs](https://jkobject.github.io/scDataLoader/):
 
 1. [load a dataset](https://jkobject.github.io/scDataLoader/notebooks/01_load_dataset.html)
````
scdataloader-0.0.4.dist-info/RECORD ADDED

````diff
@@ -0,0 +1,16 @@
+scdataloader/VERSION,sha256=ln2a-xATRmZxZvLnboGRC8GQSI19QdUMoAcunZLwDjI,6
+scdataloader/__init__.py,sha256=NIlE4oTUPRZ3uSW_maozoEHp470I7PV1vMOJ4XpSmL4,122
+scdataloader/__main__.py,sha256=UyXtFHgWxE-ecJmM_oEDLlzBDBbH-uEKAVj1A7BkwmM,6297
+scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
+scdataloader/collator.py,sha256=Ykjdw24GUvHdbowWUDtp28YTkaF3w65SiWTU2PKBzy4,11714
+scdataloader/config.py,sha256=0_LoIblgdZZ19yM2qvPE-padMGQzdhuaxX20zYrhWq0,2780
+scdataloader/data.py,sha256=faJWN--06N7irWBKcjeU6fcX5NbzyEPXs2_EVGxfBpw,12292
+scdataloader/datamodule.py,sha256=OhHPb3jhGG5HbvahzTGxgzJ_lxbVJ4PfZspVW9h7SZk,14789
+scdataloader/mapped.py,sha256=rhE11Xl3x_wIKu3m_wu8Is6mYsXdblu3nQpT5lNqr60,13301
+scdataloader/preprocess.py,sha256=67ewe6b4HIjz_vTDjlOAJ4lMe4K2oCw2HHHUS-7S77M,38205
+scdataloader/utils.py,sha256=6eKU3_cotEaQcxONMrCWzMx7U8DybabteNhk-vNqfUQ,19365
+scdataloader-0.0.4.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+scdataloader-0.0.4.dist-info/METADATA,sha256=Bf8UjMwRcqSbWW8VbWrLhSb7qKQYdjZtJ7d6Oz4-rn8,39733
+scdataloader-0.0.4.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+scdataloader-0.0.4.dist-info/entry_points.txt,sha256=nLqucZaa5wiF7-1FCgMXO916WDQ9Qm0TcxQp0f1DwE4,59
+scdataloader-0.0.4.dist-info/RECORD,,
````