scdataloader 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scdataloader/VERSION +1 -1
- scdataloader/__main__.py +11 -4
- scdataloader/utils.py +17 -10
- {scdataloader-1.2.1.dist-info → scdataloader-1.2.2.dist-info}/METADATA +3 -3
- {scdataloader-1.2.1.dist-info → scdataloader-1.2.2.dist-info}/RECORD +7 -7
- {scdataloader-1.2.1.dist-info → scdataloader-1.2.2.dist-info}/WHEEL +1 -1
- {scdataloader-1.2.1.dist-info → scdataloader-1.2.2.dist-info}/licenses/LICENSE +0 -0
scdataloader/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.2.1
|
|
1
|
+
1.2.2
|
scdataloader/__main__.py
CHANGED
|
@@ -53,14 +53,14 @@ def main():
|
|
|
53
53
|
)
|
|
54
54
|
parser.add_argument(
|
|
55
55
|
"--filter_gene_by_counts",
|
|
56
|
-
type=
|
|
57
|
-
default=
|
|
56
|
+
type=int,
|
|
57
|
+
default=0,
|
|
58
58
|
help="Determines whether to filter genes by counts.",
|
|
59
59
|
)
|
|
60
60
|
parser.add_argument(
|
|
61
61
|
"--filter_cell_by_counts",
|
|
62
|
-
type=
|
|
63
|
-
default=
|
|
62
|
+
type=int,
|
|
63
|
+
default=0,
|
|
64
64
|
help="Determines whether to filter cells by counts.",
|
|
65
65
|
)
|
|
66
66
|
parser.add_argument(
|
|
@@ -153,6 +153,12 @@ def main():
|
|
|
153
153
|
default=False,
|
|
154
154
|
help="Determines whether to do postprocessing.",
|
|
155
155
|
)
|
|
156
|
+
parser.add_argument(
|
|
157
|
+
"--cache",
|
|
158
|
+
type=bool,
|
|
159
|
+
default=True,
|
|
160
|
+
help="Determines whether to cache the dataset.",
|
|
161
|
+
)
|
|
156
162
|
args = parser.parse_args()
|
|
157
163
|
|
|
158
164
|
# Load the collection
|
|
@@ -178,6 +184,7 @@ def main():
|
|
|
178
184
|
normalize_sum=args.normalize_sum,
|
|
179
185
|
subset_hvg=args.subset_hvg,
|
|
180
186
|
hvg_flavor=args.hvg_flavor,
|
|
187
|
+
cache=args.cache,
|
|
181
188
|
binning=args.binning,
|
|
182
189
|
result_binned_key=args.result_binned_key,
|
|
183
190
|
length_normalize=args.length_normalize,
|
scdataloader/utils.py
CHANGED
|
@@ -127,15 +127,15 @@ def getBiomartTable(
|
|
|
127
127
|
|
|
128
128
|
cache_folder = os.path.expanduser(cache_folder)
|
|
129
129
|
createFoldersFor(cache_folder)
|
|
130
|
-
cachefile = os.path.join(cache_folder, ".biomart.
|
|
130
|
+
cachefile = os.path.join(cache_folder, ".biomart.parquet")
|
|
131
131
|
if useCache & os.path.isfile(cachefile):
|
|
132
132
|
print("fetching gene names from biomart cache")
|
|
133
|
-
res = pd.
|
|
133
|
+
res = pd.read_parquet(cachefile)
|
|
134
134
|
else:
|
|
135
135
|
print("downloading gene names from biomart")
|
|
136
136
|
|
|
137
137
|
res = _fetchFromServer(ensemble_server, attr + attributes, database=database)
|
|
138
|
-
res.
|
|
138
|
+
res.to_parquet(cachefile, index=False)
|
|
139
139
|
res.columns = attr + attributes
|
|
140
140
|
if type(res) is not type(pd.DataFrame()):
|
|
141
141
|
raise ValueError("should be a dataframe")
|
|
@@ -368,7 +368,14 @@ def load_genes(organisms: Union[str, list] = "NCBITaxon:9606"): # "NCBITaxon:10
|
|
|
368
368
|
genesdf["organism"] = organism
|
|
369
369
|
organismdf.append(genesdf)
|
|
370
370
|
organismdf = pd.concat(organismdf)
|
|
371
|
-
for col in [
|
|
371
|
+
for col in [
|
|
372
|
+
"source_id",
|
|
373
|
+
"run_id",
|
|
374
|
+
"created_by_id",
|
|
375
|
+
"updated_at",
|
|
376
|
+
"stable_id",
|
|
377
|
+
"created_at",
|
|
378
|
+
]:
|
|
372
379
|
if col in organismdf.columns:
|
|
373
380
|
organismdf.drop(columns=[col], inplace=True)
|
|
374
381
|
return organismdf
|
|
@@ -412,7 +419,7 @@ def populate_my_ontology(
|
|
|
412
419
|
# cell type
|
|
413
420
|
if celltypes is not None:
|
|
414
421
|
if len(celltypes) == 0:
|
|
415
|
-
bt.CellType.import_from_source()
|
|
422
|
+
bt.CellType.import_from_source(update=True)
|
|
416
423
|
else:
|
|
417
424
|
names = bt.CellType.public().df().index if not celltypes else celltypes
|
|
418
425
|
records = bt.CellType.from_values(names, field="ontology_id")
|
|
@@ -446,7 +453,7 @@ def populate_my_ontology(
|
|
|
446
453
|
# ethnicity
|
|
447
454
|
if ethnicities is not None:
|
|
448
455
|
if len(ethnicities) == 0:
|
|
449
|
-
bt.Ethnicity.import_from_source()
|
|
456
|
+
bt.Ethnicity.import_from_source(update=True)
|
|
450
457
|
else:
|
|
451
458
|
names = bt.Ethnicity.public().df().index if not ethnicities else ethnicities
|
|
452
459
|
records = bt.Ethnicity.from_values(names, field="ontology_id")
|
|
@@ -457,7 +464,7 @@ def populate_my_ontology(
|
|
|
457
464
|
# ExperimentalFactor
|
|
458
465
|
if assays is not None:
|
|
459
466
|
if len(assays) == 0:
|
|
460
|
-
bt.ExperimentalFactor.import_from_source()
|
|
467
|
+
bt.ExperimentalFactor.import_from_source(update=True)
|
|
461
468
|
else:
|
|
462
469
|
names = bt.ExperimentalFactor.public().df().index if not assays else assays
|
|
463
470
|
records = bt.ExperimentalFactor.from_values(names, field="ontology_id")
|
|
@@ -468,7 +475,7 @@ def populate_my_ontology(
|
|
|
468
475
|
# Tissue
|
|
469
476
|
if tissues is not None:
|
|
470
477
|
if len(tissues) == 0:
|
|
471
|
-
bt.Tissue.import_from_source()
|
|
478
|
+
bt.Tissue.import_from_source(update=True)
|
|
472
479
|
else:
|
|
473
480
|
names = bt.Tissue.public().df().index if not tissues else tissues
|
|
474
481
|
records = bt.Tissue.from_values(names, field="ontology_id")
|
|
@@ -477,7 +484,7 @@ def populate_my_ontology(
|
|
|
477
484
|
# DevelopmentalStage
|
|
478
485
|
if dev_stages is not None:
|
|
479
486
|
if len(dev_stages) == 0:
|
|
480
|
-
bt.DevelopmentalStage.import_from_source()
|
|
487
|
+
bt.DevelopmentalStage.import_from_source(update=True)
|
|
481
488
|
source = bt.PublicSource.filter(organism="mouse", name="mmusdv").last()
|
|
482
489
|
bt.DevelopmentalStage.import_from_source(source=source)
|
|
483
490
|
else:
|
|
@@ -493,7 +500,7 @@ def populate_my_ontology(
|
|
|
493
500
|
# Disease
|
|
494
501
|
if diseases is not None:
|
|
495
502
|
if len(diseases) == 0:
|
|
496
|
-
bt.Disease.import_from_source()
|
|
503
|
+
bt.Disease.import_from_source(update=True)
|
|
497
504
|
else:
|
|
498
505
|
names = bt.Disease.public().df().index if not diseases else diseases
|
|
499
506
|
records = bt.Disease.from_values(names, field="ontology_id")
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: scdataloader
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.2.2
|
|
4
4
|
Summary: a dataloader for single cell data in lamindb
|
|
5
5
|
Project-URL: repository, https://github.com/jkobject/scDataLoader
|
|
6
6
|
Author-email: jkobject <jkobject@gmail.com>
|
|
7
|
-
License
|
|
8
|
-
License-File: LICENSE
|
|
7
|
+
License: MIT
|
|
9
8
|
Keywords: dataloader,lamindb,pytorch,scPRINT,scRNAseq
|
|
10
9
|
Requires-Python: <3.11,>=3.10
|
|
11
10
|
Requires-Dist: anndata>=0.9.0
|
|
@@ -18,6 +17,7 @@ Requires-Dist: leidenalg>=0.8.0
|
|
|
18
17
|
Requires-Dist: lightning>=2.0.0
|
|
19
18
|
Requires-Dist: matplotlib>=3.5.0
|
|
20
19
|
Requires-Dist: numpy>=1.26.0
|
|
20
|
+
Requires-Dist: palantir>=1.3.3
|
|
21
21
|
Requires-Dist: pandas>=2.0.0
|
|
22
22
|
Requires-Dist: scikit-misc>=0.5.0
|
|
23
23
|
Requires-Dist: seaborn>=0.11.0
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
scdataloader/VERSION,sha256=
|
|
1
|
+
scdataloader/VERSION,sha256=xipcxhrEUlk1dT9ewoTAoFKksdpLOjWA3OK313ohVK4,6
|
|
2
2
|
scdataloader/__init__.py,sha256=5y9VzRhOAUWeYMn2MrRRRlzgdiMjRFytr7gcn-I6IkE,147
|
|
3
|
-
scdataloader/__main__.py,sha256=
|
|
3
|
+
scdataloader/__main__.py,sha256=VXrt2IykBypnIXWydwA7NfF7LtRGc-0Khjtm5OIBNpI,6527
|
|
4
4
|
scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
|
|
5
5
|
scdataloader/collator.py,sha256=gzHiuixUwK8JClhAbG12kgWMU_VTKkowibA-tDFpbwo,11341
|
|
6
6
|
scdataloader/config.py,sha256=rrW2DZxG4J2_pmpDbXXsaKJkpNC57w5dIlItiFbANYw,2905
|
|
7
7
|
scdataloader/data.py,sha256=3dCp-lIAfOkCi76SH5W3iSqFmAWZslwARkN9v5mylz8,14907
|
|
8
8
|
scdataloader/datamodule.py,sha256=B-udBevPSPF__hfy0pOz1dGovgE95K2pxPupjB7RblI,16936
|
|
9
9
|
scdataloader/preprocess.py,sha256=pH4EPrcRqH34o3t5X3A4kETiYdCZngih5SdP_PPfgOo,29178
|
|
10
|
-
scdataloader/utils.py,sha256=
|
|
11
|
-
scdataloader-1.2.
|
|
12
|
-
scdataloader-1.2.
|
|
13
|
-
scdataloader-1.2.
|
|
14
|
-
scdataloader-1.2.
|
|
10
|
+
scdataloader/utils.py,sha256=7tgt3sPj_XTKb-UlJDAZWvQr0_DG9VTC6ioiLdBWFFE,22498
|
|
11
|
+
scdataloader-1.2.2.dist-info/METADATA,sha256=XMtKO9ImiyY--F92njvMUe69OaJgDx8C3xQtBAXqo8g,9800
|
|
12
|
+
scdataloader-1.2.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
|
13
|
+
scdataloader-1.2.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
14
|
+
scdataloader-1.2.2.dist-info/RECORD,,
|
|
File without changes
|