scdataloader 1.2.1__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scdataloader/VERSION CHANGED
@@ -1 +1 @@
1
- 1.2.1
1
+ 1.2.2
scdataloader/__main__.py CHANGED
@@ -53,14 +53,14 @@ def main():
53
53
  )
54
54
  parser.add_argument(
55
55
  "--filter_gene_by_counts",
56
- type=Union[int, bool],
57
- default=False,
56
+ type=int,
57
+ default=0,
58
58
  help="Determines whether to filter genes by counts.",
59
59
  )
60
60
  parser.add_argument(
61
61
  "--filter_cell_by_counts",
62
- type=Union[int, bool],
63
- default=False,
62
+ type=int,
63
+ default=0,
64
64
  help="Determines whether to filter cells by counts.",
65
65
  )
66
66
  parser.add_argument(
@@ -153,6 +153,12 @@ def main():
153
153
  default=False,
154
154
  help="Determines whether to do postprocessing.",
155
155
  )
156
+ parser.add_argument(
157
+ "--cache",
158
+ type=bool,
159
+ default=True,
160
+ help="Determines whether to cache the dataset.",
161
+ )
156
162
  args = parser.parse_args()
157
163
 
158
164
  # Load the collection
@@ -178,6 +184,7 @@ def main():
178
184
  normalize_sum=args.normalize_sum,
179
185
  subset_hvg=args.subset_hvg,
180
186
  hvg_flavor=args.hvg_flavor,
187
+ cache=args.cache,
181
188
  binning=args.binning,
182
189
  result_binned_key=args.result_binned_key,
183
190
  length_normalize=args.length_normalize,
scdataloader/utils.py CHANGED
@@ -127,15 +127,15 @@ def getBiomartTable(
127
127
 
128
128
  cache_folder = os.path.expanduser(cache_folder)
129
129
  createFoldersFor(cache_folder)
130
- cachefile = os.path.join(cache_folder, ".biomart.csv")
130
+ cachefile = os.path.join(cache_folder, ".biomart.parquet")
131
131
  if useCache & os.path.isfile(cachefile):
132
132
  print("fetching gene names from biomart cache")
133
- res = pd.read_csv(cachefile)
133
+ res = pd.read_parquet(cachefile)
134
134
  else:
135
135
  print("downloading gene names from biomart")
136
136
 
137
137
  res = _fetchFromServer(ensemble_server, attr + attributes, database=database)
138
- res.to_csv(cachefile, index=False)
138
+ res.to_parquet(cachefile, index=False)
139
139
  res.columns = attr + attributes
140
140
  if type(res) is not type(pd.DataFrame()):
141
141
  raise ValueError("should be a dataframe")
@@ -368,7 +368,14 @@ def load_genes(organisms: Union[str, list] = "NCBITaxon:9606"): # "NCBITaxon:10
368
368
  genesdf["organism"] = organism
369
369
  organismdf.append(genesdf)
370
370
  organismdf = pd.concat(organismdf)
371
- for col in ["source_id", "run_id", "created_by_id", "updated_at", "stable_id", "created_at"]:
371
+ for col in [
372
+ "source_id",
373
+ "run_id",
374
+ "created_by_id",
375
+ "updated_at",
376
+ "stable_id",
377
+ "created_at",
378
+ ]:
372
379
  if col in organismdf.columns:
373
380
  organismdf.drop(columns=[col], inplace=True)
374
381
  return organismdf
@@ -412,7 +419,7 @@ def populate_my_ontology(
412
419
  # cell type
413
420
  if celltypes is not None:
414
421
  if len(celltypes) == 0:
415
- bt.CellType.import_from_source()
422
+ bt.CellType.import_from_source(update=True)
416
423
  else:
417
424
  names = bt.CellType.public().df().index if not celltypes else celltypes
418
425
  records = bt.CellType.from_values(names, field="ontology_id")
@@ -446,7 +453,7 @@ def populate_my_ontology(
446
453
  # ethnicity
447
454
  if ethnicities is not None:
448
455
  if len(ethnicities) == 0:
449
- bt.Ethnicity.import_from_source()
456
+ bt.Ethnicity.import_from_source(update=True)
450
457
  else:
451
458
  names = bt.Ethnicity.public().df().index if not ethnicities else ethnicities
452
459
  records = bt.Ethnicity.from_values(names, field="ontology_id")
@@ -457,7 +464,7 @@ def populate_my_ontology(
457
464
  # ExperimentalFactor
458
465
  if assays is not None:
459
466
  if len(assays) == 0:
460
- bt.ExperimentalFactor.import_from_source()
467
+ bt.ExperimentalFactor.import_from_source(update=True)
461
468
  else:
462
469
  names = bt.ExperimentalFactor.public().df().index if not assays else assays
463
470
  records = bt.ExperimentalFactor.from_values(names, field="ontology_id")
@@ -468,7 +475,7 @@ def populate_my_ontology(
468
475
  # Tissue
469
476
  if tissues is not None:
470
477
  if len(tissues) == 0:
471
- bt.Tissue.import_from_source()
478
+ bt.Tissue.import_from_source(update=True)
472
479
  else:
473
480
  names = bt.Tissue.public().df().index if not tissues else tissues
474
481
  records = bt.Tissue.from_values(names, field="ontology_id")
@@ -477,7 +484,7 @@ def populate_my_ontology(
477
484
  # DevelopmentalStage
478
485
  if dev_stages is not None:
479
486
  if len(dev_stages) == 0:
480
- bt.DevelopmentalStage.import_from_source()
487
+ bt.DevelopmentalStage.import_from_source(update=True)
481
488
  source = bt.PublicSource.filter(organism="mouse", name="mmusdv").last()
482
489
  bt.DevelopmentalStage.import_from_source(source=source)
483
490
  else:
@@ -493,7 +500,7 @@ def populate_my_ontology(
493
500
  # Disease
494
501
  if diseases is not None:
495
502
  if len(diseases) == 0:
496
- bt.Disease.import_from_source()
503
+ bt.Disease.import_from_source(update=True)
497
504
  else:
498
505
  names = bt.Disease.public().df().index if not diseases else diseases
499
506
  records = bt.Disease.from_values(names, field="ontology_id")
@@ -1,11 +1,10 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: scdataloader
3
- Version: 1.2.1
3
+ Version: 1.2.2
4
4
  Summary: a dataloader for single cell data in lamindb
5
5
  Project-URL: repository, https://github.com/jkobject/scDataLoader
6
6
  Author-email: jkobject <jkobject@gmail.com>
7
- License-Expression: MIT
8
- License-File: LICENSE
7
+ License: MIT
9
8
  Keywords: dataloader,lamindb,pytorch,scPRINT,scRNAseq
10
9
  Requires-Python: <3.11,>=3.10
11
10
  Requires-Dist: anndata>=0.9.0
@@ -18,6 +17,7 @@ Requires-Dist: leidenalg>=0.8.0
18
17
  Requires-Dist: lightning>=2.0.0
19
18
  Requires-Dist: matplotlib>=3.5.0
20
19
  Requires-Dist: numpy>=1.26.0
20
+ Requires-Dist: palantir>=1.3.3
21
21
  Requires-Dist: pandas>=2.0.0
22
22
  Requires-Dist: scikit-misc>=0.5.0
23
23
  Requires-Dist: seaborn>=0.11.0
@@ -1,14 +1,14 @@
1
- scdataloader/VERSION,sha256=bPTghLR_M8mwLveSedFXgzho-PcFFBaadovjU-4yj-o,6
1
+ scdataloader/VERSION,sha256=xipcxhrEUlk1dT9ewoTAoFKksdpLOjWA3OK313ohVK4,6
2
2
  scdataloader/__init__.py,sha256=5y9VzRhOAUWeYMn2MrRRRlzgdiMjRFytr7gcn-I6IkE,147
3
- scdataloader/__main__.py,sha256=Hu7Bnc7P4UfOzNWyDAVoNZsItgy27hldaw3y8OS3gPM,6387
3
+ scdataloader/__main__.py,sha256=VXrt2IykBypnIXWydwA7NfF7LtRGc-0Khjtm5OIBNpI,6527
4
4
  scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
5
5
  scdataloader/collator.py,sha256=gzHiuixUwK8JClhAbG12kgWMU_VTKkowibA-tDFpbwo,11341
6
6
  scdataloader/config.py,sha256=rrW2DZxG4J2_pmpDbXXsaKJkpNC57w5dIlItiFbANYw,2905
7
7
  scdataloader/data.py,sha256=3dCp-lIAfOkCi76SH5W3iSqFmAWZslwARkN9v5mylz8,14907
8
8
  scdataloader/datamodule.py,sha256=B-udBevPSPF__hfy0pOz1dGovgE95K2pxPupjB7RblI,16936
9
9
  scdataloader/preprocess.py,sha256=pH4EPrcRqH34o3t5X3A4kETiYdCZngih5SdP_PPfgOo,29178
10
- scdataloader/utils.py,sha256=5-6CnI3Utn5XFpqgZiJa0MT6gfvkFNg078SgrE6P4s8,22365
11
- scdataloader-1.2.1.dist-info/METADATA,sha256=JeE7j8HkByp_MMGVXp4GOvpdkjIjoyEoByXA-FWISuk,9802
12
- scdataloader-1.2.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
13
- scdataloader-1.2.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
14
- scdataloader-1.2.1.dist-info/RECORD,,
10
+ scdataloader/utils.py,sha256=7tgt3sPj_XTKb-UlJDAZWvQr0_DG9VTC6ioiLdBWFFE,22498
11
+ scdataloader-1.2.2.dist-info/METADATA,sha256=XMtKO9ImiyY--F92njvMUe69OaJgDx8C3xQtBAXqo8g,9800
12
+ scdataloader-1.2.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
13
+ scdataloader-1.2.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
14
+ scdataloader-1.2.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.26.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any