scdataloader 2.0.10.tar.gz → 2.0.12.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scdataloader
-Version: 2.0.10
+Version: 2.0.12
 Summary: a dataloader for single cell data in lamindb
 Project-URL: repository, https://github.com/jkobject/scDataLoader
 Author-email: jkobject <jkobject@gmail.com>
@@ -14,7 +14,7 @@ Requires-Dist: cellxgene-census>=0.1.0
 Requires-Dist: django>=4.0.0
 Requires-Dist: ipykernel>=6.20.0
 Requires-Dist: jupytext>=1.16.0
-Requires-Dist: lamindb[bionty,jupyter,zarr]==1.6.2
+Requires-Dist: lamindb[gcp]==2.0.1
 Requires-Dist: leidenalg>=0.8.0
 Requires-Dist: lightning>=2.3.0
 Requires-Dist: matplotlib>=3.5.0
@@ -162,7 +162,7 @@ adata = preprocessor(adata)
 
 art = ln.Artifact(adata, description="test")
 art.save()
-ln.Collection(art, name="test", description="test").save()
+ln.Collection(art, key="test", description="test").save()
 
 datamodule = DataModule(
     collection_name="test",
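
For context, this hunk tracks lamindb 2.x renaming the `name=` argument of `ln.Collection` to `key=`. A minimal sketch of the updated pattern, assuming an initialized lamindb instance and an in-memory `AnnData` object `adata`:

```python
import lamindb as ln

# lamindb 2.x: the collection identifier is passed as `key=`, not `name=`
art = ln.Artifact(adata, description="test")  # `adata` assumed to exist
art.save()
ln.Collection(art, key="test", description="test").save()
# the collection can later be looked up by the same key
collection = ln.Collection.filter(key="test").one()
```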
@@ -376,7 +376,7 @@ using absolute paths. You need to do 3 things:
 
 ```python
 import lamin as ln
-ln.Storage.df()
+ln.Storage.to_dataframe(limit=None)
 # view what is your current storage id (in my case it was GZgLW1TQ)
 ln.Storage.filter(uid="GZgLW1TI").update(
     root=Path("your_new_locations").as_posix().rstrip("/")
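
The same hunk reflects lamindb 2.x retiring the `.df()` shortcut in favor of `.to_dataframe()`; the explicit `limit=None` suggests the new accessor caps returned rows by default, so passing it preserves the old fetch-everything behavior. A minimal sketch, assuming an initialized instance:

```python
import lamindb as ln

# lamindb 2.x: .to_dataframe() replaces .df(); limit=None is passed to
# disable the (assumed) default row cap and list every storage record
storage_df = ln.Storage.to_dataframe(limit=None)
print(storage_df)
```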
@@ -120,7 +120,7 @@ adata = preprocessor(adata)
 
 art = ln.Artifact(adata, description="test")
 art.save()
-ln.Collection(art, name="test", description="test").save()
+ln.Collection(art, key="test", description="test").save()
 
 datamodule = DataModule(
     collection_name="test",
@@ -334,7 +334,7 @@ using absolute paths. You need to do 3 things:
 
 ```python
 import lamin as ln
-ln.Storage.df()
+ln.Storage.to_dataframe(limit=None)
 # view what is your current storage id (in my case it was GZgLW1TQ)
 ln.Storage.filter(uid="GZgLW1TI").update(
     root=Path("your_new_locations").as_posix().rstrip("/")
@@ -1,6 +1,6 @@
 [project]
 name = "scdataloader"
-version = "2.0.10"
+version = "2.0.12"
 description = "a dataloader for single cell data in lamindb"
 authors = [
     {name = "jkobject", email = "jkobject@gmail.com"}
@@ -11,7 +11,7 @@ requires-python = ">=3.10,<3.13"
 keywords = ["scRNAseq", "dataloader", "pytorch", "lamindb", "scPRINT"]
 dependencies = [
     "numpy<=2.2.0",
-    "lamindb[bionty,jupyter,zarr]==1.6.2",
+    "lamindb[gcp]==2.0.1",
     "cellxgene-census>=0.1.0",
     "torch>=2.2.0",
     "pytorch-lightning>=2.3.0",
@@ -282,19 +282,25 @@ class Dataset(torchDataset):
         elif clss == "cell_type_ontology_term_id":
             parentdf = (
                 bt.CellType.filter()
-                .df(include=["parents__ontology_id", "ontology_id"])
+                .to_dataframe(
+                    limit=None, include=["parents__ontology_id", "ontology_id"]
+                )
                 .set_index("ontology_id")
             )
         elif clss == "tissue_ontology_term_id":
             parentdf = (
                 bt.Tissue.filter()
-                .df(include=["parents__ontology_id", "ontology_id"])
+                .to_dataframe(
+                    limit=None, include=["parents__ontology_id", "ontology_id"]
+                )
                 .set_index("ontology_id")
             )
         elif clss == "disease_ontology_term_id":
             parentdf = (
                 bt.Disease.filter()
-                .df(include=["parents__ontology_id", "ontology_id"])
+                .to_dataframe(
+                    limit=None, include=["parents__ontology_id", "ontology_id"]
+                )
                 .set_index("ontology_id")
             )
         elif clss in [
@@ -304,19 +310,25 @@ class Dataset(torchDataset):
         ]:
             parentdf = (
                 bt.DevelopmentalStage.filter()
-                .df(include=["parents__ontology_id", "ontology_id"])
+                .to_dataframe(
+                    limit=None, include=["parents__ontology_id", "ontology_id"]
+                )
                 .set_index("ontology_id")
             )
         elif clss == "assay_ontology_term_id":
             parentdf = (
                 bt.ExperimentalFactor.filter()
-                .df(include=["parents__ontology_id", "ontology_id"])
+                .to_dataframe(
+                    limit=None, include=["parents__ontology_id", "ontology_id"]
+                )
                 .set_index("ontology_id")
             )
         elif clss == "self_reported_ethnicity_ontology_term_id":
             parentdf = (
                 bt.Ethnicity.filter()
-                .df(include=["parents__ontology_id", "ontology_id"])
+                .to_dataframe(
+                    limit=None, include=["parents__ontology_id", "ontology_id"]
+                )
                 .set_index("ontology_id")
             )
 
@@ -654,7 +654,7 @@ class LaminPreprocessor(Preprocessor):
         # Reconstruct collection using keys
         dataset = ln.Collection(
             [ln.Artifact.filter(key=k).one() for k in files],
-            name=name,
+            key=name,
             description=description,
         )
         dataset.save()
@@ -819,9 +819,15 @@ def additional_postprocess(adata):
     # else:
     print("starting post processing")
     sc.pp.neighbors(adata, use_rep="X_pca")
-    sc.tl.leiden(adata, key_added="leiden_2", resolution=2.0)
-    sc.tl.leiden(adata, key_added="leiden_1", resolution=1.0)
-    sc.tl.leiden(adata, key_added="leiden_0.5", resolution=0.5)
+    sc.tl.leiden(
+        adata, key_added="leiden_2", resolution=2.0, flavor="igraph", n_iterations=2
+    )
+    sc.tl.leiden(
+        adata, key_added="leiden_1", resolution=1.0, flavor="igraph", n_iterations=2
+    )
+    sc.tl.leiden(
+        adata, key_added="leiden_0.5", resolution=0.5, flavor="igraph", n_iterations=2
+    )
     sc.tl.umap(adata)
     mid = adata.uns["dataset_id"] if "dataset_id" in adata.uns else "unknown_id"
     sc.pl.umap(
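
The rewritten Leiden calls opt into scanpy's igraph backend, the flavor scanpy now recommends over the legacy leidenalg implementation; `n_iterations=2` is the setting scanpy suggests for that backend. A minimal sketch of one call, assuming `adata` carries a PCA embedding:

```python
import scanpy as sc

# build the neighbors graph on the PCA embedding, then cluster with the
# igraph implementation of Leiden at resolution 1.0
sc.pp.neighbors(adata, use_rep="X_pca")  # assumes adata.obsm["X_pca"] exists
sc.tl.leiden(
    adata, key_added="leiden_1", resolution=1.0, flavor="igraph", n_iterations=2
)
```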
@@ -389,7 +389,7 @@ def load_dataset_local(
         except IntegrityError:
             print(f"File {file.key} already exists in storage")
             saved_files.append(file)
-    dataset = ln.Collection(saved_files, name=name, description=description)
+    dataset = ln.Collection(saved_files, key=name, description=description)
     dataset.save()
     return dataset
 
@@ -412,7 +412,7 @@ def load_genes(
     for organism in organisms:
         genesdf = bt.Gene.filter(
             organism_id=bt.Organism.filter(ontology_id=organism).first().id
-        ).df()
+        ).to_dataframe(limit=None)
         genesdf.loc[genesdf.ensembl_gene_id.isna(), "ensembl_gene_id"] = genesdf.loc[
             genesdf.ensembl_gene_id.isna(), "stable_id"
         ]
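
In the `Dataset` hunks above, the renamed accessor keeps the `include=` argument, which fetches each ontology term's parent IDs in the same query. A sketch of that pattern for one registry, assuming populated bionty registries:

```python
import bionty as bt

# fetch every cell type together with its parents' ontology IDs, indexed
# by term ID; limit=None returns the full registry rather than a preview
parentdf = (
    bt.CellType.filter()
    .to_dataframe(limit=None, include=["parents__ontology_id", "ontology_id"])
    .set_index("ontology_id")
)
print(parentdf.head())
```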
@@ -453,12 +453,16 @@ def _adding_scbasecamp_genes(
 ):
     if len(species) == 0:
         species = set(
-            bt.Organism.using("laminlabs/arc-virtual-cell-atlas").df().ontology_id
+            bt.Organism.using("laminlabs/arc-virtual-cell-atlas")
+            .to_dataframe(limit=None)
+            .ontology_id
         ) - set(["NCBITaxon:10090", "NCBITaxon:9606"])
         species = list(species)
     for i in set(
-        bt.Organism.using("laminlabs/arc-virtual-cell-atlas").df().ontology_id
-    ) - set(bt.Organism.filter().df().ontology_id):
+        bt.Organism.using("laminlabs/arc-virtual-cell-atlas")
+        .to_dataframe(limit=None)
+        .ontology_id
+    ) - set(bt.Organism.filter().to_dataframe(limit=None).ontology_id):
         print(i)
         rec = (
             bt.Organism.using("laminlabs/arc-virtual-cell-atlas")
@@ -485,7 +489,7 @@ def _adding_scbasecamp_genes(
             bt.Gene.using("laminlabs/arc-virtual-cell-atlas")
             .filter(organism__ontology_id=i)
             .all()
-            .df()
+            .to_dataframe(limit=None)
         )
         genes = []
         org = bt.Organism.filter(ontology_id=i).one()
@@ -545,7 +549,11 @@ def populate_my_ontology(
         if len(celltypes) == 0:
             bt.CellType.import_source()
         else:
-            names = bt.CellType.public().df().index if not celltypes else celltypes
+            names = (
+                bt.CellType.public().to_dataframe().index
+                if not celltypes
+                else celltypes
+            )
             records = bt.CellType.from_values(names, field="ontology_id")
             ln.save(records)
         elem = bt.CellType(name="unknown", ontology_id="unknown")
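
In `populate_my_ontology`, the public ontology accessors get the same rename, here without a `limit` argument; the index of the returned dataframe holds the ontology IDs used to seed the local registry. A sketch of the pattern, assuming a configured bionty source:

```python
import bionty as bt
import lamindb as ln

# list all public cell-type ontology IDs, then register them locally
names = bt.CellType.public().to_dataframe().index
records = bt.CellType.from_values(names, field="ontology_id")
ln.save(records)
```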
@@ -555,7 +563,7 @@ def populate_my_ontology(
     if organisms_clade is not None:
         records = []
         for organism_clade in organisms_clade:
-            names = bt.Organism.public(organism=organism_clade).df().index
+            names = bt.Organism.public(organism=organism_clade).to_dataframe().index
             source = bt.Source.filter(name="ensembl", organism=organism_clade).last()
             for name in names:
                 try:
@@ -581,7 +589,7 @@ def populate_my_ontology(
         ln.save([elem], ignore_conflicts=True)
     # Phenotype
     if sex is not None:
-        names = bt.Phenotype.public().df().index if not sex else sex
+        names = bt.Phenotype.public().to_dataframe().index if not sex else sex
         source = bt.Source.filter(name="pato").first()
         records = [
             bt.Phenotype.from_source(ontology_id=i, source=source) for i in names
@@ -594,7 +602,11 @@ def populate_my_ontology(
         if len(ethnicities) == 0:
             bt.Ethnicity.import_source()
         else:
-            names = bt.Ethnicity.public().df().index if not ethnicities else ethnicities
+            names = (
+                bt.Ethnicity.public().to_dataframe().index
+                if not ethnicities
+                else ethnicities
+            )
             records = bt.Ethnicity.from_values(names, field="ontology_id")
             ln.save(records)
         elem = bt.Ethnicity(name="unknown", ontology_id="unknown")
@@ -604,7 +616,11 @@ def populate_my_ontology(
         if len(assays) == 0:
             bt.ExperimentalFactor.import_source()
         else:
-            names = bt.ExperimentalFactor.public().df().index if not assays else assays
+            names = (
+                bt.ExperimentalFactor.public().to_dataframe().index
+                if not assays
+                else assays
+            )
             records = bt.ExperimentalFactor.from_values(names, field="ontology_id")
             ln.save(records)
         elem = bt.ExperimentalFactor(name="unknown", ontology_id="unknown").save()
@@ -616,7 +632,11 @@ def populate_my_ontology(
         if len(tissues) == 0:
             bt.Tissue.import_source()
         else:
-            names = bt.Tissue.public().df().index if not tissues else tissues
+            names = (
+                bt.Tissue.public().to_dataframe().index
+                if not tissues
+                else tissues
+            )
             records = bt.Tissue.from_values(names, field="ontology_id")
             ln.save(records)
         elem = bt.Tissue(name="unknown", ontology_id="unknown").save()
@@ -629,7 +649,7 @@ def populate_my_ontology(
             bt.DevelopmentalStage.import_source(source=source)
         else:
             names = (
-                bt.DevelopmentalStage.public().df().index
+                bt.DevelopmentalStage.public().to_dataframe().index
                 if not dev_stages
                 else dev_stages
             )
@@ -641,7 +661,11 @@ def populate_my_ontology(
         if len(diseases) == 0:
             bt.Disease.import_source()
         else:
-            names = bt.Disease.public().df().index if not diseases else diseases
+            names = (
+                bt.Disease.public().to_dataframe().index
+                if not diseases
+                else diseases
+            )
             records = bt.Disease.from_values(names, field="ontology_id")
             ln.save(records)
         bt.Disease(name="normal", ontology_id="PATO:0000461").save()
@@ -650,7 +674,9 @@ def populate_my_ontology(
         for organism in genes_from:
             # convert onto to name
             organism = bt.Organism.filter(ontology_id=organism).one().name
-            names = bt.Gene.public(organism=organism).df()["ensembl_gene_id"]
+            names = bt.Gene.public(organism=organism).to_dataframe()[
+                "ensembl_gene_id"
+            ]
 
             # Process names in blocks of 10,000 elements
             block_size = 10000