scdataloader 2.0.4__py3-none-any.whl → 2.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scdataloader/config.py +1151 -0
- scdataloader/datamodule.py +1 -0
- scdataloader/utils.py +31 -181
- {scdataloader-2.0.4.dist-info → scdataloader-2.0.5.dist-info}/METADATA +1 -1
- {scdataloader-2.0.4.dist-info → scdataloader-2.0.5.dist-info}/RECORD +8 -8
- {scdataloader-2.0.4.dist-info → scdataloader-2.0.5.dist-info}/WHEEL +0 -0
- {scdataloader-2.0.4.dist-info → scdataloader-2.0.5.dist-info}/entry_points.txt +0 -0
- {scdataloader-2.0.4.dist-info → scdataloader-2.0.5.dist-info}/licenses/LICENSE +0 -0
scdataloader/datamodule.py
CHANGED
|
@@ -190,6 +190,7 @@ class DataModule(L.LightningDataModule):
|
|
|
190
190
|
self.classes = {k: len(v) for k, v in mdataset.class_topred.items()}
|
|
191
191
|
# we might want not to order the genes by expression (or do it?)
|
|
192
192
|
# we might want to not introduce zeros and
|
|
193
|
+
|
|
193
194
|
if use_default_col:
|
|
194
195
|
kwargs["collate_fn"] = Collator(
|
|
195
196
|
organisms=mdataset.organisms if organisms is None else organisms,
|
scdataloader/utils.py
CHANGED
|
@@ -20,6 +20,8 @@ from scipy.sparse import csr_matrix
|
|
|
20
20
|
from scipy.stats import median_abs_deviation
|
|
21
21
|
from torch import Tensor
|
|
22
22
|
|
|
23
|
+
from .config import DROP
|
|
24
|
+
|
|
23
25
|
|
|
24
26
|
def fileToList(filename: str, strconv: callable = lambda x: x) -> list:
|
|
25
27
|
"""
|
|
@@ -442,168 +444,7 @@ def load_genes(
|
|
|
442
444
|
if col in organismdf.columns:
|
|
443
445
|
organismdf.drop(columns=[col], inplace=True)
|
|
444
446
|
# temp fix
|
|
445
|
-
|
|
446
|
-
"ENSG00000112096",
|
|
447
|
-
"ENSG00000137808",
|
|
448
|
-
"ENSG00000161149",
|
|
449
|
-
"ENSG00000182230",
|
|
450
|
-
"ENSG00000203812",
|
|
451
|
-
"ENSG00000204092",
|
|
452
|
-
"ENSG00000205485",
|
|
453
|
-
"ENSG00000212951",
|
|
454
|
-
"ENSG00000215271",
|
|
455
|
-
"ENSG00000221995",
|
|
456
|
-
"ENSG00000224739",
|
|
457
|
-
"ENSG00000224745",
|
|
458
|
-
"ENSG00000225178",
|
|
459
|
-
"ENSG00000225932",
|
|
460
|
-
"ENSG00000226377",
|
|
461
|
-
"ENSG00000226380",
|
|
462
|
-
"ENSG00000226403",
|
|
463
|
-
"ENSG00000227021",
|
|
464
|
-
"ENSG00000227220",
|
|
465
|
-
"ENSG00000227902",
|
|
466
|
-
"ENSG00000228139",
|
|
467
|
-
"ENSG00000228206",
|
|
468
|
-
"ENSG00000228906",
|
|
469
|
-
"ENSG00000229352",
|
|
470
|
-
"ENSG00000231575",
|
|
471
|
-
"ENSG00000232196",
|
|
472
|
-
"ENSG00000232295",
|
|
473
|
-
"ENSG00000233776",
|
|
474
|
-
"ENSG00000236166",
|
|
475
|
-
"ENSG00000236673",
|
|
476
|
-
"ENSG00000236740",
|
|
477
|
-
"ENSG00000236886",
|
|
478
|
-
"ENSG00000236996",
|
|
479
|
-
"ENSG00000237133",
|
|
480
|
-
"ENSG00000237513",
|
|
481
|
-
"ENSG00000237548",
|
|
482
|
-
"ENSG00000237838",
|
|
483
|
-
"ENSG00000239446",
|
|
484
|
-
"ENSG00000239467",
|
|
485
|
-
"ENSG00000239665",
|
|
486
|
-
"ENSG00000244693",
|
|
487
|
-
"ENSG00000244952",
|
|
488
|
-
"ENSG00000249860",
|
|
489
|
-
"ENSG00000251044",
|
|
490
|
-
"ENSG00000253878",
|
|
491
|
-
"ENSG00000254561",
|
|
492
|
-
"ENSG00000254740",
|
|
493
|
-
"ENSG00000255633",
|
|
494
|
-
"ENSG00000255823",
|
|
495
|
-
"ENSG00000256045",
|
|
496
|
-
"ENSG00000256222",
|
|
497
|
-
"ENSG00000256374",
|
|
498
|
-
"ENSG00000256427",
|
|
499
|
-
"ENSG00000256618",
|
|
500
|
-
"ENSG00000256863",
|
|
501
|
-
"ENSG00000256892",
|
|
502
|
-
"ENSG00000258414",
|
|
503
|
-
"ENSG00000258808",
|
|
504
|
-
"ENSG00000258861",
|
|
505
|
-
"ENSG00000259444",
|
|
506
|
-
"ENSG00000259820",
|
|
507
|
-
"ENSG00000259834",
|
|
508
|
-
"ENSG00000259855",
|
|
509
|
-
"ENSG00000260461",
|
|
510
|
-
"ENSG00000261068",
|
|
511
|
-
"ENSG00000261438",
|
|
512
|
-
"ENSG00000261490",
|
|
513
|
-
"ENSG00000261534",
|
|
514
|
-
"ENSG00000261737",
|
|
515
|
-
"ENSG00000261773",
|
|
516
|
-
"ENSG00000261963",
|
|
517
|
-
"ENSG00000262668",
|
|
518
|
-
"ENSG00000263464",
|
|
519
|
-
"ENSG00000267637",
|
|
520
|
-
"ENSG00000268955",
|
|
521
|
-
"ENSG00000269028",
|
|
522
|
-
"ENSG00000269900",
|
|
523
|
-
"ENSG00000269933",
|
|
524
|
-
"ENSG00000269966",
|
|
525
|
-
"ENSG00000270188",
|
|
526
|
-
"ENSG00000270394",
|
|
527
|
-
"ENSG00000270672",
|
|
528
|
-
"ENSG00000271043",
|
|
529
|
-
"ENSG00000271409",
|
|
530
|
-
"ENSG00000271734",
|
|
531
|
-
"ENSG00000271870",
|
|
532
|
-
"ENSG00000272040",
|
|
533
|
-
"ENSG00000272196",
|
|
534
|
-
"ENSG00000272267",
|
|
535
|
-
"ENSG00000272354",
|
|
536
|
-
"ENSG00000272370",
|
|
537
|
-
"ENSG00000272551",
|
|
538
|
-
"ENSG00000272567",
|
|
539
|
-
"ENSG00000272880",
|
|
540
|
-
"ENSG00000272904",
|
|
541
|
-
"ENSG00000272934",
|
|
542
|
-
"ENSG00000273301",
|
|
543
|
-
"ENSG00000273370",
|
|
544
|
-
"ENSG00000273496",
|
|
545
|
-
"ENSG00000273576",
|
|
546
|
-
"ENSG00000273614",
|
|
547
|
-
"ENSG00000273837",
|
|
548
|
-
"ENSG00000273888",
|
|
549
|
-
"ENSG00000273923",
|
|
550
|
-
"ENSG00000276612",
|
|
551
|
-
"ENSG00000276814",
|
|
552
|
-
"ENSG00000277050",
|
|
553
|
-
"ENSG00000277077",
|
|
554
|
-
"ENSG00000277352",
|
|
555
|
-
"ENSG00000277666",
|
|
556
|
-
"ENSG00000277761",
|
|
557
|
-
"ENSG00000278198",
|
|
558
|
-
"ENSG00000278782",
|
|
559
|
-
"ENSG00000278927",
|
|
560
|
-
"ENSG00000278955",
|
|
561
|
-
"ENSG00000279226",
|
|
562
|
-
"ENSG00000279765",
|
|
563
|
-
"ENSG00000279769",
|
|
564
|
-
"ENSG00000279948",
|
|
565
|
-
"ENSG00000280058",
|
|
566
|
-
"ENSG00000280095",
|
|
567
|
-
"ENSG00000280250",
|
|
568
|
-
"ENSG00000280346",
|
|
569
|
-
"ENSG00000280374",
|
|
570
|
-
"ENSG00000280710",
|
|
571
|
-
"ENSG00000282080",
|
|
572
|
-
"ENSG00000282246",
|
|
573
|
-
"ENSG00000282965",
|
|
574
|
-
"ENSG00000283486",
|
|
575
|
-
"ENSG00000284299",
|
|
576
|
-
"ENSG00000284741",
|
|
577
|
-
"ENSG00000285106",
|
|
578
|
-
"ENSG00000285162",
|
|
579
|
-
"ENSG00000285476",
|
|
580
|
-
"ENSG00000285762",
|
|
581
|
-
"ENSG00000286065",
|
|
582
|
-
"ENSG00000286228",
|
|
583
|
-
"ENSG00000286601",
|
|
584
|
-
"ENSG00000286699",
|
|
585
|
-
"ENSG00000286949",
|
|
586
|
-
"ENSG00000286996",
|
|
587
|
-
"ENSG00000287116",
|
|
588
|
-
"ENSG00000287388",
|
|
589
|
-
"ENSG00000288541",
|
|
590
|
-
"ENSG00000288546",
|
|
591
|
-
"ENSG00000288630",
|
|
592
|
-
"ENSG00000288639",
|
|
593
|
-
"ENSMUSG00000069518",
|
|
594
|
-
"ENSMUSG00000073682",
|
|
595
|
-
"ENSMUSG00000075014",
|
|
596
|
-
"ENSMUSG00000075015",
|
|
597
|
-
"ENSMUSG00000078091",
|
|
598
|
-
"ENSMUSG00000094958",
|
|
599
|
-
"ENSMUSG00000095547",
|
|
600
|
-
"ENSMUSG00000095891",
|
|
601
|
-
"ENSMUSG00000096385",
|
|
602
|
-
"ENSMUSG00000096519",
|
|
603
|
-
"ENSMUSG00000096923",
|
|
604
|
-
"ENSMUSG00000097078",
|
|
605
|
-
}
|
|
606
|
-
organismdf = organismdf[~organismdf.index.isin(drop)]
|
|
447
|
+
organismdf = organismdf[~organismdf.index.isin(DROP)]
|
|
607
448
|
return organismdf
|
|
608
449
|
|
|
609
450
|
|
|
@@ -656,15 +497,16 @@ def _adding_scbasecamp_genes(
|
|
|
656
497
|
|
|
657
498
|
|
|
658
499
|
def populate_my_ontology(
|
|
659
|
-
sex: List[str] = ["PATO:0000384", "PATO:0000383"],
|
|
660
|
-
celltypes: List[str] = [],
|
|
661
|
-
ethnicities: List[str] = [],
|
|
662
|
-
assays: List[str] = [],
|
|
663
|
-
tissues: List[str] = [],
|
|
664
|
-
diseases: List[str] = [],
|
|
665
|
-
dev_stages: List[str] = [],
|
|
666
|
-
organisms_clade: List[str] = ["vertebrates", "plants", "metazoa"],
|
|
667
|
-
|
|
500
|
+
sex: Optional[List[str]] = ["PATO:0000384", "PATO:0000383"],
|
|
501
|
+
celltypes: Optional[List[str]] = [],
|
|
502
|
+
ethnicities: Optional[List[str]] = [],
|
|
503
|
+
assays: Optional[List[str]] = [],
|
|
504
|
+
tissues: Optional[List[str]] = [],
|
|
505
|
+
diseases: Optional[List[str]] = [],
|
|
506
|
+
dev_stages: Optional[List[str]] = [],
|
|
507
|
+
organisms_clade: Optional[List[str]] = ["vertebrates"], # "plants", "metazoa"],
|
|
508
|
+
organisms: Optional[List[str]] = ["NCBITaxon:10090", "NCBITaxon:9606"],
|
|
509
|
+
genes_from: Optional[List[str]] = ["NCBITaxon:10090", "NCBITaxon:9606"],
|
|
668
510
|
):
|
|
669
511
|
"""
|
|
670
512
|
creates a local version of the lamin ontologies and add the required missing values in base ontologies
|
|
@@ -697,8 +539,10 @@ def populate_my_ontology(
|
|
|
697
539
|
names = bt.CellType.public().df().index if not celltypes else celltypes
|
|
698
540
|
records = bt.CellType.from_values(names, field="ontology_id")
|
|
699
541
|
ln.save(records)
|
|
700
|
-
bt.CellType(name="unknown", ontology_id="unknown")
|
|
542
|
+
elem = bt.CellType(name="unknown", ontology_id="unknown")
|
|
543
|
+
ln.save([elem], ignore_conflicts=True)
|
|
701
544
|
# OrganismClade
|
|
545
|
+
nrecords = []
|
|
702
546
|
if organisms_clade is not None:
|
|
703
547
|
records = []
|
|
704
548
|
for organism_clade in organisms_clade:
|
|
@@ -709,7 +553,7 @@ def populate_my_ontology(
|
|
|
709
553
|
records.append(bt.Organism.from_source(name=name, source=source))
|
|
710
554
|
except DoesNotExist:
|
|
711
555
|
print(f"Organism {name} not found in source {source}")
|
|
712
|
-
|
|
556
|
+
|
|
713
557
|
prevrec = set()
|
|
714
558
|
for rec in records:
|
|
715
559
|
if rec is None:
|
|
@@ -717,10 +561,15 @@ def populate_my_ontology(
|
|
|
717
561
|
if not isinstance(rec, bt.Organism):
|
|
718
562
|
rec = rec[0]
|
|
719
563
|
if rec.uid not in prevrec:
|
|
564
|
+
if organisms is not None:
|
|
565
|
+
if rec.ontology_id not in organisms:
|
|
566
|
+
continue
|
|
720
567
|
nrecords.append(rec)
|
|
721
568
|
prevrec.add(rec.uid)
|
|
569
|
+
|
|
722
570
|
ln.save(nrecords)
|
|
723
|
-
bt.Organism(name="unknown", ontology_id="unknown").save()
|
|
571
|
+
elem = bt.Organism(name="unknown", ontology_id="unknown").save()
|
|
572
|
+
ln.save([elem], ignore_conflicts=True)
|
|
724
573
|
# Phenotype
|
|
725
574
|
if sex is not None:
|
|
726
575
|
names = bt.Phenotype.public().df().index if not sex else sex
|
|
@@ -729,7 +578,8 @@ def populate_my_ontology(
|
|
|
729
578
|
bt.Phenotype.from_source(ontology_id=i, source=source) for i in names
|
|
730
579
|
]
|
|
731
580
|
ln.save(records)
|
|
732
|
-
bt.Phenotype(name="unknown", ontology_id="unknown").save()
|
|
581
|
+
elem = bt.Phenotype(name="unknown", ontology_id="unknown").save()
|
|
582
|
+
ln.save([elem], ignore_conflicts=True)
|
|
733
583
|
# ethnicity
|
|
734
584
|
if ethnicities is not None:
|
|
735
585
|
if len(ethnicities) == 0:
|
|
@@ -738,9 +588,8 @@ def populate_my_ontology(
|
|
|
738
588
|
names = bt.Ethnicity.public().df().index if not ethnicities else ethnicities
|
|
739
589
|
records = bt.Ethnicity.from_values(names, field="ontology_id")
|
|
740
590
|
ln.save(records)
|
|
741
|
-
bt.Ethnicity(
|
|
742
|
-
|
|
743
|
-
).save() # multi ethnic will have to get renamed
|
|
591
|
+
elem = bt.Ethnicity(name="unknown", ontology_id="unknown")
|
|
592
|
+
ln.save([elem], ignore_conflicts=True)
|
|
744
593
|
# ExperimentalFactor
|
|
745
594
|
if assays is not None:
|
|
746
595
|
if len(assays) == 0:
|
|
@@ -749,7 +598,8 @@ def populate_my_ontology(
|
|
|
749
598
|
names = bt.ExperimentalFactor.public().df().index if not assays else assays
|
|
750
599
|
records = bt.ExperimentalFactor.from_values(names, field="ontology_id")
|
|
751
600
|
ln.save(records)
|
|
752
|
-
bt.ExperimentalFactor(name="unknown", ontology_id="unknown").save()
|
|
601
|
+
elem = bt.ExperimentalFactor(name="unknown", ontology_id="unknown").save()
|
|
602
|
+
ln.save([elem], ignore_conflicts=True)
|
|
753
603
|
# lookup = bt.ExperimentalFactor.lookup()
|
|
754
604
|
# lookup.smart_seq_v4.parents.add(lookup.smart_like)
|
|
755
605
|
# Tissue
|
|
@@ -760,7 +610,8 @@ def populate_my_ontology(
|
|
|
760
610
|
names = bt.Tissue.public().df().index if not tissues else tissues
|
|
761
611
|
records = bt.Tissue.from_values(names, field="ontology_id")
|
|
762
612
|
ln.save(records)
|
|
763
|
-
bt.Tissue(name="unknown", ontology_id="unknown").save()
|
|
613
|
+
elem = bt.Tissue(name="unknown", ontology_id="unknown").save()
|
|
614
|
+
ln.save([elem], ignore_conflicts=True)
|
|
764
615
|
# DevelopmentalStage
|
|
765
616
|
if dev_stages is not None:
|
|
766
617
|
if len(dev_stages) == 0:
|
|
@@ -775,7 +626,6 @@ def populate_my_ontology(
|
|
|
775
626
|
)
|
|
776
627
|
records = bt.DevelopmentalStage.from_values(names, field="ontology_id")
|
|
777
628
|
ln.save(records)
|
|
778
|
-
bt.DevelopmentalStage(name="unknown", ontology_id="unknown").save()
|
|
779
629
|
|
|
780
630
|
# Disease
|
|
781
631
|
if diseases is not None:
|
|
@@ -2,15 +2,15 @@ scdataloader/__init__.py,sha256=Z5HURehoWw1GrecImmTXIkv4ih8Q5RxNQWPm8zjjXOA,226
|
|
|
2
2
|
scdataloader/__main__.py,sha256=xPOtrEpQQQZUGTnm8KTvsQcA_jR45oMG_VHqd0Ny7_M,8677
|
|
3
3
|
scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
|
|
4
4
|
scdataloader/collator.py,sha256=VcFJcVAIeKvYkG1DPRXzoBaw2wQ6D_0lsv5Mcv-9USI,17419
|
|
5
|
-
scdataloader/config.py,sha256=
|
|
5
|
+
scdataloader/config.py,sha256=wGlCR3tWyEVa69ajovJKYc86CTCJR8e1xC7BTlUOJQE,34582
|
|
6
6
|
scdataloader/data.json,sha256=Zb8c27yk3rwMgtAU8kkiWWAyUwYBrlCqKUyEtaAx9i8,8785
|
|
7
7
|
scdataloader/data.py,sha256=fMW1OgllPCz87si3DpkzOSoqnufgKlh8aW5rEVmeC_c,25133
|
|
8
|
-
scdataloader/datamodule.py,sha256=
|
|
8
|
+
scdataloader/datamodule.py,sha256=ojX0zr2cpGLoKGjWE1S_bHAEdwbFg0Ljl55hqTagW1k,43600
|
|
9
9
|
scdataloader/mapped.py,sha256=h9YKQ8SG9tyZL8c6_Wu5Xov5ODGK6FzVuFopz58xwN4,29887
|
|
10
10
|
scdataloader/preprocess.py,sha256=oAGMilgdIgggyp9B9c9627kdo6SCco2tnFhhIHY4-yc,39642
|
|
11
|
-
scdataloader/utils.py,sha256=
|
|
12
|
-
scdataloader-2.0.
|
|
13
|
-
scdataloader-2.0.
|
|
14
|
-
scdataloader-2.0.
|
|
15
|
-
scdataloader-2.0.
|
|
16
|
-
scdataloader-2.0.
|
|
11
|
+
scdataloader/utils.py,sha256=2zIgmQHPVKHOFWqLX56Ihqtqci3_rOfCcOs642CPnX4,27183
|
|
12
|
+
scdataloader-2.0.5.dist-info/METADATA,sha256=doiHkl9Iv_n4X5ifDm3cppsIkhPGEXE1zzWynFS1NHI,10314
|
|
13
|
+
scdataloader-2.0.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
14
|
+
scdataloader-2.0.5.dist-info/entry_points.txt,sha256=VXAN1m_CjbdLJ6SKYR0sBLGDV4wvv31ri7fWWuwbpno,60
|
|
15
|
+
scdataloader-2.0.5.dist-info/licenses/LICENSE,sha256=rGy_eYmnxtbOvKs7qt5V0czSWxJwgX_MlgMyTZwDHbc,1073
|
|
16
|
+
scdataloader-2.0.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|