scdataloader 1.0.5__tar.gz → 1.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scdataloader
3
- Version: 1.0.5
3
+ Version: 1.1.3
4
4
  Summary: a dataloader for single cell data in lamindb
5
5
  Home-page: https://github.com/jkobject/scDataLoader
6
6
  License: GPL3
@@ -13,7 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
13
13
  Provides-Extra: dev
14
14
  Requires-Dist: anndata
15
15
  Requires-Dist: biomart
16
- Requires-Dist: bionty (==0.48.0)
16
+ Requires-Dist: bionty (==0.49.0)
17
17
  Requires-Dist: black (>=23.10.1,<24.0.0) ; extra == "dev"
18
18
  Requires-Dist: cellxgene-census
19
19
  Requires-Dist: coverage (>=7.3.2,<8.0.0) ; extra == "dev"
@@ -23,7 +23,7 @@ Requires-Dist: flake8 (>=6.1.0,<7.0.0) ; extra == "dev"
23
23
  Requires-Dist: gitchangelog (>=3.0.4,<4.0.0) ; extra == "dev"
24
24
  Requires-Dist: ipykernel
25
25
  Requires-Dist: isort (>=5.12.0,<6.0.0) ; extra == "dev"
26
- Requires-Dist: lamindb (==0.75.1)
26
+ Requires-Dist: lamindb (==0.76.3)
27
27
  Requires-Dist: leidenalg
28
28
  Requires-Dist: lightning
29
29
  Requires-Dist: matplotlib
@@ -82,13 +82,41 @@ I needed to create this Data Loader for my PhD project. I am using it to load &
82
82
 
83
83
  ```bash
84
84
  pip install scdataloader
85
+ # or
86
+ pip install scDataLoader[dev] # for dev dependencies
87
+
88
+ lamin login <email> --key <API-key>
89
+ lamin init --storage [folder-name-where-lamin-data-will-be-stored] --schema bionty
85
90
  ```
86
91
 
87
- ### Install it locally and run the notebooks:
92
+ If you are new to lamin and had to run `lamin init`, you will also need to populate your ontologies. This is because scPRINT uses ontologies to define its cell types, diseases, sexes, ethnicities, etc.
93
+
94
+ You can do it manually or with our function:
95
+
96
+ ```python
97
+ from scdataloader.utils import populate_my_ontology
98
+
99
+ populate_my_ontology() # to populate everything (recommended; can take 2-10 minutes)
100
+
101
+ populate_my_ontology( #the minimum for scprint to run some inferences (denoising, grn inference)
102
+ organisms=["NCBITaxon:10090", "NCBITaxon:9606"],
103
+ sex=["PATO:0000384", "PATO:0000383"],
104
+ celltypes = None,
105
+ ethnicities = None,
106
+ assays = None,
107
+ tissues = None,
108
+ diseases = None,
109
+ dev_stages = None,
110
+ )
111
+ ```
112
+
113
+ ### Dev install
114
+
115
+ If you want to use the latest version of scDataLoader and work on the code yourself, use `git clone` and `pip install -e` instead of `pip install`.
88
116
 
89
117
  ```bash
90
118
  git clone https://github.com/jkobject/scDataLoader.git
91
- pip install -e scDataLoader
119
+ pip install -e scDataLoader[dev]
92
120
  ```
93
121
 
94
122
  ## Usage
@@ -41,13 +41,41 @@ I needed to create this Data Loader for my PhD project. I am using it to load &
41
41
 
42
42
  ```bash
43
43
  pip install scdataloader
44
+ # or
45
+ pip install scDataLoader[dev] # for dev dependencies
46
+
47
+ lamin login <email> --key <API-key>
48
+ lamin init --storage [folder-name-where-lamin-data-will-be-stored] --schema bionty
44
49
  ```
45
50
 
46
- ### Install it locally and run the notebooks:
51
+ If you are new to lamin and had to run `lamin init`, you will also need to populate your ontologies. This is because scPRINT uses ontologies to define its cell types, diseases, sexes, ethnicities, etc.
52
+
53
+ You can do it manually or with our function:
54
+
55
+ ```python
56
+ from scdataloader.utils import populate_my_ontology
57
+
58
+ populate_my_ontology() # to populate everything (recommended; can take 2-10 minutes)
59
+
60
+ populate_my_ontology( #the minimum for scprint to run some inferences (denoising, grn inference)
61
+ organisms=["NCBITaxon:10090", "NCBITaxon:9606"],
62
+ sex=["PATO:0000384", "PATO:0000383"],
63
+ celltypes = None,
64
+ ethnicities = None,
65
+ assays = None,
66
+ tissues = None,
67
+ diseases = None,
68
+ dev_stages = None,
69
+ )
70
+ ```
71
+
72
+ ### Dev install
73
+
74
+ If you want to use the latest version of scDataLoader and work on the code yourself, use `git clone` and `pip install -e` instead of `pip install`.
47
75
 
48
76
  ```bash
49
77
  git clone https://github.com/jkobject/scDataLoader.git
50
- pip install -e scDataLoader
78
+ pip install -e scDataLoader[dev]
51
79
  ```
52
80
 
53
81
  ## Usage
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "scdataloader"
3
- version = "1.0.5"
3
+ version = "1.1.3"
4
4
  description = "a dataloader for single cell data in lamindb"
5
5
  authors = ["jkobject"]
6
6
  license = "GPL3"
@@ -10,8 +10,8 @@ keywords = ["scRNAseq", "dataloader", "pytorch", "lamindb", "scPRINT"]
10
10
 
11
11
  [tool.poetry.dependencies]
12
12
  python = "3.10.*"
13
- lamindb = "0.75.1"
14
- bionty = "0.48.0"
13
+ lamindb = "0.76.3"
14
+ bionty = "0.49.0"
15
15
  cellxgene-census = "*"
16
16
  torch = "*"
17
17
  lightning = "*"
@@ -0,0 +1 @@
1
+ 1.1.3
@@ -161,7 +161,10 @@ class Collator:
161
161
  raise ValueError("how must be either most expr or random expr")
162
162
  if (
163
163
  (self.add_zero_genes > 0) or (self.max_len > len(nnz_loc))
164
- ) and self.how not in ["all", "some"]:
164
+ ) and self.how not in [
165
+ "all",
166
+ "some",
167
+ ]:
165
168
  zero_loc = np.where(expr == 0)[0]
166
169
  zero_loc = zero_loc[
167
170
  np.random.choice(
@@ -433,7 +433,7 @@ def populate_my_ontology(
433
433
  names = bt.Phenotype.public().df().index if not sex else sex
434
434
  records = [
435
435
  bt.Phenotype.from_source(
436
- ontology_id=i,
436
+ ontology_id=i, source=bt.PublicSource.filter(name="pato").first()
437
437
  )
438
438
  for i in names
439
439
  ]
@@ -472,9 +472,12 @@ def populate_my_ontology(
472
472
 
473
473
  names = bt.DevelopmentalStage.public(organism="mouse").df().index
474
474
  records = [
475
- bt.DevelopmentalStage.from_source(ontology_id=i) for i in names.tolist()
475
+ bt.DevelopmentalStage.from_source(
476
+ ontology_id=i,
477
+ source=bt.PublicSource.filter(organism="mouse", name="mmusdv").first(),
478
+ )
479
+ for i in names.tolist()
476
480
  ]
477
- records[-4] = records[-4][0]
478
481
  ln.save(records)
479
482
  # Disease
480
483
  if diseases is not None:
@@ -1 +0,0 @@
1
- 1.0.5
File without changes