scdataloader 0.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scdataloader/VERSION +1 -1
- scdataloader/__init__.py +2 -2
- scdataloader/__main__.py +3 -0
- scdataloader/collator.py +61 -96
- scdataloader/config.py +6 -0
- scdataloader/data.py +138 -90
- scdataloader/datamodule.py +67 -39
- scdataloader/mapped.py +302 -120
- scdataloader/preprocess.py +4 -213
- scdataloader/utils.py +128 -92
- {scdataloader-0.0.4.dist-info → scdataloader-1.0.5.dist-info}/METADATA +82 -26
- scdataloader-1.0.5.dist-info/RECORD +16 -0
- scdataloader-0.0.4.dist-info/RECORD +0 -16
- {scdataloader-0.0.4.dist-info → scdataloader-1.0.5.dist-info}/LICENSE +0 -0
- {scdataloader-0.0.4.dist-info → scdataloader-1.0.5.dist-info}/WHEEL +0 -0
- {scdataloader-0.0.4.dist-info → scdataloader-1.0.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,28 +1,37 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: scdataloader
|
|
3
|
-
Version: 0.0.4
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: a dataloader for single cell data in lamindb
|
|
5
5
|
Home-page: https://github.com/jkobject/scDataLoader
|
|
6
6
|
License: GPL3
|
|
7
|
-
Keywords: scRNAseq,dataloader,pytorch,lamindb,
|
|
7
|
+
Keywords: scRNAseq,dataloader,pytorch,lamindb,scPRINT
|
|
8
8
|
Author: jkobject
|
|
9
9
|
Requires-Python: ==3.10.*
|
|
10
10
|
Classifier: License :: Other/Proprietary License
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Provides-Extra: dev
|
|
13
14
|
Requires-Dist: anndata
|
|
14
15
|
Requires-Dist: biomart
|
|
15
|
-
Requires-Dist: bionty
|
|
16
|
+
Requires-Dist: bionty (==0.48.0)
|
|
17
|
+
Requires-Dist: black (>=23.10.1,<24.0.0) ; extra == "dev"
|
|
16
18
|
Requires-Dist: cellxgene-census
|
|
19
|
+
Requires-Dist: coverage (>=7.3.2,<8.0.0) ; extra == "dev"
|
|
17
20
|
Requires-Dist: decoupler
|
|
18
21
|
Requires-Dist: django
|
|
22
|
+
Requires-Dist: flake8 (>=6.1.0,<7.0.0) ; extra == "dev"
|
|
23
|
+
Requires-Dist: gitchangelog (>=3.0.4,<4.0.0) ; extra == "dev"
|
|
19
24
|
Requires-Dist: ipykernel
|
|
20
|
-
Requires-Dist:
|
|
25
|
+
Requires-Dist: isort (>=5.12.0,<6.0.0) ; extra == "dev"
|
|
26
|
+
Requires-Dist: lamindb (==0.75.1)
|
|
21
27
|
Requires-Dist: leidenalg
|
|
22
28
|
Requires-Dist: lightning
|
|
23
|
-
Requires-Dist: lnschema-bionty
|
|
24
29
|
Requires-Dist: matplotlib
|
|
30
|
+
Requires-Dist: mkdocs (>=1.5.3,<2.0.0) ; extra == "dev"
|
|
31
|
+
Requires-Dist: mypy (>=1.6.1,<2.0.0) ; extra == "dev"
|
|
25
32
|
Requires-Dist: pandas (>=2.0.0)
|
|
33
|
+
Requires-Dist: pytest (>=7.4.3,<8.0.0) ; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-cov (>=4.1.0,<5.0.0) ; extra == "dev"
|
|
26
35
|
Requires-Dist: scikit-misc
|
|
27
36
|
Requires-Dist: seaborn
|
|
28
37
|
Requires-Dist: torch
|
|
@@ -34,14 +43,16 @@ Description-Content-Type: text/markdown
|
|
|
34
43
|
|
|
35
44
|
[](https://codecov.io/gh/jkobject/scDataLoader)
|
|
36
45
|
[](https://github.com/jkobject/scDataLoader/actions/workflows/main.yml)
|
|
37
|
-
[](https://badge.fury.io/py/scDataLoader)
|
|
47
|
+
[](https://scDataLoader.readthedocs.io/en/latest/?badge=latest)
|
|
48
|
+
[](https://pepy.tech/project/scDataLoader)
|
|
49
|
+
[](https://pepy.tech/project/scDataLoader)
|
|
50
|
+
[](https://pepy.tech/project/scDataLoader)
|
|
51
|
+
[](https://img.shields.io/github/issues/jkobject/scDataLoader)
|
|
52
|
+
[](https://github.com/psf/black)
|
|
53
|
+
[](https://doi.org/10.1101/2024.07.29.605556)
|
|
38
54
|
|
|
39
|
-
|
|
40
|
-
Awesome single cell dataloader created by @jkobject
|
|
41
|
-
|
|
42
|
-
built on top of `lamindb` and the `.mapped()` function by Sergey: https://github.com/Koncopd
|
|
43
|
-
|
|
44
|
-
This data loader is designed to be used with:
|
|
55
|
+
This single cell pytorch dataloader / lightning datamodule is designed to be used with:
|
|
45
56
|
|
|
46
57
|
- [lamindb](https://lamin.ai/)
|
|
47
58
|
|
|
@@ -57,18 +68,15 @@ It allows you to:
|
|
|
57
68
|
3. create a more complex single cell dataset
|
|
58
69
|
4. extend it to your need
|
|
59
70
|
|
|
60
|
-
|
|
71
|
+
built on top of `lamindb` and the `.mapped()` function by Sergey: https://github.com/Koncopd
|
|
61
72
|
|
|
62
|
-
|
|
73
|
+
The package has been designed together with the [scPRINT paper](https://doi.org/10.1101/2024.07.29.605556) and [model](https://github.com/cantinilab/scPRINT).
|
|
63
74
|
|
|
64
|
-
|
|
65
|
-
2. doing some dataset specific preprocessing if needed
|
|
66
|
-
3. creating a dataset object on top of .mapped() (that is needed for mapping genes, cell labels etc..)
|
|
67
|
-
4. passing it to a dataloader object that can work with it correctly
|
|
75
|
+
## More
|
|
68
76
|
|
|
69
|
-
|
|
77
|
+
I needed to create this Data Loader for my PhD project. I am using it to load & preprocess thousands of datasets containing millions of cells in a few seconds. I believed that individuals employing AI for single-cell RNA sequencing and other sequencing datasets would eagerly utilize and desire such a tool, which presently does not exist.
|
|
70
78
|
|
|
71
|
-

|
|
72
80
|
|
|
73
81
|
## Install it from PyPI
|
|
74
82
|
|
|
@@ -80,13 +88,13 @@ pip install scdataloader
|
|
|
80
88
|
|
|
81
89
|
```bash
|
|
82
90
|
git clone https://github.com/jkobject/scDataLoader.git
|
|
83
|
-
|
|
84
|
-
poetry install
|
|
91
|
+
pip install -e scDataLoader
|
|
85
92
|
```
|
|
86
|
-
then run the notebooks with the poetry installed environment
|
|
87
93
|
|
|
88
94
|
## Usage
|
|
89
95
|
|
|
96
|
+
### Direct Usage
|
|
97
|
+
|
|
90
98
|
```python
|
|
91
99
|
# initialize a local lamin database
|
|
92
100
|
# !lamin init --storage ~/scdataloader --schema bionty
|
|
@@ -129,15 +137,63 @@ for i in tqdm.tqdm(datamodule.train_dataloader()):
|
|
|
129
137
|
|
|
130
138
|
```
|
|
131
139
|
|
|
132
|
-
see the notebooks in [docs](https://jkobject.
|
|
140
|
+
see the notebooks in [docs](https://www.jkobject.com/scDataLoader/):
|
|
141
|
+
|
|
142
|
+
1. [load a dataset](https://www.jkobject.com/scDataLoader/notebooks/1_download_and_preprocess/)
|
|
143
|
+
2. [create a dataset](https://www.jkobject.com/scDataLoader/notebooks/2_create_dataloader/)
|
|
133
144
|
|
|
134
|
-
|
|
135
|
-
|
|
145
|
+
### command line preprocessing
|
|
146
|
+
|
|
147
|
+
You can use the command line to preprocess a large database of datasets, as shown here for cellxgene. This allows parallelization and easier usage.
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
scdataloader --instance "laminlabs/cellxgene" --name "cellxgene-census" --version "2023-12-15" --description "preprocessed for scprint" --new_name "scprint main" --start_at 10 >> scdataloader.out
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### command line usage
|
|
154
|
+
|
|
155
|
+
The main way to use
|
|
156
|
+
|
|
157
|
+
> please refer to the [scPRINT documentation](https://www.jkobject.com/scPRINT/) and [lightning documentation](https://lightning.ai/docs/pytorch/stable/cli/lightning_cli_intermediate.html) for more information on command line usage
|
|
158
|
+
|
|
159
|
+
## FAQ
|
|
160
|
+
|
|
161
|
+
### how to update my ontologies?
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
import bionty as bt
|
|
165
|
+
bt.reset_sources()
|
|
166
|
+
|
|
167
|
+
# Run via CLI: lamin load <your instance>
|
|
168
|
+
|
|
169
|
+
import lnschema_bionty as lb
|
|
170
|
+
lb.dev.sync_bionty_source_to_latest()
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### how to load all ontologies?
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
from scdataloader import utils
|
|
177
|
+
utils.populate_ontologies() # this might take from 5-20mins
|
|
178
|
+
```
|
|
136
179
|
|
|
137
180
|
## Development
|
|
138
181
|
|
|
139
182
|
Read the [CONTRIBUTING.md](CONTRIBUTING.md) file.
|
|
140
183
|
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
This project is licensed under the GNU General Public License v3.0 (GPL-3.0) - see the [LICENSE](LICENSE) file for details.
|
|
187
|
+
|
|
188
|
+
## Acknowledgments
|
|
189
|
+
|
|
190
|
+
- [lamin.ai](https://lamin.ai/)
|
|
191
|
+
- [scanpy](https://scanpy.readthedocs.io/en/stable/)
|
|
192
|
+
- [anndata](https://anndata.readthedocs.io/en/latest/)
|
|
193
|
+
- [scprint](https://www.jkobject.com/scPRINT/)
|
|
194
|
+
|
|
195
|
+
Awesome single cell dataloader created by @jkobject
|
|
196
|
+
|
|
141
197
|
GNU GENERAL PUBLIC LICENSE
|
|
142
198
|
Version 3, 29 June 2007
|
|
143
199
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
scdataloader/VERSION,sha256=jFS_q38a6b0acUjq5B57Co9K03JuDKxw-COi1F255gw,6
|
|
2
|
+
scdataloader/__init__.py,sha256=lbO3lGiXXgirB07KXj1Fu0BzL7T43VmitqJBTyfSz7M,147
|
|
3
|
+
scdataloader/__main__.py,sha256=db_upDq3tNEtcDH17mPIczToAqGkSKfLy0Qbj6B4YmE,6385
|
|
4
|
+
scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
|
|
5
|
+
scdataloader/collator.py,sha256=O5VK2asIfFIQc-Ozm55Bc-OORIlPj_yOt7qn6xqXd74,11292
|
|
6
|
+
scdataloader/config.py,sha256=rrW2DZxG4J2_pmpDbXXsaKJkpNC57w5dIlItiFbANYw,2905
|
|
7
|
+
scdataloader/data.py,sha256=3a9jUhREIzbxC797COGNSn6QqbRiiC30FzxXCoYsTNo,13773
|
|
8
|
+
scdataloader/datamodule.py,sha256=JZq8g274ce3ARW59qwg5GKAt2SzOTaMPGh3CySGQS70,16893
|
|
9
|
+
scdataloader/mapped.py,sha256=s_Fg-lwaXjHFyQcKnp9El2IceMoaEajynyUgOnpVnXQ,20750
|
|
10
|
+
scdataloader/preprocess.py,sha256=9dgsq7c5jD2l-CUGfwC2uG98MCIgnrYFkqknqAyu5dU,28841
|
|
11
|
+
scdataloader/utils.py,sha256=aXIdnJGrANL5Q4Kcpq2_HNZOT8TcnvyoZgLiLuzchDA,21306
|
|
12
|
+
scdataloader-1.0.5.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
13
|
+
scdataloader-1.0.5.dist-info/METADATA,sha256=ypdRTtPWFQz_Y4YQeZlrRsGmU6b4_xbAttUEAwIPxwg,42295
|
|
14
|
+
scdataloader-1.0.5.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
15
|
+
scdataloader-1.0.5.dist-info/entry_points.txt,sha256=nLqucZaa5wiF7-1FCgMXO916WDQ9Qm0TcxQp0f1DwE4,59
|
|
16
|
+
scdataloader-1.0.5.dist-info/RECORD,,
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
scdataloader/VERSION,sha256=ln2a-xATRmZxZvLnboGRC8GQSI19QdUMoAcunZLwDjI,6
|
|
2
|
-
scdataloader/__init__.py,sha256=NIlE4oTUPRZ3uSW_maozoEHp470I7PV1vMOJ4XpSmL4,122
|
|
3
|
-
scdataloader/__main__.py,sha256=UyXtFHgWxE-ecJmM_oEDLlzBDBbH-uEKAVj1A7BkwmM,6297
|
|
4
|
-
scdataloader/base.py,sha256=M1gD59OffRdLOgS1vHKygOomUoAMuzjpRtAfM3SBKF8,338
|
|
5
|
-
scdataloader/collator.py,sha256=Ykjdw24GUvHdbowWUDtp28YTkaF3w65SiWTU2PKBzy4,11714
|
|
6
|
-
scdataloader/config.py,sha256=0_LoIblgdZZ19yM2qvPE-padMGQzdhuaxX20zYrhWq0,2780
|
|
7
|
-
scdataloader/data.py,sha256=faJWN--06N7irWBKcjeU6fcX5NbzyEPXs2_EVGxfBpw,12292
|
|
8
|
-
scdataloader/datamodule.py,sha256=OhHPb3jhGG5HbvahzTGxgzJ_lxbVJ4PfZspVW9h7SZk,14789
|
|
9
|
-
scdataloader/mapped.py,sha256=rhE11Xl3x_wIKu3m_wu8Is6mYsXdblu3nQpT5lNqr60,13301
|
|
10
|
-
scdataloader/preprocess.py,sha256=67ewe6b4HIjz_vTDjlOAJ4lMe4K2oCw2HHHUS-7S77M,38205
|
|
11
|
-
scdataloader/utils.py,sha256=6eKU3_cotEaQcxONMrCWzMx7U8DybabteNhk-vNqfUQ,19365
|
|
12
|
-
scdataloader-0.0.4.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
13
|
-
scdataloader-0.0.4.dist-info/METADATA,sha256=Bf8UjMwRcqSbWW8VbWrLhSb7qKQYdjZtJ7d6Oz4-rn8,39733
|
|
14
|
-
scdataloader-0.0.4.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
15
|
-
scdataloader-0.0.4.dist-info/entry_points.txt,sha256=nLqucZaa5wiF7-1FCgMXO916WDQ9Qm0TcxQp0f1DwE4,59
|
|
16
|
-
scdataloader-0.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|