lamindb 0.32.0rc1__py2.py3-none-any.whl → 0.33.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +21 -14
- lamindb/_check_versions.py +6 -6
- lamindb/_context.py +196 -0
- lamindb/_delete.py +13 -7
- lamindb/_load.py +23 -33
- lamindb/_nb.py +19 -84
- lamindb/_record.py +55 -21
- lamindb/dev/db/_add.py +22 -7
- lamindb/schema/__init__.py +1 -15
- lamindb-0.33.0.dist-info/METADATA +236 -0
- {lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/RECORD +14 -16
- {lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/WHEEL +1 -1
- lamindb/knowledge/__init__.py +0 -34
- lamindb/knowledge/_core.py +0 -71
- lamindb/knowledge/_lookup.py +0 -18
- lamindb-0.32.0rc1.dist-info/METADATA +0 -178
- {lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/LICENSE +0 -0
- {lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/entry_points.txt +0 -0
lamindb/dev/db/_add.py
CHANGED
@@ -29,14 +29,25 @@ Guide: :doc:`/guide/add-delete`.
 
     Example:
 
-
-    >>> ln.add(
-
-
-
+    1) Add a record (by passing a record)
+    >>> ln.add(lns.Pipeline(name="My pipeline", v="1"))
+    Pipeline(id="0Cb86EZj", name="My pipeline", v="1", ...)
+
+    2) Update an existing record
+    >>> pipeline = ln.select(lns.Pipeline, id="0Cb86EZj").one()
+    >>> pipeline.name = "New name"
     >>> ln.add(experiment)
-
-
+    Pipeline(id="0Cb86EZj", name="New name", v="1", ...)
+
+    3) Add a record if not exist in the DB
+    >>> # add a record if the metadata combination is not already exist in the DB
+    >>> # if exists, returns the existing record from the DB
+    >>> ln.add(lns.Pipeline, name="My pipeline", v="1")
+    >>> # under the hood, this runs a query first based on passed fields
+    >>> # equivalent to the following:
+    >>> pipeline = ln.select(lns.Pipeline, name="My pipeline", v="1").one_or_none()
+    >>> if pipeline is None:
+    >>>     ln.add(pipeline)
 
     Args:
        record: One or multiple records as instances of `SQLModel`.
@@ -93,6 +104,10 @@ def add( # type: ignore
     db_error = None
     for record in records:
         write_objectkey(record)
+        # the following ensures that queried objects (within __init__)
+        # behave like queried objects, only example right now: Run
+        if hasattr(record, "_ln_identity_key") and record._ln_identity_key is not None:
+            record._sa_instance_state.key = record._ln_identity_key
         session.add(record)
         try:
             session.commit()
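The docstring's third example is the interesting change: `ln.add(Model, **fields)` now behaves as a get-or-create. A minimal sketch of that select-then-add flow, assuming the 0.33 API shown above (`ln.select`, `.one_or_none()`, `ln.add`) and `import lamindb.schema as lns`:

```python
# Sketch of the get-or-create behavior described in the docstring above;
# assumes the lamindb 0.33-era API, not verified beyond the diff.
import lamindb as ln
import lamindb.schema as lns

# query first, based on the passed fields
pipeline = ln.select(lns.Pipeline, name="My pipeline", v="1").one_or_none()
if pipeline is None:
    # no match: create the record and commit it
    pipeline = ln.add(lns.Pipeline(name="My pipeline", v="1"))
# otherwise: `pipeline` is the existing record returned from the DB
```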
lamindb/schema/__init__.py
CHANGED
@@ -12,13 +12,6 @@ Import this submodule as::
 Core entities
 =============
 
-Data objects ({class}`lamindb.DObject`) are transformed by runs:
-
-.. autosummary::
-   :toctree: .
-
-   Run
-
 Runs transform data using code:
 
 .. autosummary::
@@ -27,13 +20,6 @@ Runs transform data using code:
    Pipeline
    Notebook
 
-Grouping data objects by features:
-
-.. autosummary::
-   :toctree: .
-
-   Features
-
 Users, projects, storage locations, and usage:
 
 .. autosummary::
@@ -84,7 +70,7 @@ from lnschema_core import (
 from packaging import version as _v
 
 _check_v = {
-    "bionty": "0.8.
+    "bionty": "0.8.0",
     "wetlab": "0.13.4",
 }
 
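For context, `_check_v` pins minimum schema-module versions. A sketch of how such a check can be enforced with `packaging.version`; the dict mirrors the snippet above, but the loop body is an illustrative assumption, not lamindb's actual `_check_versions` code:

```python
# Hypothetical minimum-version check over the _check_v pins above.
from importlib.metadata import version
from packaging import version as _v

_check_v = {
    "bionty": "0.8.0",
    "wetlab": "0.13.4",
}

for pkg, min_version in _check_v.items():
    installed = version(f"lnschema_{pkg}")  # e.g. lnschema_bionty
    if _v.parse(installed) < _v.parse(min_version):
        raise RuntimeError(
            f"lnschema_{pkg}>={min_version} required, found {installed}"
        )
```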
lamindb-0.33.0.dist-info/METADATA
ADDED
@@ -0,0 +1,236 @@
+Metadata-Version: 2.1
+Name: lamindb
+Version: 0.33.0
+Summary: LaminDB: Manage R&D data & analyses.
+Author-email: Lamin Labs <laminlabs@gmail.com>
+Description-Content-Type: text/markdown
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Requires-Dist: lndb>=0.37.4
+Requires-Dist: lnschema_core==0.29.5
+Requires-Dist: lnschema_wetlab==0.13.4
+Requires-Dist: lnschema_bionty==0.8.0
+Requires-Dist: nbproject>=0.8.3
+Requires-Dist: readfcs>=1.1.0
+Requires-Dist: anndata>=0.8.0
+Requires-Dist: hjson
+Requires-Dist: sqlmodel>=0.0.8
+Requires-Dist: typeguard
+Requires-Dist: tabulate
+Requires-Dist: erdiagram
+Requires-Dist: zarr
+Requires-Dist: pre-commit ; extra == "dev"
+Requires-Dist: nox ; extra == "dev"
+Requires-Dist: laminci>=0.2.5 ; extra == "dev"
+Requires-Dist: pytest>=6.0 ; extra == "test"
+Requires-Dist: coverage ; extra == "test"
+Requires-Dist: pytest-cov ; extra == "test"
+Requires-Dist: scanpy ; extra == "test"
+Requires-Dist: scikit-learn>=1.1.1 ; extra == "test"
+Requires-Dist: nbproject_test>=0.3.0 ; extra == "test"
+Requires-Dist: psycopg2-binary ; extra == "test"
+Requires-Dist: matplotlib<3.7 ; extra == "test"
+Project-URL: Home, https://github.com/laminlabs/lamindb
+Provides-Extra: dev
+Provides-Extra: test
+
+[](https://github.com/laminlabs/lamindb)
+[](https://codecov.io/gh/laminlabs/lamindb)
+[](https://pypi.org/project/lamindb)
+
+# LaminDB: Manage R&D data & analyses
+
+_Curate, store, track, query, integrate, and learn from biological data._
+
+LaminDB is an open-source data lake for R&D in biology. It manages indexed **object storage** (local directories, S3, GCP) with a mapped **SQL database** (SQLite, Postgres, and soon, BigQuery).
+
+One cool thing is that you can readily create distributed _LaminDB instances_ at any scale. Get started on your laptop, deploy in the cloud, or work with a mesh of instances for different teams and purposes.
+
+```{warning}
+
+Public beta: Currently only recommended for collaborators as we still make breaking changes.
+
+```
+
+## Installation
+
+LaminDB is a python package available for Python versions 3.8+.
+
+```shell
+pip install lamindb
+```
+
+## Import
+
+In your python script, import LaminDB as:
+
+```python
+import lamindb as ln
+```
+
+## Quick setup
+
+Quick setup on the command line:
+
+- Sign up via `lamin signup <email>`
+- Log in via `lamin login <handle>`
+- Set up an instance via `lamin init --storage <storage> --schema <schema_modules>`
+
+:::{dropdown} Example code
+
+```shell
+lamin signup testuser1@lamin.ai
+lamin login testuser1
+lamin init --storage ./mydata --schema bionty,wetlab
+```
+
+:::
+
+See {doc}`/guide/setup` for more.
+
+## Track & query data
+
+### Track data sources, data, and metadata
+
+::::{tab-set}
+:::{tab-item} Within an interactive notebook
+
+```{code-block} python
+import lamindb as ln
+
+ln.Run()  # data source (a run record) is created
+#> ℹ️ Instance: testuser2/mydata
+#> ℹ️ User: testuser2
+#> ℹ️ Loaded run:
+#> Run(id='L1oBMKW60ndt5YtjRqav', notebook_id='sePTpDsGJRq3', notebook_v='0', created_by='bKeW4T6E', created_at=datetime.datetime(2023, 3, 14, 21, 49, 36))
+
+df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+
+# create a data object with SQL metadata record including hash
+# link run record
+dobject = ln.DObject(df, name="My dataframe")
+#> DObject(id='dZvGD7YUKCKG4X4aLd5K', name='My dataframe', suffix='.parquet', size=2240, hash='R2_kKlH1nBGesMdyulMYkA', source_id='L1oBMKW60ndt5YtjRqav', storage_id='wor0ul6c')
+
+# upload serialized version to the configured storage
+# commit a DObject record to the SQL database
+ln.add(dobject)
+#> DObject(id='dZvGD7YUKCKG4X4aLd5K', name='My dataframe', suffix='.parquet', size=2240, hash='R2_kKlH1nBGesMdyulMYkA', source_id='L1oBMKW60ndt5YtjRqav', storage_id='wor0ul6c', created_at=datetime.datetime(2023, 3, 14, 21, 49, 46))
+```
+
+:::
+:::{tab-item} Within a regular pipeline
+
+```{code-block} python
+# create (or query) a pipeline record
+pipeline = lns.Pipeline(name="My pipeline")
+#> Pipeline(id='fhn5Zydf', v='1', name='My pipeline', created_by='bKeW4T6E')
+
+# create a run from the above pipeline as the data source
+run = ln.Run(pipeline=pipeline)
+#> Run(id='2aaKWH8dwBE6hnj3n9K9', pipeline_id='fhn5Zydf', pipeline_v='1', created_by='bKeW4T6E')
+
+# access pipeline from run via
+print(run.pipeline)
+#> Pipeline(id='fhn5Zydf', v='1', name='My pipeline', created_by='bKeW4T6E')
+
+df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
+
+# create a data object with SQL metadata record including hash and link run record
+dobject = ln.DObject(df, name="My dataframe", source=run)
+#> DObject(id='dZvGD7YUKCKG4X4aLd5K', name='My dataframe', suffix='.parquet', size=2240, hash='R2_kKlH1nBGesMdyulMYkA', source_id='L1oBMKW60ndt5YtjRqav', storage_id='wor0ul6c')
+
+# Tip: If you work with a single thread, you can pass `global_context=True` to ln.Run(), allowing you to omit source=run
+
+# upload serialized version to the configured storage
+# commit a DObject record to the SQL database
+ln.add(dobject)
+#> DObject(id='dZvGD7YUKCKG4X4aLd5K', name='My dataframe', suffix='.parquet', size=2240, hash='R2_kKlH1nBGesMdyulMYkA', source_id='L1oBMKW60ndt5YtjRqav', storage_id='wor0ul6c', created_at=datetime.datetime(2023, 3, 14, 21, 49, 46))
+```
+
+:::
+::::
+
+### Query & load data
+
+```python
+dobject = ln.select(ln.DObject, name="My dataframe").one()
+#> [DObject(id='dZvGD7YUKCKG4X4aLd5K', name='My dataframe', suffix='.parquet', size=2240, hash='R2_kKlH1nBGesMdyulMYkA', source_id='L1oBMKW60ndt5YtjRqav', storage_id='wor0ul6c', created_at=datetime.datetime(2023, 3, 14, 21, 49, 46))]
+df = dobject.load()
+#>    a  b
+#> 0  1  3
+#> 1  2  4
+```
+
+Get the data ingested by the latest run:
+
+```python
+run = ln.select(ln.Run).order_by(ln.Run.created_at.desc()).first()
+#> Run(id='L1oBMKW60ndt5YtjRqav', notebook_id='sePTpDsGJRq3', notebook_v='0', created_by='bKeW4T6E', created_at=datetime.datetime(2023, 3, 14, 21, 49, 36))
+dobject = ln.select(ln.DObject).where(ln.DObject.source == run).all()
+#> [DObject(id='dZvGD7YUKCKG4X4aLd5K', name='My dataframe', suffix='.parquet', size=2240, hash='R2_kKlH1nBGesMdyulMYkA', source_id='L1oBMKW60ndt5YtjRqav', storage_id='wor0ul6c', created_at=datetime.datetime(2023, 3, 14, 21, 49, 46))]
+```
+
+<br>
+
+See {doc}`/guide/track` for more.
+
+## Track biological metadata
+
+### Track biological features
+
+```python
+import bionty as bt  # Lamin's manager for biological knowledge
+import lamindb as ln
+
+ln.Run()  # assume we're in a notebook and don't need to pass pipeline_name
+
+# a sample single cell RNA-seq dataset
+adata = ln.dev.datasets.anndata_mouse_sc_lymph_node()
+
+# Create a reference
+# - ensembl id as the standardized id
+# - mouse as the species
+reference = bt.Gene(species="mouse")
+
+# parse gene identifiers from data and map on reference
+features = ln.Features(adata, reference)
+#> 🔶 id column not found, using index as features.
+#> ✅ 0 terms (0.0%) are mapped.
+#> 🔶 10000 terms (100.0%) are not mapped.
+# The result is a hashed feature set record:
+print(features)
+#> Features(id='2Mv3JtH-ScBVYHilbLaQ', type='gene', created_by='bKeW4T6E')
+# genes records can be accessed via:
+print(features.genes[:3])
+#> [Gene(id='ENSMUSG00000020592', species_id='NCBI_10090'),
+#> Gene(id='ENSMUSG00000034931', species_id='NCBI_10090'),
+#> Gene(id='ENSMUSG00000071005', species_id='NCBI_10090')]
+
+# track data with features
+dobject = ln.DObject(adata, name="Mouse Lymph Node scRNA-seq", features=features)
+
+# access linked gene references
+print(dobject.features.genes[:3])
+#> [Gene(id='ENSMUSG00000020592', species_id='NCBI_10090'),
+#> Gene(id='ENSMUSG00000034931', species_id='NCBI_10090'),
+#> Gene(id='ENSMUSG00000071005', species_id='NCBI_10090')]
+
+# upload serialized data to configured storage
+# commit a DObject record to the SQL database
+# commit all linked features to the SQL database
+ln.add(dobject)
+```
+
+<br>
+
+See {doc}`/guide/features` for more.
+
+```{tip}
+- Each page in this guide is a Jupyter Notebook, which you can download [here](https://github.com/laminlabs/lamindb/tree/main/docs/guide).
+- You can run these notebooks in hosted versions of JupyterLab, e.g., [Saturn Cloud](https://github.com/laminlabs/run-lamin-on-saturn), Google Vertex AI, and others.
+- We recommend using [JupyterLab](https://jupyterlab.readthedocs.io/) for best notebook tracking experience.
+```
+
+📬 [Reach out](https://lamin.ai/contact) to report issues, learn about data modules that connect your assays, pipelines & workflows within our data platform enterprise plan.
+
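The pipeline tab's tip about `global_context=True` is easy to miss. A sketch of what it implies, taking the keyword behavior only from the tip above and assuming `import lamindb.schema as lns` (an import the README does not show):

```python
# Sketch of the `global_context=True` tip from the README above.
import pandas as pd
import lamindb as ln
import lamindb.schema as lns  # assumed import for lns.Pipeline

pipeline = lns.Pipeline(name="My pipeline")
ln.Run(pipeline=pipeline, global_context=True)  # set the run as global context

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
dobject = ln.DObject(df, name="My dataframe")  # source=run can now be omitted
ln.add(dobject)
```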
{lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/RECORD
CHANGED
@@ -1,11 +1,12 @@
-lamindb/__init__.py,sha256=
-lamindb/_check_versions.py,sha256=
-lamindb/
+lamindb/__init__.py,sha256=1qOrs9tEMQVexTUI2xUHCJFdE8feiWgT1IxKcWhvNwU,4583
+lamindb/_check_versions.py,sha256=3pOwEFLxMQLxnIg2408vDoAzvOEIJJB9zlOboOdiBiA,1003
+lamindb/_context.py,sha256=11eKvLvGpNzJT3I1JcTb06FkSJjot4T6Of9MMthcBlY,7334
+lamindb/_delete.py,sha256=h9rEwsfmyR2qKZBTMH6pR_v36_K4WCpE6nAoA8oXvGw,4662
 lamindb/_folder.py,sha256=JGK1saXTa_j4OdWmD2VmlaasdSpLYTq5tSgLsh2hPA8,5201
-lamindb/_load.py,sha256=
+lamindb/_load.py,sha256=bkxlQudPzxEyj3bG5PK3MiD00lL__0tragrwMQIU-U0,1337
 lamindb/_logger.py,sha256=d1jQdFL4gMKbzcc1dMOgKoEaUlWhyK-VsnxYLlG9caY,48
-lamindb/_nb.py,sha256=
-lamindb/_record.py,sha256=
+lamindb/_nb.py,sha256=1Sgv-XC3Y2-yzS5CUh25wAtk8GJd-Y5GjEdwyueAJP4,3780
+lamindb/_record.py,sha256=krx8WmANrrG_W4AQqFyVR0UGr4NsVcQrSP5D6FKYD0k,11593
 lamindb/_settings.py,sha256=dX1A2Hx5aSRC4YjY5jF2J1Tv5K8w_UOuAPFCcxdpMHI,818
 lamindb/_subset.py,sha256=37DH5HxG0xdmc4kBBypanYWuMlMKumTKH6IM8mcG-ps,2430
 lamindb/_view.py,sha256=WsBT9VZGUJyT3sGcJfQYtZXM5T6G9O6NfUWeGFV0UQY,1364
@@ -15,7 +16,7 @@ lamindb/dev/_docs.py,sha256=0ha3cZqzvIQnAXVC_JlAiRrFi3VpEEVookP_oURhBr4,240
 lamindb/dev/datasets/__init__.py,sha256=BvV0ONUj0Iz5JIawb8ow2TpLOQIuEsvGW7_1zdbXrxU,613
 lamindb/dev/datasets/_core.py,sha256=h-I3fIEyhKaOryo9aCn2Wgmh_jN-Xa5Z0TJwEi1lbtw,7434
 lamindb/dev/db/__init__.py,sha256=Rp9LZ2QPQxEF6J_z2M8vUrUZztG0qQdf9j3Zi3dJhRQ,216
-lamindb/dev/db/_add.py,sha256=
+lamindb/dev/db/_add.py,sha256=30pm1XLBHWsn5Wi53mJArkFlE9f1Sy9CDQ2u89vsBcI,9660
 lamindb/dev/db/_core.py,sha256=6x9fj0LxxPtrPJ7t5Ee_-e1AJgh5H2KI33VnLjuVZXg,897
 lamindb/dev/db/_select.py,sha256=IE_Nwh_kFXRzIJpz4rsSIOaxFTt9jwkncvIK6KUO15Q,7424
 lamindb/dev/db/_session.py,sha256=Vwt2gItSnFoHTTtXVZ77ZxyCNjwca9b0MkViWnxemqc,1855
@@ -32,10 +33,7 @@ lamindb/dev/object/_anndata_sizes.py,sha256=OOM9mJmhvho5JacsuMsHTXoWfvF0vjxRvg_P
 lamindb/dev/object/_core.py,sha256=z42LBQb_zICJrY7vXI2GrBzpnDN3WhFmW32STfAgzis,900
 lamindb/dev/object/_lazy_field.py,sha256=KJG8YpA_Y0UWvEu4K-h-J69J29Q-tdnutEEutVijX0M,4116
 lamindb/dev/object/_subset_anndata.py,sha256=t8vIJfKLxl386pIURHm8yk49Vg1NPUK-Bxv06KRUZRU,3533
-lamindb/
-lamindb/knowledge/_core.py,sha256=9C-8TTMsyAwgFYCJVjsi8LEVbOh2HZaCU4q3qm-RzUk,1639
-lamindb/knowledge/_lookup.py,sha256=RvUsYMS6d58zgCZy6WmCJJpCnQvSXua9FivgW2iVwjY,404
-lamindb/schema/__init__.py,sha256=w8odXPFGtkXC5HcyfaH3t8iC9_2eEseuIk5VLbEJEWo,2167
+lamindb/schema/__init__.py,sha256=cN4PH_UzNANQcPbsWkMmjsPt3JUQuKmU2D_RDn4oy5E,1975
 lamindb/schema/_core.py,sha256=knEAUQqkNZMYUotEEW_Yo6D1Ijw6gnUpcyh7iPjjFK4,2109
 lamindb/schema/_table.py,sha256=yEPakpgx3R7ytBe-n4NgmOwfOuiYoOLuE7uXT16TqGQ,1509
 lamindb/schema/dev/__init__.py,sha256=XvATVfSXU9DPLiNTsKqQMprMvdGEdojkvsBXmJdUe1E,128
@@ -44,8 +42,8 @@ lamindb/schema/dev/sqlmodel.py,sha256=qscffXVlPKE18OP2duCkRo8ZQQf6VNYUMDI4D65z-X
 lamindb/schema/dev/type.py,sha256=V1mrlmkdQQWOi20SwsZutccKf8n1b0jD0E60bRPxZI0,114
 lamindb/setup/__init__.py,sha256=jPc-SG6T_GQXfh81I86v_Yj2XFBS8FPuY5I0sGwIRJc,213
 lamindb/setup/dev.py,sha256=7wl6L-Gj3eV2mmsnxFeuqcaEkmGN1QyQm0bCEDyddFI,84
-lamindb-0.
-lamindb-0.
-lamindb-0.
-lamindb-0.
-lamindb-0.
+lamindb-0.33.0.dist-info/entry_points.txt,sha256=8ro7RnWnay39wF5Ojvnfnd8TvyI7WrPOCVQHNxZAA6w,44
+lamindb-0.33.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.33.0.dist-info/WHEEL,sha256=kdeDBNPvBI0w3meLKPoGgAnEr54n1jzrZWUoaLmGzVY,99
+lamindb-0.33.0.dist-info/METADATA,sha256=DoyNt1jUs-X3cbz9gkDTk9nUL74sRFY8m9pLHFA8FUM,8736
+lamindb-0.33.0.dist-info/RECORD,,
lamindb/knowledge/__init__.py
DELETED
@@ -1,34 +0,0 @@
-"""Knowledge.
-
-Feature tables from bionty:
-
-.. autosummary::
-   :toctree: .
-
-   Species
-   Gene
-   Protein
-   CellMarker
-   Tissue
-   CellType
-   Disease
-
-Lookup knowledge table identifiers:
-
-.. autosummary::
-   :toctree: .
-
-   lookup
-"""
-
-
-from lamin_logger import logger
-
-from ._core import CellMarker, CellType, Disease, Gene, Protein, Species, Tissue
-from ._lookup import lookup
-
-# currently shows up also when initializing ln.DObject, so not emit it yet
-# logger.warning(
-#     "The lamindb.knowledge API is deprecated.\nPlease, use bionty directly.\nYou can"
-#     " replace all occurances of ln.knowledge with bionty without breaking changes!"
-# )
lamindb/knowledge/_core.py
DELETED
@@ -1,71 +0,0 @@
-import bionty as bt
-
-
-class Species(bt.Species):
-    """Bionty Species.
-
-    See `bionty.Species <https://lamin.ai/docs/bionty/bionty.Species>`__.
-    """
-
-    def __init__(self, id="name") -> None:
-        super().__init__(id=id)
-
-
-class Gene(bt.Gene):
-    """Bionty Gene.
-
-    See `bionty.Gene <https://lamin.ai/docs/bionty/bionty.Gene>`__.
-    """
-
-    def __init__(self, species="human", id="ensembl_gene_id") -> None:
-        super().__init__(species=species, id=id)
-
-
-class Protein(bt.Protein):
-    """Bionty Protein.
-
-    See `bionty.Protein <https://lamin.ai/docs/bionty/bionty.Protein>`__.
-    """
-
-    def __init__(self, species="human", id="uniprotkb_id") -> None:
-        super().__init__(species=species, id=id)
-
-
-class CellMarker(bt.CellMarker):
-    """Bionty CellMarker.
-
-    See `bionty.CellMarker <https://lamin.ai/docs/bionty/bionty.CellMarker>`__.
-    """
-
-    def __init__(self, species="human", id="name") -> None:
-        super().__init__(species=species, id=id)
-
-
-class CellType(bt.CellType):
-    """Bionty CellType.
-
-    See `bionty.CellType <https://lamin.ai/docs/bionty/bionty.CellType>`__.
-    """
-
-    def __init__(self, id="ontology_id") -> None:
-        super().__init__(id=id)
-
-
-class Tissue(bt.Tissue):
-    """Bionty Tissue.
-
-    See `bionty.Tissue <https://lamin.ai/docs/bionty/bionty.Tissue>`__.
-    """
-
-    def __init__(self, id="ontology_id") -> None:
-        super().__init__(id=id)
-
-
-class Disease(bt.Disease):
-    """Bionty Disease.
-
-    See `bionty.Disease <https://lamin.ai/docs/bionty/bionty.Disease>`__.
-    """
-
-    def __init__(self, id="ontology_id") -> None:
-        super().__init__(id=id)
lamindb/knowledge/_lookup.py
DELETED
@@ -1,18 +0,0 @@
-from bionty import lookup as bionty_lookup
-
-
-class lookup:
-    """Lookup identifiers of knowledge tables."""
-
-    pass
-
-
-def _get_all_methods(lookup):
-    return [i for i in dir(lookup) if not i.startswith("_")]
-
-
-for module in [bionty_lookup]:
-    methods = _get_all_methods(module)
-    for method in methods:
-        model = getattr(module, method)
-        setattr(lookup, method, staticmethod(model))
lamindb-0.32.0rc1.dist-info/METADATA
DELETED
@@ -1,178 +0,0 @@
-Metadata-Version: 2.1
-Name: lamindb
-Version: 0.32.0rc1
-Summary: LaminDB: Manage R&D data & analyses.
-Author-email: Lamin Labs <laminlabs@gmail.com>
-Description-Content-Type: text/markdown
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Requires-Dist: lndb==0.37.1
-Requires-Dist: lnschema_core==0.29.1
-Requires-Dist: lnschema_wetlab==0.13.4
-Requires-Dist: lnschema_bionty==0.8.0rc2
-Requires-Dist: nbproject>=0.8.2
-Requires-Dist: readfcs>=1.1.0
-Requires-Dist: anndata>=0.8.0
-Requires-Dist: hjson
-Requires-Dist: sqlmodel>=0.0.8
-Requires-Dist: typeguard
-Requires-Dist: tabulate
-Requires-Dist: erdiagram
-Requires-Dist: zarr
-Requires-Dist: pre-commit ; extra == "dev"
-Requires-Dist: nox ; extra == "dev"
-Requires-Dist: laminci>=0.2.5 ; extra == "dev"
-Requires-Dist: pytest>=6.0 ; extra == "test"
-Requires-Dist: coverage ; extra == "test"
-Requires-Dist: pytest-cov ; extra == "test"
-Requires-Dist: scanpy ; extra == "test"
-Requires-Dist: scikit-learn>=1.1.1 ; extra == "test"
-Requires-Dist: nbproject_test>=0.3.0 ; extra == "test"
-Requires-Dist: psycopg2-binary ; extra == "test"
-Requires-Dist: matplotlib<3.7 ; extra == "test"
-Project-URL: Home, https://github.com/laminlabs/lamindb
-Provides-Extra: dev
-Provides-Extra: test
-
-[](https://github.com/laminlabs/lamindb)
-[](https://codecov.io/gh/laminlabs/lamindb)
-[](https://pypi.org/project/lamindb)
-
-# LaminDB: Manage R&D data & analyses
-
-_Curate, store, track, query, integrate, and learn from biological data._
-
-LaminDB is an open-source data lake for R&D in biology. It manages indexed **object storage** (local directories, S3, GCP) with a mapped **SQL database** (SQLite, Postgres, and soon, BigQuery).
-
-One cool thing is that you can readily create distributed _LaminDB instances_ at any scale. Get started on your laptop, deploy in the cloud, or work with a mesh of instances for different teams and purposes.
-
-```{warning}
-
-Public beta: Currently only recommended for collaborators as we still make breaking changes.
-
-```
-
-## Installation
-
-LaminDB is a python package available for Python versions 3.8+.
-
-```shell
-pip install lamindb
-```
-
-## Import
-
-In your python script, import LaminDB as:
-
-```python
-import lamindb as ln
-```
-
-## Quick setup
-
-Quick setup on the command line:
-
-- Sign up via `lamin signup <email>`
-- Log in via `lamin login <handle>`
-- Set up an instance via `lamin init --storage <storage> --schema <schema_modules>`
-
-:::{dropdown} Example code
-
-```shell
-lamin signup testuser1@lamin.ai
-lamin login testuser1
-lamin init --storage ./mydata --schema bionty,wetlab
-```
-
-:::
-
-See {doc}`/guide/setup` for more.
-
-## Track & query data
-
-### Track data source & data
-
-::::{tab-set}
-:::{tab-item} Within a notebook
-
-```{code-block} python
-ln.nb.header()  # data source is created and linked
-
-df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
-
-# create a data object with SQL metadata record
-dobject = ln.DObject(df, name="My dataframe")
-
-# upload the data file to the configured storage
-# and commit a DObject record to the SQL database
-ln.add(dobject)
-```
-
-:::
-:::{tab-item} Within a pipeline
-
-```{code-block} python
-# create a pipeline record
-pipeline = lns.Pipeline(name="my pipeline", version="1")
-
-# create a run from the above pipeline as the data source
-run = lns.Run(pipeline=pipeline, name="my run")
-
-df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
-
-# create a data object with SQL metadata record
-dobject = ln.DObject(df, name="My dataframe", source=run)
-
-# upload the data file to the configured storage
-# and commit a DObject record to the SQL database
-ln.add(dobject)
-```
-
-:::
-::::
-
-### Query & load data
-
-```python
-dobject = ln.select(ln.DObject, name="My dataframe").one()
-df = dobject.load()
-```
-
-<br>
-
-See {doc}`/guide/ingest` for more.
-
-## Track biological features
-
-```python
-import bionty as bt
-
-# An sample single cell RNA-seq dataset
-adata = ln.dev.datasets.anndata_mouse_sc_lymph_node()
-
-# Start to track genes mapped to a Bionty Entity
-# - ensembl id as the standardized id
-# - mouse as the species
-reference = bt.Gene(id=bt.gene_id.ensembl_gene_id, species=bt.Species().lookup.mouse)
-
-# Create a data object with features
-dobject = ln.DObject(adata, name="Mouse Lymph Node scRNA-seq", features_ref=reference)
-
-# upload the data file to the configured storage
-# and commit a DObject record to the sql database
-ln.add(dobject)
-```
-
-<br>
-
-See {doc}`/guide/link-features` for more.
-
-```{tip}
-- Each page in this guide is a Jupyter Notebook, which you can download [here](https://github.com/laminlabs/lamindb/tree/main/docs/guide).
-- You can run these notebooks in hosted versions of JupyterLab, e.g., [Saturn Cloud](https://github.com/laminlabs/run-lamin-on-saturn), Google Vertex AI, and others.
-- We recommend using [JupyterLab](https://jupyterlab.readthedocs.io/) for best notebook tracking experience.
-```
-
-📬 [Reach out](https://lamin.ai/contact) to report issues, learn about data modules that connect your assays, pipelines & workflows within our data platform enterprise plan.
-
{lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/LICENSE
{lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/entry_points.txt
Files without changes