lamindb 1.11.1__py3-none-any.whl → 1.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/models/query_set.py +24 -16
- lamindb/models/save.py +12 -2
- lamindb/models/sqlrecord.py +1 -1
- lamindb-1.11.2.dist-info/METADATA +180 -0
- {lamindb-1.11.1.dist-info → lamindb-1.11.2.dist-info}/RECORD +8 -8
- lamindb-1.11.1.dist-info/METADATA +0 -139
- {lamindb-1.11.1.dist-info → lamindb-1.11.2.dist-info}/LICENSE +0 -0
- {lamindb-1.11.1.dist-info → lamindb-1.11.2.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/models/query_set.py
CHANGED
@@ -155,19 +155,29 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
         expressions,
     )
     if issubclass(queryset.model, SQLRecord):
-        # branch_id is set to 1 unless expressions contains id or
-
-
-
-
-
-
-
-
-
-
-
-
+        # branch_id is set to 1 unless expressions contains id, uid or hash
+        id_uid_hash = {"id", "uid", "hash", "id__in", "uid__in", "hash__in"}
+        if not any(expression in id_uid_hash for expression in expressions):
+            expressions_have_branch = False
+            branch_branch_id = {"branch", "branch_id"}
+            branch_branch_id__ = ("branch__", "branch_id__")
+            for expression in expressions:
+                if expression in branch_branch_id or expression.startswith(
+                    branch_branch_id__
+                ):
+                    expressions_have_branch = True
+                    break
+            if not expressions_have_branch:
+                # TODO: should be set to the current default branch
+                expressions["branch_id"] = 1
+            else:
+                # if branch_id is None, do not apply a filter
+                # otherwise, it would mean filtering for NULL values, which doesn't make
+                # sense for a non-NULLABLE column
+                if "branch_id" in expressions and expressions["branch_id"] is None:
+                    expressions.pop("branch_id")
+                if "branch" in expressions and expressions["branch"] is None:
+                    expressions.pop("branch")
     if queryset._db is not None:
         # only check for database mismatch if there is a defined database on the
         # queryset
@@ -226,8 +236,6 @@ def get(
     else:
         assert idlike is None  # noqa: S101
         expressions = process_expressions(qs, expressions)
-        # don't want branch_id here in .get(), only in .filter()
-        expressions.pop("branch_id", None)
         # inject is_latest for consistency with idlike
         is_latest_was_not_in_expressions = "is_latest" not in expressions
         if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
@@ -241,7 +249,7 @@ def get(
         result = qs.filter(**expressions).order_by("-created_at").first()
         if result is not None:
             return result
-        raise
+        raise e


 class SQLRecordList(UserList, Generic[T]):
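In plain terms, the `query_set.py` change does two things: `.filter()` on a `SQLRecord` registry now injects the default `branch_id = 1` only if the expressions neither look a record up by `id`/`uid`/`hash` nor already mention `branch`/`branch_id` (and an explicit `branch=None` or `branch_id=None` now means "do not filter on branch at all"), and `.get()` reuses that logic instead of stripping `branch_id`, re-raising the original exception as `raise e`. Below is a minimal, self-contained sketch of the decision rule for illustration only; `apply_branch_default` is a stand-in, not lamindb's actual `process_expressions`:

```python
# Illustrative sketch of the new default-branch rule (not lamindb source code).
def apply_branch_default(expressions: dict) -> dict:
    id_uid_hash = {"id", "uid", "hash", "id__in", "uid__in", "hash__in"}
    if not any(e in id_uid_hash for e in expressions):
        has_branch_filter = any(
            e in {"branch", "branch_id"} or e.startswith(("branch__", "branch_id__"))
            for e in expressions
        )
        if not has_branch_filter:
            expressions["branch_id"] = 1  # default branch
        else:
            # branch=None / branch_id=None: drop the filter instead of matching NULL
            for key in ("branch_id", "branch"):
                if key in expressions and expressions[key] is None:
                    expressions.pop(key)
    return expressions


print(apply_branch_default({"suffix": ".fasta"}))                  # {'suffix': '.fasta', 'branch_id': 1}
print(apply_branch_default({"suffix": ".fasta", "branch": None}))  # {'suffix': '.fasta'}
print(apply_branch_default({"uid": "someDummyUid00000000"}))       # unchanged: uid lookups skip the default
```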
lamindb/models/save.py
CHANGED
@@ -234,7 +234,15 @@ def check_and_attempt_upload(
     try:
         copy_or_move_to_cache(artifact, storage_path, cache_path)
     except Exception as e:
-
+        if not str(e).startswith(
+            "[WinError 32] The process cannot access the file "
+            "because it is being used by another process"
+        ):
+            # ignore WinError 32 error, this just means that the file is still open on save
+            # it is saved at this point, so not a big deal if copy or move to cache fails
+            # this mostly happens for run logs
+            # just ignore without a warning
+            logger.warning(f"A problem with cache on saving: {e}")
     # after successful upload, we should remove the attribute so that another call
     # call to save won't upload again, the user should call replace() then
     del artifact._local_filepath
@@ -269,7 +277,9 @@ def copy_or_move_to_cache(
     # non-local storage_path further
     if local_path != cache_path:
         if cache_path.exists():
-            logger.
+            logger.important_hint(
+                f"replacing the existing cache path {cache_path.as_posix()}"
+            )
             if cache_path.is_dir():
                 shutil.rmtree(cache_path)
             else:
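The `save.py` change makes the post-upload cache copy fault-tolerant: a Windows sharing violation ("[WinError 32] ...", typical for run logs that are still open) is ignored silently, and any other failure only triggers a warning because the artifact is already saved at that point. A self-contained sketch of that pattern, using `shutil.copy2` as a stand-in for lamindb's `copy_or_move_to_cache`:

```python
# Sketch of the tolerant cache copy; copy_to_cache_tolerant is illustrative, not lamindb's API.
import logging
import shutil
from pathlib import Path

logger = logging.getLogger(__name__)

WINERROR_32_PREFIX = (
    "[WinError 32] The process cannot access the file "
    "because it is being used by another process"
)


def copy_to_cache_tolerant(local_path: Path, cache_path: Path) -> None:
    try:
        shutil.copy2(local_path, cache_path)  # stand-in for copy_or_move_to_cache()
    except Exception as e:
        if not str(e).startswith(WINERROR_32_PREFIX):
            # any other failure only warrants a warning: the upload itself already succeeded
            logger.warning(f"A problem with cache on saving: {e}")
        # WinError 32 (file still open, e.g. a run log) is ignored entirely
```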
lamindb/models/sqlrecord.py
CHANGED
@@ -350,7 +350,7 @@ def delete_record(record: BaseSQLRecord, is_soft: bool = True):
         with transaction.atomic():
             new_latest.save()
             delete()
-            logger.
+            logger.important_hint(f"new latest version is: {new_latest}")
         return None
     # deal with all other cases of the nested if condition now
     delete()
lamindb-1.11.2.dist-info/METADATA
ADDED
@@ -0,0 +1,180 @@
+Metadata-Version: 2.3
+Name: lamindb
+Version: 1.11.2
+Summary: A data framework for biology.
+Author-email: Lamin Labs <open-source@lamin.ai>
+Requires-Python: >=3.10,<3.14
+Description-Content-Type: text/markdown
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: lamin_utils==0.15.0
+Requires-Dist: lamin_cli==1.7.1
+Requires-Dist: lamindb_setup[aws]==1.10.1
+Requires-Dist: bionty>=1.7a1
+Requires-Dist: wetlab>=1.5a1
+Requires-Dist: nbproject==0.11.1
+Requires-Dist: jupytext
+Requires-Dist: nbconvert>=7.2.1
+Requires-Dist: mistune!=3.1.0
+Requires-Dist: pyyaml
+Requires-Dist: pyarrow
+Requires-Dist: pandera>=0.24.0
+Requires-Dist: typing_extensions!=4.6.0
+Requires-Dist: python-dateutil
+Requires-Dist: pandas>=2.0.0
+Requires-Dist: scipy<1.15.0
+Requires-Dist: anndata>=0.8.0,<=0.12.1
+Requires-Dist: fsspec
+Requires-Dist: graphviz
+Requires-Dist: psycopg2-binary
+Requires-Dist: tomlkit ; extra == "dev"
+Requires-Dist: line_profiler ; extra == "dev"
+Requires-Dist: pre-commit ; extra == "dev"
+Requires-Dist: nox ; extra == "dev"
+Requires-Dist: laminci>=0.3 ; extra == "dev"
+Requires-Dist: pytest>=6.0 ; extra == "dev"
+Requires-Dist: coverage ; extra == "dev"
+Requires-Dist: pytest-cov<7.0.0 ; extra == "dev"
+Requires-Dist: mudata ; extra == "dev"
+Requires-Dist: nbproject_test>=0.6.0 ; extra == "dev"
+Requires-Dist: faker-biology ; extra == "dev"
+Requires-Dist: pronto ; extra == "dev"
+Requires-Dist: readfcs>=2.0.1 ; extra == "fcs"
+Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
+Requires-Dist: numcodecs<0.16.0 ; extra == "zarr"
+Requires-Dist: zarr>=2.16.0,<3.0.0a0 ; extra == "zarr"
+Project-URL: Home, https://github.com/laminlabs/lamindb
+Provides-Extra: dev
+Provides-Extra: fcs
+Provides-Extra: gcp
+Provides-Extra: zarr
+
+[](https://github.com/laminlabs/lamindb)
+[](https://codecov.io/gh/laminlabs/lamindb)
+[](https://docs.lamin.ai)
+[](https://docs.lamin.ai/summary.md)
+[](https://pypi.org/project/lamindb)
+[](https://pepy.tech/project/lamindb)
+
+# LaminDB - A data lakehouse for biology
+
+LaminDB is an open-source data lakehouse to enable learning at scale in biology.
+It organizes datasets through validation & annotation and provides data lineage, queryability, and reproducibility on top of [FAIR](https://en.wikipedia.org/wiki/FAIR_data) data.
+
+<details>
+<summary>Why?</summary>
+
+Reproducing analytical results or understanding how a dataset or model was created can be a pain.
+Let alone training models on historical data, LIMS & ELN systems, orthogonal assays, or datasets generated by other teams.
+Even maintaining a mere overview of a project's or team's datasets & analyses is harder than it sounds.
+
+Biological datasets are typically managed with versioned storage systems, GUI-focused community or SaaS platforms, structureless data lakes, rigid data warehouses (SQL, monolithic arrays), and data lakehouses for tabular data.
+
+LaminDB extends the lakehouse architecture to biological registries & datasets beyond tables (`DataFrame`, `AnnData`, `.zarr`, `.tiledbsoma`, ...) with enough structure to enable queries and enough freedom to keep the pace of R&D high.
+Moreover, it provides context through data lineage -- tracing data and code, scientists and models -- and abstractions for biological domain knowledge and experimental metadata.
+
+</details>
+
+**Highlights.**
+
+- **data lineage:** track inputs & outputs of notebooks, scripts, functions & pipelines with a single line of code
+- **unified infrastructure:** access diverse storage locations (local, S3, GCP, ...), SQL databases (Postgres, SQLite) & ontologies
+- **lakehouse capabilities**: manage, monitor & validate features, labels & dataset schemas; perform distributed queries and batch loading
+- **biological data formats:** validate & annotate formats like `DataFrame`, `AnnData`, `MuData`, ... backed by `parquet`, `zarr`, HDF5, LanceDB, DuckDB, ...
+- **biological entities**: organize experimental metadata & extensible ontologies in registries based on the Django ORM
+- **reproducible & auditable:** auto-version & timestamp execution reports, source code & compute environments, attribute records to users
+- **zero lock-in & scalable:** runs in your infrastructure; is _not_ a client for a rate-limited REST API
+- **extendable:** create custom plug-ins for your own applications based on the Django ecosystem
+- **integrations:** visualization tools like [vitessce](https://docs.lamin.ai/vitessce), workflow managers like [nextflow](https://docs.lamin.ai/nextflow) & [redun](https://docs.lamin.ai/redun), and [other tools](https://docs.lamin.ai/integrations)
+- **production-ready:** used in BigPharma, BioTech, hospitals & top labs
+
+LaminDB can be connected to LaminHub to serve as a [LIMS](https://en.wikipedia.org/wiki/Laboratory_information_management_system) for wetlab scientists, closing the drylab-wetlab feedback loop: [lamin.ai](https://lamin.ai)
+
+## Docs
+
+Copy [summary.md](https://docs.lamin.ai/summary.md) into an LLM chat and let AI explain or read the [docs](https://docs.lamin.ai).
+
+## Setup
+
+<!-- copied from quick-setup-lamindb.md -->
+
+Install the `lamindb` Python package:
+
+```shell
+pip install lamindb
+```
+
+Create a LaminDB instance:
+
+```shell
+lamin init --storage ./quickstart-data # or s3://my-bucket, gs://my-bucket
+```
+
+Or if you have write access to an instance, connect to it:
+
+```shell
+lamin connect account/name
+```
+
+## Quickstart
+
+<!-- copied from preface.md -->
+
+Track a script or notebook run with source code, inputs, outputs, logs, and environment.
+
+<!-- copied from py-quickstart.py -->
+
+```python
+import lamindb as ln
+
+ln.track() # track a run
+open("sample.fasta", "w").write(">seq1\nACGT\n")
+ln.Artifact("sample.fasta", key="sample.fasta").save() # create an artifact
+ln.finish() # finish the run
+```
+
+<!-- from here on, slight deviation from preface.md, where all this is treated in the walk through in more depth -->
+
+This code snippet creates an artifact, which can store a dataset or model as a file or folder in various formats.
+Running the snippet as a script (`python create-fasta.py`) produces the following data lineage.
+
+```python
+artifact = ln.Artifact.get(key="sample.fasta") # query artifact by key
+artifact.view_lineage()
+```
+
+<img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/EkQATsQL5wqC95Wj0005.png" width="250">
+
+You'll know how that artifact was created and what it's used for ([interactive visualization](https://lamin.ai/laminlabs/lamindata/artifact/8incOOgjn6F0K1TS)) in addition to capturing basic metadata:
+
+```python
+artifact.describe()
+```
+
+<img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/BOTCBgHDAvwglN3U0002.png" width="550">
+
+You can organize datasets with validation & annotation of any kind of metadata to then access them via queries & search. Here is a more [comprehensive example](https://lamin.ai/laminlabs/lamindata/artifact/9K1dteZ6Qx0EXK8g).
+
+<img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/6sofuDVvTANB0f480002.png" width="850">
+
+To annotate an artifact with a label, use:
+
+```python
+my_experiment = ln.ULabel(name="My experiment").save() # create a label in the universal label ontology
+artifact.ulabels.add(my_experiment) # annotate the artifact with the label
+```
+
+To query for a set of artifacts, use the `filter()` statement.
+
+```python
+ln.Artifact.filter(ulabels=my_experiment, suffix=".fasta").to_dataframe() # query by suffix and the ulabel we just created
+ln.Artifact.filter(transform__key="create-fasta.py").to_dataframe() # query by the name of the script we just ran
+```
+
+If you have a structured dataset like a `DataFrame`, an `AnnData`, or another array, you can validate the content of the dataset (and parse annotations).
+Here is an example for a dataframe: [docs.lamin.ai/introduction#validate-an-artifact](https://docs.lamin.ai/introduction#validate-an-artifact).
+
+With a large body of validated datasets, you can then access data through distributed queries & batch streaming, see here: [docs.lamin.ai/arrays](https://docs.lamin.ai/arrays).
+
{lamindb-1.11.1.dist-info → lamindb-1.11.2.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-lamindb/__init__.py,sha256=
+lamindb/__init__.py,sha256=HGxNpCYe-NeHQHiQXY8bEEfDXbH0zN_ucGrXIEO7cBw,3241
 lamindb/_finish.py,sha256=4KkFyb9d-GEGjDw_zDrnGt_bq3auQ_OQu0hV-2U73AQ,21355
 lamindb/_tracked.py,sha256=fls9yd7EEGO9Ni51kA_pcBkeLpzm2HZrWtwYGQequNE,4395
 lamindb/_view.py,sha256=GOKTfwnEaly9fdeWo9SlhYRc3UWEyLDmTlIUzjFXMYY,4960
@@ -128,12 +128,12 @@ lamindb/models/feature.py,sha256=6kI3UZMp3gBi75NhXBp_jn-1tRlc42YdJOEJFNgjtNo,418
 lamindb/models/has_parents.py,sha256=Ok-Tsh4-oBDjkyevyMSuOCb0mzDiswiO_0ufTqWz8o4,20341
 lamindb/models/project.py,sha256=AhaY24iE8ilpS5dRFYzY6xQNVlqPHWMk72ib2OI_N8U,18732
 lamindb/models/query_manager.py,sha256=zjO31kbj1t08WTxeZB9-BPjooz6axh06EFu90dfvpSA,11358
-lamindb/models/query_set.py,sha256=
+lamindb/models/query_set.py,sha256=zwMlYRfjvAQ4NjIKpk19ipiP43UhGiUoU2wxntnBDUY,39591
 lamindb/models/record.py,sha256=2eKEDqpiX5-aN2kUVXLuTVvQUspqQ5h10onbQM6Ta7g,13041
 lamindb/models/run.py,sha256=LVZ2z5QV4aVYYsqGcLmMfqvPpKj4EGGHVts_RR8_c-E,14443
-lamindb/models/save.py,sha256=
+lamindb/models/save.py,sha256=CEifxocJuoLIuapqtiGk-eKvXfQSK4v389CWBo4FxOA,17264
 lamindb/models/schema.py,sha256=LQuPQhyLitloRGxq6DWZMHcR-xDZY1NctPHjrC6t1iw,49827
-lamindb/models/sqlrecord.py,sha256=
+lamindb/models/sqlrecord.py,sha256=kL2YS0lAENLXVMC5bzQuOZrRpujdLsBCJCf0sX_4PlE,72686
 lamindb/models/storage.py,sha256=n7jth0RQ19K0W8ICbrAO942d5jBm1-h7DsrSjMJgAB0,15551
 lamindb/models/transform.py,sha256=FcIPqmSk1hahg2Cr8q6lm1kKpbdvu2JUceSlPIV9Dww,12780
 lamindb/models/ulabel.py,sha256=UznidEEoiIns_KetWgCbDejXuM5hmzF6F49yqf2kLpQ,9495
@@ -142,7 +142,7 @@ lamindb/setup/_switch.py,sha256=njZJN__JOhVrBFGClQG1wobdhJJp6l_XzPGKtKSCrfU,434
 lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
 lamindb/setup/errors/__init__.py,sha256=bAHTxOUJW1rm4zpF0Pvqkftn8W6iMGnQ-uyNBu13Nfg,171
 lamindb/setup/types/__init__.py,sha256=ATaosOi6q-cDWB52T69_sRmLMqj8cHfc-vljzZsrJNw,169
-lamindb-1.11.
-lamindb-1.11.
-lamindb-1.11.
-lamindb-1.11.
+lamindb-1.11.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-1.11.2.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
+lamindb-1.11.2.dist-info/METADATA,sha256=69LKvBDk40m1gfwl-DIpwIxQqCxNt5zPWf0zuIz9pHc,8552
+lamindb-1.11.2.dist-info/RECORD,,
lamindb-1.11.1.dist-info/METADATA
REMOVED
@@ -1,139 +0,0 @@
-Metadata-Version: 2.3
-Name: lamindb
-Version: 1.11.1
-Summary: A data framework for biology.
-Author-email: Lamin Labs <open-source@lamin.ai>
-Requires-Python: >=3.10,<3.14
-Description-Content-Type: text/markdown
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Requires-Dist: lamin_utils==0.15.0
-Requires-Dist: lamin_cli==1.7.1
-Requires-Dist: lamindb_setup[aws]==1.10.1
-Requires-Dist: bionty>=1.7a1
-Requires-Dist: wetlab>=1.5a1
-Requires-Dist: nbproject==0.11.1
-Requires-Dist: jupytext
-Requires-Dist: nbconvert>=7.2.1
-Requires-Dist: mistune!=3.1.0
-Requires-Dist: pyyaml
-Requires-Dist: pyarrow
-Requires-Dist: pandera>=0.24.0
-Requires-Dist: typing_extensions!=4.6.0
-Requires-Dist: python-dateutil
-Requires-Dist: pandas>=2.0.0
-Requires-Dist: scipy<1.15.0
-Requires-Dist: anndata>=0.8.0,<=0.12.1
-Requires-Dist: fsspec
-Requires-Dist: graphviz
-Requires-Dist: psycopg2-binary
-Requires-Dist: tomlkit ; extra == "dev"
-Requires-Dist: line_profiler ; extra == "dev"
-Requires-Dist: pre-commit ; extra == "dev"
-Requires-Dist: nox ; extra == "dev"
-Requires-Dist: laminci>=0.3 ; extra == "dev"
-Requires-Dist: pytest>=6.0 ; extra == "dev"
-Requires-Dist: coverage ; extra == "dev"
-Requires-Dist: pytest-cov<7.0.0 ; extra == "dev"
-Requires-Dist: mudata ; extra == "dev"
-Requires-Dist: nbproject_test>=0.6.0 ; extra == "dev"
-Requires-Dist: faker-biology ; extra == "dev"
-Requires-Dist: pronto ; extra == "dev"
-Requires-Dist: readfcs>=2.0.1 ; extra == "fcs"
-Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
-Requires-Dist: numcodecs<0.16.0 ; extra == "zarr"
-Requires-Dist: zarr>=2.16.0,<3.0.0a0 ; extra == "zarr"
-Project-URL: Home, https://github.com/laminlabs/lamindb
-Provides-Extra: dev
-Provides-Extra: fcs
-Provides-Extra: gcp
-Provides-Extra: zarr
-
-[](https://github.com/laminlabs/lamindb)
-[](https://codecov.io/gh/laminlabs/lamindb)
-[](https://docs.lamin.ai)
-[](https://docs.lamin.ai/summary.md)
-[](https://pypi.org/project/lamindb)
-[](https://pepy.tech/project/lamindb)
-
-# LaminDB - A data framework for biology
-
-<!-- first two sentences sync from preface.md -->
-
-LaminDB is an open-source data framework to enable learning at scale in computational biology.
-It lets you track data transformations, validate & annotate datasets, and query a built-in database for biological metadata & data structures.
-
-## Setup
-
-<!-- copied from quick-setup-lamindb.md -->
-
-Install the `lamindb` Python package:
-
-```shell
-pip install lamindb
-```
-
-Create a LaminDB instance:
-
-```shell
-lamin init --storage ./quickstart-data # or s3://my-bucket, gs://my-bucket
-```
-
-Or if you have write access to an instance, connect to it:
-
-```shell
-lamin connect account/name
-```
-
-## Quickstart
-
-<!-- copied from preface.md -->
-
-Track a script or notebook run with source code, inputs, outputs, logs, and environment.
-
-<!-- copied from py-quickstart.py -->
-
-```python
-import lamindb as ln
-
-ln.track() # track a run
-open("sample.fasta", "w").write(">seq1\nACGT\n")
-ln.Artifact("sample.fasta", key="sample.fasta").save() # create an artifact
-ln.finish() # finish the run
-```
-
-<!-- from here on, slight deviation from preface.md, where all this is treated in the walk through in more depth -->
-
-Running this code inside a script via `python create-fasta.py` produces the following data lineage.
-
-```python
-artifact = ln.Artifact.get(key="sample.fasta") # query artifact by key
-artifact.view_lineage()
-```
-
-<img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/EkQATsQL5wqC95Wj0003.png" width="220">
-
-You'll know how that artifact was created.
-
-```python
-artifact.describe()
-```
-
-<img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/BOTCBgHDAvwglN3U0001.png" width="850">
-
-Conversely, you can query artifacts by the script that created them.
-
-```python
-ln.Artifact.get(transform__key="create-fasta.py") # query artifact by transform key
-```
-
-Data lineage is just one type of metadata to help analysis and model training through queries, validation, and annotation. Here is a more [comprehensive example](https://lamin.ai/laminlabs/lamindata/artifact/9K1dteZ6Qx0EXK8g).
-
-<img src="https://lamin-site-assets.s3.amazonaws.com/.lamindb/6sofuDVvTANB0f480001.png" width="850">
-
-## Docs
-
-Copy [summary.md](https://docs.lamin.ai/summary.md) into an LLM chat and let AI explain or read the [docs](https://docs.lamin.ai).
-
{lamindb-1.11.1.dist-info → lamindb-1.11.2.dist-info}/LICENSE
File without changes
{lamindb-1.11.1.dist-info → lamindb-1.11.2.dist-info}/WHEEL
File without changes