lamindb 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +30 -25
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +2 -3
- lamindb/base/__init__.py +1 -1
- lamindb/base/ids.py +1 -10
- lamindb/core/__init__.py +7 -65
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +43 -20
- lamindb/core/_settings.py +6 -6
- lamindb/core/_sync_git.py +1 -1
- lamindb/core/loaders.py +30 -19
- lamindb/core/storage/_backed_access.py +4 -2
- lamindb/core/storage/_tiledbsoma.py +8 -6
- lamindb/core/storage/_zarr.py +104 -25
- lamindb/core/storage/objects.py +63 -28
- lamindb/core/storage/paths.py +4 -1
- lamindb/core/types.py +10 -0
- lamindb/curators/__init__.py +100 -85
- lamindb/errors.py +1 -1
- lamindb/integrations/_vitessce.py +4 -4
- lamindb/migrations/0089_subsequent_runs.py +159 -0
- lamindb/migrations/0090_runproject_project_runs.py +73 -0
- lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
- lamindb/models/__init__.py +79 -0
- lamindb/{core → models}/_describe.py +3 -3
- lamindb/{core → models}/_django.py +8 -5
- lamindb/{core → models}/_feature_manager.py +103 -87
- lamindb/{_from_values.py → models/_from_values.py} +5 -2
- lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
- lamindb/{core → models}/_label_manager.py +10 -17
- lamindb/{core/relations.py → models/_relations.py} +8 -1
- lamindb/models/artifact.py +2602 -0
- lamindb/{_can_curate.py → models/can_curate.py} +349 -180
- lamindb/models/collection.py +683 -0
- lamindb/models/core.py +135 -0
- lamindb/models/feature.py +643 -0
- lamindb/models/flextable.py +163 -0
- lamindb/{_parents.py → models/has_parents.py} +55 -49
- lamindb/models/project.py +384 -0
- lamindb/{_query_manager.py → models/query_manager.py} +10 -8
- lamindb/{_query_set.py → models/query_set.py} +40 -26
- lamindb/models/record.py +1762 -0
- lamindb/models/run.py +563 -0
- lamindb/{_save.py → models/save.py} +9 -7
- lamindb/models/schema.py +732 -0
- lamindb/models/transform.py +360 -0
- lamindb/models/ulabel.py +249 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/RECORD +51 -51
- lamindb/_artifact.py +0 -1379
- lamindb/_collection.py +0 -440
- lamindb/_feature.py +0 -316
- lamindb/_is_versioned.py +0 -40
- lamindb/_record.py +0 -1064
- lamindb/_run.py +0 -60
- lamindb/_schema.py +0 -347
- lamindb/_storage.py +0 -15
- lamindb/_transform.py +0 -170
- lamindb/_ulabel.py +0 -56
- lamindb/_utils.py +0 -9
- lamindb/base/validation.py +0 -63
- lamindb/core/_data.py +0 -491
- lamindb/core/fields.py +0 -12
- lamindb/models.py +0 -4475
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,360 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import warnings
|
4
|
+
from typing import TYPE_CHECKING, overload
|
5
|
+
|
6
|
+
from django.db import models
|
7
|
+
from django.db.models import PROTECT
|
8
|
+
from lamin_utils import logger
|
9
|
+
from lamindb_setup.core.hashing import HASH_LENGTH, hash_string
|
10
|
+
|
11
|
+
from lamindb.base.fields import (
|
12
|
+
CharField,
|
13
|
+
DateTimeField,
|
14
|
+
ForeignKey,
|
15
|
+
TextField,
|
16
|
+
)
|
17
|
+
from lamindb.base.users import current_user_id
|
18
|
+
|
19
|
+
from ..errors import InconsistentKey
|
20
|
+
from ..models._is_versioned import message_update_key_in_version_family, process_revises
|
21
|
+
from ._is_versioned import IsVersioned
|
22
|
+
from .record import Record, init_self_from_db, update_attributes
|
23
|
+
from .run import Run, User, delete_run_artifacts
|
24
|
+
|
25
|
+
if TYPE_CHECKING:
|
26
|
+
from datetime import datetime
|
27
|
+
|
28
|
+
from lamindb.base.types import TransformType
|
29
|
+
|
30
|
+
from .artifact import Artifact
|
31
|
+
from .collection import Collection
|
32
|
+
from .project import Project, Reference
|
33
|
+
from .ulabel import ULabel
|
34
|
+
|
35
|
+
|
36
|
+
# does not inherit from TracksRun because the Transform
|
37
|
+
# is needed to define a run
|
38
|
+
class Transform(Record, IsVersioned):
    """Data transformations.

    A "transform" can refer to a Python function, a script, a notebook, or a
    pipeline. If you execute a transform, you generate a run
    (:class:`~lamindb.Run`). A run has inputs and outputs.

    A pipeline is typically created with a workflow tool (Nextflow, Snakemake,
    Prefect, Flyte, MetaFlow, redun, Airflow, ...) and stored in a versioned
    repository.

    Transforms are versioned so that a given transform version maps on a given
    source code version.

    .. dropdown:: Can I sync transforms to git?

        If you switch on
        :attr:`~lamindb.core.Settings.sync_git_repo` a script-like transform is
        synced to its hashed state in a git repository upon calling `ln.track()`.

        >>> ln.settings.sync_git_repo = "https://github.com/laminlabs/lamindb"
        >>> ln.track()

    The definition of transforms and runs is consistent with the OpenLineage
    specification where a :class:`~lamindb.Transform` record would be called a
    "job" and a :class:`~lamindb.Run` record a "run".

    Args:
        key: `str | None = None` A short name or path-like semantic key.
        description: `str | None = None` A description.
        version: `str | None = None` A version string.
        type: `TransformType | None = "pipeline"` See :class:`~lamindb.base.types.TransformType`.
        revises: `Transform | None = None` An old version of the transform.
        reference: `str | None = None` A reference for the transform, e.g., a URL.
        reference_type: `str | None = None` The type of the reference, e.g., `"url"`.
        name: `str` Deprecated. Interpreted as `key` if no `key` is passed,
            otherwise as `description`; emits a `FutureWarning`.

    See Also:
        :meth:`~lamindb.core.Context.track`
            Globally track a script, notebook or pipeline run.
        :class:`~lamindb.Run`
            Executions of transforms.

    Notes:
        - :doc:`docs:track`
        - :doc:`docs:data-flow`
        - :doc:`docs:redun`
        - :doc:`docs:nextflow`
        - :doc:`docs:snakemake`

    Examples:

        Create a transform for a pipeline:

        >>> transform = ln.Transform(key="Cell Ranger", version="7.2.0", type="pipeline").save()

        Create a transform from a notebook:

        >>> ln.track()

        View predecessors of a transform:

        >>> transform.view_lineage()
    """

    class Meta(Record.Meta, IsVersioned.Meta):
        abstract = False

    _len_stem_uid: int = 12
    _len_full_uid: int = 16
    _name_field: str = "key"

    id: int = models.AutoField(primary_key=True)
    """Internal id, valid only in one DB instance."""
    uid: str = CharField(
        editable=False, unique=True, db_index=True, max_length=_len_full_uid
    )
    """Universal id."""
    key: str | None = CharField(db_index=True, null=True)
    """A name or "/"-separated path-like string.

    All transforms with the same key are part of the same version family.
    """
    description: str | None = CharField(db_index=True, null=True)
    """A description."""
    type: TransformType = CharField(
        max_length=20,
        db_index=True,
        default="pipeline",
    )
    """:class:`~lamindb.base.types.TransformType` (default `"pipeline"`)."""
    source_code: str | None = TextField(null=True)
    """Source code of the transform.

    .. versionchanged:: 0.75
       The `source_code` field is no longer an artifact, but a text field.
    """
    # we have a unique constraint here but not on artifact because on artifact, we haven't yet
    # settled how we model the same artifact in different storage locations
    hash: str | None = CharField(
        max_length=HASH_LENGTH, db_index=True, null=True, unique=True
    )
    """Hash of the source code."""
    reference: str | None = CharField(max_length=255, db_index=True, null=True)
    """Reference for the transform, e.g., a URL."""
    reference_type: str | None = CharField(max_length=25, db_index=True, null=True)
    """Reference type of the transform, e.g., 'url'."""
    runs: Run
    """Runs of this transform."""
    ulabels: ULabel = models.ManyToManyField(
        "ULabel", through="TransformULabel", related_name="transforms"
    )
    """ULabel annotations of this transform."""
    predecessors: Transform = models.ManyToManyField(
        "self", symmetrical=False, related_name="successors"
    )
    """Preceding transforms.

    These are auto-populated whenever an artifact or collection serves as a run
    input, e.g., `artifact.run` and `artifact.transform` get populated & saved.

    The table provides a more convenient method to query for the predecessors that
    bypasses querying the :class:`~lamindb.Run`.

    It also allows to manually add predecessors whose outputs are not tracked in a run.
    """
    successors: Transform
    """Subsequent transforms.

    See :attr:`~lamindb.Transform.predecessors`.
    """
    output_artifacts: Artifact
    """The artifacts generated by all runs of this transform.

    If you're looking for the outputs of a single run, see :attr:`lamindb.Run.output_artifacts`.
    """
    output_collections: Collection
    """The collections generated by all runs of this transform.

    If you're looking for the outputs of a single run, see :attr:`lamindb.Run.output_collections`.
    """
    projects: Project
    """Linked projects."""
    references: Reference
    """Linked references."""
    created_at: datetime = DateTimeField(
        editable=False, db_default=models.functions.Now(), db_index=True
    )
    """Time of creation of record."""
    updated_at: datetime = DateTimeField(
        editable=False, db_default=models.functions.Now(), db_index=True
    )
    """Time of last update to record."""
    created_by: User = ForeignKey(
        User, PROTECT, default=current_user_id, related_name="created_transforms"
    )
    """Creator of record."""
    _template: Transform | None = ForeignKey(
        "Transform", PROTECT, related_name="_derived_from", default=None, null=True
    )
    """Creating template."""

    @overload
    def __init__(
        self,
        name: str,
        key: str | None = None,
        type: TransformType | None = None,
        revises: Transform | None = None,
    ): ...

    @overload
    def __init__(
        self,
        *db_args,
    ): ...

    def __init__(
        self,
        *args,
        **kwargs,
    ):
        """Construct a transform from keyword arguments or from a full database row.

        When the number of positional arguments equals the number of concrete
        model fields, the arguments are forwarded unchanged to the parent
        constructor. Otherwise the keyword arguments are validated, an existing
        version to revise is looked up by `uid` or `key`, and either an existing
        record is re-used or a new one is initialized.

        Raises:
            ValueError: If unknown keyword arguments are passed.
            InconsistentKey: If `key` differs from the key of the transform
                that is being revised.
        """
        # a full positional row: pass straight through to the parent constructor
        if len(args) == len(self._meta.concrete_fields):
            super().__init__(*args, **kwargs)
            return None
        # NOTE(review): positional args that do not match the full column count are
        # silently ignored from here on (ULabel raises in that case) — confirm intent
        key: str | None = kwargs.pop("key", None)
        description: str | None = kwargs.pop("description", None)
        revises: Transform | None = kwargs.pop("revises", None)
        version: str | None = kwargs.pop("version", None)
        type: TransformType | None = kwargs.pop("type", "pipeline")
        reference: str | None = kwargs.pop("reference", None)
        reference_type: str | None = kwargs.pop("reference_type", None)
        using_key = kwargs.pop("using_key", None)
        if "name" in kwargs:
            # `name` is deprecated: it fills `key` if that is unset, else `description`
            if key is None:
                key = kwargs.pop("name")
                warnings.warn(
                    f"`name` will be removed soon, please pass '{key}' to `key` instead",
                    FutureWarning,
                    stacklevel=2,
                )
            else:
                # `description` didn't exist before, so no check is necessary
                description = kwargs.pop("name")
                warnings.warn(
                    f"`name` will be removed soon, please pass '{description}' to `description` instead",
                    FutureWarning,
                    stacklevel=2,
                )
        # below is internal use that we'll hopefully be able to eliminate
        uid: str | None = kwargs.pop("uid", None)
        source_code: str | None = kwargs.pop("source_code", None)
        if kwargs:
            raise ValueError(
                "Only key, description, version, type, revises, reference, "
                f"reference_type can be passed, but you passed: {kwargs}"
            )
        if revises is None:
            # need to check uid before checking key
            if uid is not None:
                # strip the last 4 characters of the uid to match on the remaining prefix
                revises = (
                    Transform.objects.using(using_key)
                    .filter(uid__startswith=uid[:-4], is_latest=True)
                    .order_by("-created_at")
                    .first()
                )
            elif key is not None:
                candidate_for_revises = (
                    Transform.objects.using(using_key)
                    .filter(key=key, is_latest=True)
                    .order_by("-created_at")
                    .first()
                )
                if candidate_for_revises is not None:
                    revises = candidate_for_revises
                    if candidate_for_revises.source_code is None:
                        # no source code was yet saved, return the same transform
                        logger.important(
                            "no source code was yet saved, returning existing transform with same key"
                        )
                        uid = revises.uid
        # the uid identifies an existing record: load it instead of creating a new one
        if revises is not None and uid is not None and uid == revises.uid:
            if revises.key != key:
                logger.warning("ignoring inconsistent key")
            init_self_from_db(self, revises)
            update_attributes(self, {"description": description})
            return None
        if revises is not None and key is not None and revises.key != key:
            note = message_update_key_in_version_family(
                suid=revises.stem_uid,
                existing_key=revises.key,
                new_key=key,
                registry="Transform",
            )
            raise InconsistentKey(
                f"`key` is '{key}', but `revises.key` is '{revises.key}'\n\nEither do *not* pass `key`.\n\n{note}"
            )
        new_uid, version, key, description, revises = process_revises(
            revises, version, key, description, Transform
        )
        # this is only because the user-facing constructor allows passing a uid
        # most others don't
        if uid is None:
            has_consciously_provided_uid = False
            uid = new_uid
        else:
            has_consciously_provided_uid = True
        hash = None
        if source_code is not None:
            hash = hash_string(source_code)
            # NOTE(review): unlike the lookups above, this query does not pass
            # `using_key` — confirm that this is intended
            transform_candidate = Transform.filter(
                hash=hash, is_latest=True
            ).one_or_none()
            if transform_candidate is not None:
                # identical source code already registered: re-use that record
                init_self_from_db(self, transform_candidate)
                update_attributes(self, {"key": key, "description": description})
                return None
        super().__init__(  # type: ignore
            uid=uid,
            description=description,
            key=key,
            type=type,
            version=version,
            reference=reference,
            reference_type=reference_type,
            source_code=source_code,
            hash=hash,
            _has_consciously_provided_uid=has_consciously_provided_uid,
            revises=revises,
        )

    @property
    def name(self) -> str | None:
        """Name of the transform.

        Splits `key` on `/` and returns the last element.

        Returns `None` if `key` is not set (the `key` column is nullable).
        """
        if self.key is None:
            return None
        return self.key.split("/")[-1]

    @property
    def latest_run(self) -> Run | None:
        """The latest run of this transform, or `None` if there is no run."""
        return self.runs.order_by("-started_at").first()

    def delete(self) -> None:
        """Delete the transform after handling the artifacts of all its runs."""
        # query all runs and delete their artifacts
        runs = Run.filter(transform=self)
        for run in runs:
            delete_run_artifacts(run)
        # at this point, all artifacts have been taken care of
        # we can now leverage CASCADE delete
        super().delete()

    def view_lineage(self, with_successors: bool = False, distance: int = 5):
        """View lineage of transforms.

        Args:
            with_successors: Forwarded as `with_children` to `_view_parents`.
            distance: Forwarded as `distance` to `_view_parents`.
        """
        # imported locally, presumably to avoid a circular import — TODO confirm
        from .has_parents import _view_parents

        return _view_parents(
            record=self,
            field="key",
            with_children=with_successors,
            distance=distance,
            attr_name="predecessors",
        )
|
lamindb/models/ulabel.py
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING, overload
|
4
|
+
|
5
|
+
from django.db import models
|
6
|
+
from django.db.models import CASCADE, PROTECT
|
7
|
+
|
8
|
+
from lamindb.base.fields import (
|
9
|
+
BooleanField,
|
10
|
+
CharField,
|
11
|
+
DateTimeField,
|
12
|
+
ForeignKey,
|
13
|
+
)
|
14
|
+
from lamindb.errors import FieldValidationError
|
15
|
+
|
16
|
+
from ..base.ids import base62_8
|
17
|
+
from .can_curate import CanCurate
|
18
|
+
from .feature import Feature
|
19
|
+
from .has_parents import HasParents
|
20
|
+
from .record import BasicRecord, LinkORM, Record, _get_record_kwargs
|
21
|
+
from .run import Run, TracksRun, TracksUpdates, User, current_user_id
|
22
|
+
from .transform import Transform
|
23
|
+
|
24
|
+
if TYPE_CHECKING:
|
25
|
+
from datetime import datetime
|
26
|
+
|
27
|
+
from .artifact import Artifact
|
28
|
+
from .collection import Collection
|
29
|
+
from .project import Project
|
30
|
+
|
31
|
+
|
32
|
+
class ULabel(Record, HasParents, CanCurate, TracksRun, TracksUpdates):
    """Universal labels.

    Args:
        name: `str` A name.
        type: `ULabel | None = None` The type of this ulabel, itself a `ULabel` record.
        is_type: `bool = False` Whether this record is a type rather than an instance of a type.
        description: `str | None = None` A description.
        reference: `str | None = None` For instance, an external ID or a URL.
        reference_type: `str | None = None` For instance, `"url"`.

    A `ULabel` record provides the easiest way to annotate a dataset
    with a label: `"My project"`, `"curated"`, or `"Batch X"`:

        >>> my_project = ULabel(name="My project")
        >>> my_project.save()
        >>> artifact.ulabels.add(my_project)

    Often, a ulabel is measured *within* a dataset. For instance, an artifact
    might characterize 2 species of the Iris flower (`"setosa"` &
    `"versicolor"`) measured by a `"species"` feature. Use the
    :class:`~lamindb.Curator` flow to automatically parse, validate, and
    annotate with labels that are contained in `DataFrame` or `AnnData`
    artifacts.

    .. note::

        If you work with complex entities like cell lines, cell types, tissues,
        etc., consider using the pre-defined biological registries in
        :mod:`bionty` to label artifacts & collections.

        If you work with biological samples, likely, the only sustainable way of
        tracking metadata, is to create a custom schema module.

    See Also:
        :class:`~lamindb.Feature`
            Dimensions of measurement for artifacts & collections.
        :attr:`~lamindb.Artifact.features`
            Feature manager for an artifact.

    Examples:

        Create a new label:

        >>> train_split = ln.ULabel(name="train").save()

        Organize labels in a hierarchy:

        >>> split_type = ln.ULabel(name="Split", is_type=True).save()
        >>> train_split = ln.ULabel(name="train", type=split_type).save()

        Label an artifact:

        >>> artifact.ulabels.add(ulabel)

        Query by `ULabel`:

        >>> ln.Artifact.filter(ulabels=train_split)
    """

    class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
        abstract = False

    _name_field: str = "name"

    id: int = models.AutoField(primary_key=True)
    """Internal id, valid only in one DB instance."""
    uid: str = CharField(
        editable=False, unique=True, db_index=True, max_length=8, default=base62_8
    )
    """A universal random id, valid across DB instances."""
    name: str = CharField(max_length=150, db_index=True)
    """Name or title of ulabel."""
    type: ULabel | None = ForeignKey("self", PROTECT, null=True, related_name="records")
    """Type of ulabel, e.g., `"donor"`, `"split"`, etc.

    Allows to group ulabels by type, e.g., all donors, all split ulabels, etc.
    """
    records: ULabel
    """Records of this type."""
    is_type: bool = BooleanField(default=False, db_index=True, null=True)
    """Distinguish types from instances of the type.

    For example, a ulabel "Project" would be a type, and the actual projects "Project 1", "Project 2", would be records of that `type`.
    """
    description: str | None = CharField(null=True, db_index=True)
    """A description (optional)."""
    reference: str | None = CharField(max_length=255, db_index=True, null=True)
    """A simple reference like URL or external ID."""
    reference_type: str | None = CharField(max_length=25, db_index=True, null=True)
    """Type of simple reference."""
    parents: ULabel = models.ManyToManyField(
        "self", symmetrical=False, related_name="children"
    )
    """Parent entities of this ulabel.

    For advanced use cases, you can build an ontology under a given `type`.

    Say, if you modeled `CellType` as a `ULabel`, you would introduce a type `CellType` and model the hierarchy of cell types under it.
    """
    children: ULabel
    """Child entities of this ulabel.

    Reverse accessor for parents.
    """
    transforms: Transform
    """Linked transforms."""
    runs: Run
    """Linked runs."""
    artifacts: Artifact
    """Linked artifacts."""
    collections: Collection
    """Linked collections."""
    projects: Project
    """Linked projects."""

    @overload
    def __init__(
        self,
        name: str,
        type: ULabel | None = None,
        is_type: bool = False,
        description: str | None = None,
        reference: str | None = None,
        reference_type: str | None = None,
    ): ...

    @overload
    def __init__(
        self,
        *db_args,
    ): ...

    def __init__(
        self,
        *args,
        **kwargs,
    ):
        """Construct a ulabel from keyword arguments or from a full database row.

        When the number of positional arguments equals the number of concrete
        model fields, the arguments are forwarded unchanged to the parent
        constructor. Any other positional argument is rejected.

        Raises:
            ValueError: If positional arguments are passed that do not form a
                full database row.
            FieldValidationError: If unknown keyword arguments are passed.
        """
        # a full positional row: pass straight through to the parent constructor
        if len(args) == len(self._meta.concrete_fields):
            super().__init__(*args, **kwargs)
            return None
        if args:
            # no positional argument is accepted on the user-facing path
            raise ValueError("Only keyword args allowed")
        name: str = kwargs.pop("name", None)
        type: ULabel | None = kwargs.pop("type", None)
        is_type: bool = kwargs.pop("is_type", False)
        description: str | None = kwargs.pop("description", None)
        reference: str | None = kwargs.pop("reference", None)
        reference_type: str | None = kwargs.pop("reference_type", None)
        if kwargs:
            # anything left over is not a recognized keyword argument
            valid_keywords = ", ".join([val[0] for val in _get_record_kwargs(ULabel)])
            raise FieldValidationError(
                f"Only {valid_keywords} are valid keyword arguments"
            )
        super().__init__(
            name=name,
            type=type,
            is_type=is_type,
            description=description,
            reference=reference,
            reference_type=reference_type,
        )
|
192
|
+
|
193
|
+
|
194
|
+
class ArtifactULabel(BasicRecord, LinkORM, TracksRun):
    """Link record between an artifact and a ulabel, optionally tied to a feature."""

    id: int = models.BigAutoField(primary_key=True)
    # CASCADE: the link is removed together with its artifact
    artifact: Artifact = ForeignKey("Artifact", CASCADE, related_name="links_ulabel")
    # PROTECT: a ulabel that is still linked cannot be deleted
    ulabel: ULabel = ForeignKey(ULabel, PROTECT, related_name="links_artifact")
    # optional feature under which the ulabel is linked
    feature: Feature | None = ForeignKey(
        Feature, PROTECT, null=True, related_name="links_artifactulabel", default=None
    )
    # NOTE(review): presumably flags whether label/feature were referenced by name — confirm
    label_ref_is_name: bool | None = BooleanField(null=True)
    feature_ref_is_name: bool | None = BooleanField(null=True)

    class Meta:
        # can have the same label linked to the same artifact if the feature is
        # different
        unique_together = ("artifact", "ulabel", "feature")
|
208
|
+
|
209
|
+
|
210
|
+
class TransformULabel(BasicRecord, LinkORM, TracksRun):
    """Link record between a transform and a ulabel."""

    id: int = models.BigAutoField(primary_key=True)
    # CASCADE: the link is removed together with its transform
    transform: Transform = ForeignKey(Transform, CASCADE, related_name="links_ulabel")
    # PROTECT: a ulabel that is still linked cannot be deleted
    ulabel: ULabel = ForeignKey(ULabel, PROTECT, related_name="links_transform")

    class Meta:
        # each ulabel can be linked to a given transform only once
        unique_together = ("transform", "ulabel")
|
217
|
+
|
218
|
+
|
219
|
+
class RunULabel(BasicRecord, LinkORM):
    """Link record between a run and a ulabel.

    Declares `created_at` and `created_by` itself rather than inheriting from `TracksRun`.
    """

    id: int = models.BigAutoField(primary_key=True)
    # CASCADE: the link is removed together with its run
    run: Run = ForeignKey(Run, CASCADE, related_name="links_ulabel")
    # PROTECT: a ulabel that is still linked cannot be deleted
    ulabel: ULabel = ForeignKey(ULabel, PROTECT, related_name="links_run")
    created_at: datetime = DateTimeField(
        editable=False, db_default=models.functions.Now(), db_index=True
    )
    """Time of creation of record."""
    # related_name="+" disables the reverse accessor on User
    created_by: User = ForeignKey(
        "lamindb.User", PROTECT, default=current_user_id, related_name="+"
    )
    """Creator of record."""

    class Meta:
        # each ulabel can be linked to a given run only once
        unique_together = ("run", "ulabel")
|
234
|
+
|
235
|
+
|
236
|
+
class CollectionULabel(BasicRecord, LinkORM, TracksRun):
    """Link record between a collection and a ulabel, optionally tied to a feature."""

    id: int = models.BigAutoField(primary_key=True)
    # CASCADE: the link is removed together with its collection
    collection: Collection = ForeignKey(
        "Collection", CASCADE, related_name="links_ulabel"
    )
    # PROTECT: a ulabel that is still linked cannot be deleted
    ulabel: ULabel = ForeignKey(ULabel, PROTECT, related_name="links_collection")
    # optional feature under which the ulabel is linked
    feature: Feature | None = ForeignKey(
        Feature, PROTECT, null=True, related_name="links_collectionulabel", default=None
    )
    # NOTE(review): presumably flags whether label/feature were referenced by name — confirm
    label_ref_is_name: bool | None = BooleanField(null=True)
    feature_ref_is_name: bool | None = BooleanField(null=True)

    class Meta:
        # NOTE(review): unlike ArtifactULabel, "feature" is not part of the constraint, so
        # the same ulabel cannot be linked to a collection under two features — confirm
        # whether this is intended (changing it would require a migration)
        unique_together = ("collection", "ulabel")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lamindb
|
3
|
-
Version: 1.1.1
|
3
|
+
Version: 1.2.0
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.10,<3.13
|
@@ -10,7 +10,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.12
|
11
11
|
Requires-Dist: lamin_utils==0.13.10
|
12
12
|
Requires-Dist: lamin_cli==1.1.0
|
13
|
-
Requires-Dist: lamindb_setup[aws]==1.
|
13
|
+
Requires-Dist: lamindb_setup[aws]==1.3.0
|
14
14
|
Requires-Dist: pyyaml
|
15
15
|
Requires-Dist: pyarrow
|
16
16
|
Requires-Dist: pandera
|
@@ -22,9 +22,9 @@ Requires-Dist: anndata>=0.8.0,<=0.11.3
|
|
22
22
|
Requires-Dist: fsspec
|
23
23
|
Requires-Dist: graphviz
|
24
24
|
Requires-Dist: psycopg2-binary
|
25
|
-
Requires-Dist: bionty
|
25
|
+
Requires-Dist: bionty ; extra == "bionty"
|
26
26
|
Requires-Dist: cellregistry ; extra == "cellregistry"
|
27
|
-
Requires-Dist: clinicore
|
27
|
+
Requires-Dist: clinicore ; extra == "clinicore"
|
28
28
|
Requires-Dist: tomlkit ; extra == "dev"
|
29
29
|
Requires-Dist: line_profiler ; extra == "dev"
|
30
30
|
Requires-Dist: pre-commit ; extra == "dev"
|
@@ -39,12 +39,12 @@ Requires-Dist: faker-biology ; extra == "dev"
|
|
39
39
|
Requires-Dist: django-schema-graph ; extra == "erdiagram"
|
40
40
|
Requires-Dist: readfcs>=2.0.1 ; extra == "fcs"
|
41
41
|
Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
|
42
|
-
Requires-Dist: nbproject==0.10.
|
42
|
+
Requires-Dist: nbproject==0.10.6 ; extra == "jupyter"
|
43
43
|
Requires-Dist: jupytext ; extra == "jupyter"
|
44
44
|
Requires-Dist: nbconvert>=7.2.1 ; extra == "jupyter"
|
45
45
|
Requires-Dist: mistune!=3.1.0 ; extra == "jupyter"
|
46
46
|
Requires-Dist: omop ; extra == "omop"
|
47
|
-
Requires-Dist: wetlab
|
47
|
+
Requires-Dist: wetlab ; extra == "wetlab"
|
48
48
|
Requires-Dist: zarr>=2.16.0,<3.0.0a0 ; extra == "zarr"
|
49
49
|
Project-URL: Home, https://github.com/laminlabs/lamindb
|
50
50
|
Provides-Extra: bionty
|