lamindb 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +25 -6
- lamindb/_finish.py +5 -5
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/_context.py +32 -6
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/mini_immuno.py +8 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +9 -9
- lamindb/core/storage/_valid_suffixes.py +1 -0
- lamindb/core/storage/_zarr.py +32 -107
- lamindb/curators/__init__.py +19 -2
- lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
- lamindb/curators/_legacy.py +15 -19
- lamindb/curators/core.py +247 -80
- lamindb/errors.py +2 -2
- lamindb/migrations/0069_squashed.py +8 -8
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
- lamindb/migrations/0073_merge_ourprojects.py +7 -7
- lamindb/migrations/0075_lamindbv1_part5.py +1 -1
- lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
- lamindb/migrations/0080_polish_lamindbv1.py +2 -2
- lamindb/migrations/0088_schema_components.py +1 -1
- lamindb/migrations/0090_runproject_project_runs.py +2 -2
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
- lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
- lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
- lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
- lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
- lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
- lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
- lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
- lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
- lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
- lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
- lamindb/models/__init__.py +35 -18
- lamindb/models/_describe.py +4 -4
- lamindb/models/_django.py +38 -4
- lamindb/models/_feature_manager.py +66 -123
- lamindb/models/_from_values.py +13 -13
- lamindb/models/_label_manager.py +8 -6
- lamindb/models/_relations.py +7 -7
- lamindb/models/artifact.py +166 -156
- lamindb/models/can_curate.py +25 -25
- lamindb/models/collection.py +48 -18
- lamindb/models/core.py +3 -3
- lamindb/models/feature.py +88 -60
- lamindb/models/has_parents.py +17 -17
- lamindb/models/project.py +52 -24
- lamindb/models/query_manager.py +5 -5
- lamindb/models/query_set.py +61 -37
- lamindb/models/record.py +158 -1583
- lamindb/models/run.py +39 -176
- lamindb/models/save.py +6 -6
- lamindb/models/schema.py +32 -43
- lamindb/models/sqlrecord.py +1743 -0
- lamindb/models/transform.py +17 -33
- lamindb/models/ulabel.py +21 -15
- {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
- lamindb-1.6.0.dist-info/RECORD +118 -0
- lamindb/core/storage/_anndata_sizes.py +0 -41
- lamindb/models/flextable.py +0 -163
- lamindb-1.5.3.dist-info/RECORD +0 -109
- {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
- {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0
lamindb/models/run.py
CHANGED
@@ -1,18 +1,16 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING,
|
3
|
+
from typing import TYPE_CHECKING, overload
|
4
4
|
|
5
5
|
import numpy as np
|
6
6
|
from django.db import models
|
7
7
|
from django.db.models import (
|
8
8
|
CASCADE,
|
9
9
|
PROTECT,
|
10
|
-
Q,
|
11
10
|
)
|
12
|
-
from django.db.utils import IntegrityError
|
13
11
|
from lamindb_setup import _check_instance_setup
|
14
|
-
from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict
|
15
12
|
|
13
|
+
from lamindb.base import deprecated
|
16
14
|
from lamindb.base.fields import (
|
17
15
|
BooleanField,
|
18
16
|
CharField,
|
@@ -20,22 +18,20 @@ from lamindb.base.fields import (
|
|
20
18
|
ForeignKey,
|
21
19
|
)
|
22
20
|
from lamindb.base.users import current_user_id
|
23
|
-
from lamindb.errors import InvalidArgument
|
21
|
+
from lamindb.errors import InvalidArgument
|
24
22
|
|
25
|
-
from ..base.ids import
|
23
|
+
from ..base.ids import base62_16
|
26
24
|
from .can_curate import CanCurate
|
27
|
-
from .
|
25
|
+
from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord
|
28
26
|
|
29
27
|
if TYPE_CHECKING:
|
30
28
|
from datetime import datetime
|
31
29
|
|
32
|
-
from lamindb.base.types import Dtype, FieldAttr
|
33
|
-
|
34
30
|
from .artifact import Artifact
|
35
31
|
from .collection import Collection
|
32
|
+
from .feature import FeatureValue
|
36
33
|
from .project import Project
|
37
34
|
from .query_set import QuerySet
|
38
|
-
from .schema import Schema
|
39
35
|
from .transform import Transform
|
40
36
|
from .ulabel import ULabel
|
41
37
|
|
@@ -43,14 +39,14 @@ if TYPE_CHECKING:
|
|
43
39
|
_TRACKING_READY: bool | None = None
|
44
40
|
|
45
41
|
|
46
|
-
class
|
47
|
-
"""
|
42
|
+
class FeatureManager:
|
43
|
+
"""Feature manager."""
|
48
44
|
|
49
45
|
pass
|
50
46
|
|
51
47
|
|
52
|
-
class
|
53
|
-
"""
|
48
|
+
class FeatureManagerRun(FeatureManager):
|
49
|
+
"""Feature manager."""
|
54
50
|
|
55
51
|
pass
|
56
52
|
|
@@ -140,7 +136,7 @@ class TracksUpdates(models.Model):
|
|
140
136
|
super().__init__(*args, **kwargs)
|
141
137
|
|
142
138
|
|
143
|
-
class User(
|
139
|
+
class User(BaseSQLRecord, CanCurate):
|
144
140
|
"""Users.
|
145
141
|
|
146
142
|
All data in this registry is synced from `lamin.ai` to ensure a universal
|
@@ -201,152 +197,7 @@ class User(BasicRecord, CanCurate):
|
|
201
197
|
super().__init__(*args, **kwargs)
|
202
198
|
|
203
199
|
|
204
|
-
class
|
205
|
-
"""Parameters of runs & models."""
|
206
|
-
|
207
|
-
class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
208
|
-
abstract = False
|
209
|
-
|
210
|
-
_name_field: str = "name"
|
211
|
-
|
212
|
-
name: str = CharField(max_length=100, db_index=True)
|
213
|
-
dtype: Dtype | None = CharField(db_index=True, null=True)
|
214
|
-
"""Data type (:class:`~lamindb.base.types.Dtype`)."""
|
215
|
-
type: Param | None = ForeignKey("self", PROTECT, null=True, related_name="records")
|
216
|
-
"""Type of param (e.g., 'Pipeline', 'ModelTraining', 'PostProcessing').
|
217
|
-
|
218
|
-
Allows to group features by type, e.g., all read outs, all metrics, etc.
|
219
|
-
"""
|
220
|
-
records: Param
|
221
|
-
"""Records of this type."""
|
222
|
-
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
223
|
-
"""Distinguish types from instances of the type."""
|
224
|
-
_expect_many: bool = models.BooleanField(default=False, db_default=False)
|
225
|
-
"""Indicates whether values for this param are expected to occur a single or multiple times for an artifact/run (default `False`).
|
226
|
-
|
227
|
-
- if it's `False` (default), the values mean artifact/run-level values and a dtype of `datetime` means `datetime`
|
228
|
-
- if it's `True`, the values are from an aggregation, which this seems like an edge case but when characterizing a model ensemble trained with different parameters it could be relevant
|
229
|
-
"""
|
230
|
-
schemas: Schema = models.ManyToManyField(
|
231
|
-
"Schema", through="SchemaParam", related_name="params"
|
232
|
-
)
|
233
|
-
"""Feature sets linked to this feature."""
|
234
|
-
# backward fields
|
235
|
-
values: ParamValue
|
236
|
-
"""Values for this parameter."""
|
237
|
-
|
238
|
-
@overload
|
239
|
-
def __init__(
|
240
|
-
self,
|
241
|
-
name: str,
|
242
|
-
dtype: Dtype | Registry | list[Registry] | FieldAttr,
|
243
|
-
type: Param | None = None,
|
244
|
-
is_type: bool = False,
|
245
|
-
): ...
|
246
|
-
|
247
|
-
@overload
|
248
|
-
def __init__(
|
249
|
-
self,
|
250
|
-
*db_args,
|
251
|
-
): ...
|
252
|
-
|
253
|
-
def __init__(self, *args, **kwargs):
|
254
|
-
from .feature import process_init_feature_param
|
255
|
-
|
256
|
-
if len(args) == len(self._meta.concrete_fields):
|
257
|
-
super().__init__(*args, **kwargs)
|
258
|
-
return None
|
259
|
-
|
260
|
-
dtype = kwargs.get("dtype", None)
|
261
|
-
kwargs = process_init_feature_param(args, kwargs, is_param=True)
|
262
|
-
super().__init__(*args, **kwargs)
|
263
|
-
dtype_str = kwargs.pop("dtype", None)
|
264
|
-
if not self._state.adding:
|
265
|
-
if not (
|
266
|
-
self.dtype.startswith("cat")
|
267
|
-
if dtype == "cat"
|
268
|
-
else self.dtype == dtype_str
|
269
|
-
):
|
270
|
-
raise ValidationError(
|
271
|
-
f"Feature {self.name} already exists with dtype {self.dtype}, you passed {dtype_str}"
|
272
|
-
)
|
273
|
-
|
274
|
-
|
275
|
-
# FeatureValue behaves in many ways like a link in a LinkORM
|
276
|
-
# in particular, we don't want a _public field on it
|
277
|
-
# Also, we don't inherit from TracksRun because a ParamValue
|
278
|
-
# is typically created before a run is created and we want to
|
279
|
-
# avoid delete cycles (for Model params though it might be helpful)
|
280
|
-
class ParamValue(Record):
|
281
|
-
"""Parameter values.
|
282
|
-
|
283
|
-
Is largely analogous to `FeatureValue`.
|
284
|
-
"""
|
285
|
-
|
286
|
-
# we do not have a unique constraint on param & value because it leads to hashing errors
|
287
|
-
# for large dictionaries: https://lamin.ai/laminlabs/lamindata/transform/jgTrkoeuxAfs0000
|
288
|
-
# we do not hash values because we have `get_or_create` logic all over the place
|
289
|
-
# and also for checking whether the (param, value) combination exists
|
290
|
-
# there does not seem an issue with querying for a dict-like value
|
291
|
-
# https://lamin.ai/laminlabs/lamindata/transform/jgTrkoeuxAfs0001
|
292
|
-
_name_field: str = "value"
|
293
|
-
|
294
|
-
param: Param = ForeignKey(Param, CASCADE, related_name="values")
|
295
|
-
"""The dimension metadata."""
|
296
|
-
value: Any = (
|
297
|
-
models.JSONField()
|
298
|
-
) # stores float, integer, boolean, datetime or dictionaries
|
299
|
-
"""The JSON-like value."""
|
300
|
-
# it'd be confusing and hard to populate a run here because these
|
301
|
-
# values are typically created upon creating a run
|
302
|
-
# hence, ParamValue does _not_ inherit from TracksRun but manually
|
303
|
-
# adds created_at & created_by
|
304
|
-
# because ParamValue cannot be updated, we don't need updated_at
|
305
|
-
created_at: datetime = DateTimeField(
|
306
|
-
editable=False, db_default=models.functions.Now(), db_index=True
|
307
|
-
)
|
308
|
-
"""Time of creation of record."""
|
309
|
-
created_by: User = ForeignKey(
|
310
|
-
User, PROTECT, default=current_user_id, related_name="+"
|
311
|
-
)
|
312
|
-
"""Creator of record."""
|
313
|
-
hash: str = CharField(max_length=HASH_LENGTH, null=True, db_index=True)
|
314
|
-
|
315
|
-
class Meta:
|
316
|
-
constraints = [
|
317
|
-
# For simple types, use direct value comparison
|
318
|
-
models.UniqueConstraint(
|
319
|
-
fields=["param", "value"],
|
320
|
-
name="unique_simple_param_value",
|
321
|
-
condition=Q(hash__isnull=True),
|
322
|
-
),
|
323
|
-
# For complex types (dictionaries), use hash
|
324
|
-
models.UniqueConstraint(
|
325
|
-
fields=["param", "hash"],
|
326
|
-
name="unique_complex_param_value",
|
327
|
-
condition=Q(hash__isnull=False),
|
328
|
-
),
|
329
|
-
]
|
330
|
-
|
331
|
-
@classmethod
|
332
|
-
def get_or_create(cls, param, value):
|
333
|
-
# Simple types: int, float, str, bool
|
334
|
-
if isinstance(value, (int, float, str, bool)):
|
335
|
-
try:
|
336
|
-
return cls.objects.create(param=param, value=value, hash=None), False
|
337
|
-
except IntegrityError:
|
338
|
-
return cls.objects.get(param=param, value=value), True
|
339
|
-
|
340
|
-
# Complex types: dict, list
|
341
|
-
else:
|
342
|
-
hash = hash_dict(value)
|
343
|
-
try:
|
344
|
-
return cls.objects.create(param=param, value=value, hash=hash), False
|
345
|
-
except IntegrityError:
|
346
|
-
return cls.objects.get(param=param, hash=hash), True
|
347
|
-
|
348
|
-
|
349
|
-
class Run(Record):
|
200
|
+
class Run(SQLRecord):
|
350
201
|
"""Runs of transforms such as the execution of a script.
|
351
202
|
|
352
203
|
A registry to store runs of transforms, such as an execution of a script.
|
@@ -381,14 +232,16 @@ class Run(Record):
|
|
381
232
|
|
382
233
|
_name_field: str = "started_at"
|
383
234
|
|
384
|
-
|
385
|
-
"""
|
235
|
+
features: FeatureManager = FeatureManagerRun # type: ignore
|
236
|
+
"""Features manager.
|
237
|
+
|
238
|
+
Run parameters are tracked via the `Feature` registry, just like all other variables.
|
386
239
|
|
387
240
|
Guide: :ref:`track-run-parameters`
|
388
241
|
|
389
242
|
Example::
|
390
243
|
|
391
|
-
run.
|
244
|
+
run.features.add_values({
|
392
245
|
"learning_rate": 0.01,
|
393
246
|
"input_dir": "s3://my-bucket/mydataset",
|
394
247
|
"downsample": True,
|
@@ -401,8 +254,9 @@ class Run(Record):
|
|
401
254
|
|
402
255
|
id: int = models.BigAutoField(primary_key=True)
|
403
256
|
"""Internal id, valid only in one DB instance."""
|
257
|
+
# default uid was changed from base62_20 to base62_16 in 1.6.0
|
404
258
|
uid: str = CharField(
|
405
|
-
editable=False, unique=True, db_index=True, max_length=20, default=
|
259
|
+
editable=False, unique=True, db_index=True, max_length=20, default=base62_16
|
406
260
|
)
|
407
261
|
"""Universal id, valid across DB instances."""
|
408
262
|
name: str | None = CharField(max_length=150, null=True)
|
@@ -446,10 +300,11 @@ class Run(Record):
|
|
446
300
|
"""The collections serving as input for this run."""
|
447
301
|
output_collections: Collection
|
448
302
|
"""The collections generated by this run."""
|
449
|
-
_param_values: ParamValue = models.ManyToManyField(
|
450
|
-
ParamValue, through="RunParamValue", related_name="runs"
|
451
|
-
)
|
452
303
|
"""Parameter values."""
|
304
|
+
_feature_values: FeatureValue = models.ManyToManyField(
|
305
|
+
"FeatureValue", through="RunFeatureValue", related_name="runs"
|
306
|
+
)
|
307
|
+
"""Feature values."""
|
453
308
|
reference: str | None = CharField(max_length=255, db_index=True, null=True)
|
454
309
|
"""A reference like a URL or external ID (such as from a workflow manager)."""
|
455
310
|
reference_type: str | None = CharField(max_length=25, db_index=True, null=True)
|
@@ -510,7 +365,7 @@ class Run(Record):
|
|
510
365
|
*args,
|
511
366
|
**kwargs,
|
512
367
|
):
|
513
|
-
self.
|
368
|
+
self.features = FeatureManager(self) # type: ignore
|
514
369
|
if len(args) == len(self._meta.concrete_fields):
|
515
370
|
super().__init__(*args, **kwargs)
|
516
371
|
return None
|
@@ -540,6 +395,11 @@ class Run(Record):
|
|
540
395
|
delete_run_artifacts(self)
|
541
396
|
super().delete()
|
542
397
|
|
398
|
+
@property
|
399
|
+
@deprecated("features")
|
400
|
+
def params(self) -> FeatureManager:
|
401
|
+
return self.features
|
402
|
+
|
543
403
|
@classmethod
|
544
404
|
def filter(
|
545
405
|
cls,
|
@@ -566,6 +426,7 @@ class Run(Record):
|
|
566
426
|
ln.Run.filter(hyperparam_x=100)
|
567
427
|
"""
|
568
428
|
from ._feature_manager import filter_base
|
429
|
+
from .feature import Feature
|
569
430
|
from .query_set import QuerySet
|
570
431
|
|
571
432
|
if expressions:
|
@@ -574,14 +435,14 @@ class Run(Record):
|
|
574
435
|
if field_or_feature_or_param in Run.__get_available_fields__():
|
575
436
|
return QuerySet(model=cls).filter(*queries, **expressions)
|
576
437
|
elif all(
|
577
|
-
params_validated :=
|
438
|
+
params_validated := Feature.validate(
|
578
439
|
keys_normalized, field="name", mute=True
|
579
440
|
)
|
580
441
|
):
|
581
|
-
return filter_base(
|
442
|
+
return filter_base(FeatureManagerRun, **expressions)
|
582
443
|
else:
|
583
444
|
params = ", ".join(sorted(np.array(keys_normalized)[~params_validated]))
|
584
|
-
message = f"
|
445
|
+
message = f"feature names: {params}"
|
585
446
|
fields = ", ".join(sorted(cls.__get_available_fields__()))
|
586
447
|
raise InvalidArgument(
|
587
448
|
f"You can query either by available fields: {fields}\n"
|
@@ -612,11 +473,13 @@ def delete_run_artifacts(run: Run) -> None:
|
|
612
473
|
report.delete(permanent=True)
|
613
474
|
|
614
475
|
|
615
|
-
class
|
476
|
+
class RunFeatureValue(BaseSQLRecord, IsLink):
|
616
477
|
id: int = models.BigAutoField(primary_key=True)
|
617
|
-
run: Run = ForeignKey(Run, CASCADE, related_name="
|
478
|
+
run: Run = ForeignKey(Run, CASCADE, related_name="links_featurevalue")
|
618
479
|
# we follow the lower() case convention rather than snake case for link models
|
619
|
-
|
480
|
+
featurevalue: FeatureValue = ForeignKey(
|
481
|
+
"FeatureValue", PROTECT, related_name="links_run"
|
482
|
+
)
|
620
483
|
created_at: datetime = DateTimeField(
|
621
484
|
editable=False, db_default=models.functions.Now(), db_index=True
|
622
485
|
)
|
@@ -627,4 +490,4 @@ class RunParamValue(BasicRecord, LinkORM):
|
|
627
490
|
"""Creator of record."""
|
628
491
|
|
629
492
|
class Meta:
|
630
|
-
unique_together = ("run", "
|
493
|
+
unique_together = ("run", "featurevalue")
|
lamindb/models/save.py
CHANGED
@@ -21,7 +21,7 @@ from ..core.storage.paths import (
|
|
21
21
|
delete_storage_using_key,
|
22
22
|
store_file_or_folder,
|
23
23
|
)
|
24
|
-
from .
|
24
|
+
from .sqlrecord import SQLRecord
|
25
25
|
|
26
26
|
if TYPE_CHECKING:
|
27
27
|
from collections.abc import Iterable
|
@@ -29,7 +29,7 @@ if TYPE_CHECKING:
|
|
29
29
|
from .artifact import Artifact
|
30
30
|
|
31
31
|
|
32
|
-
def save(records: Iterable[
|
32
|
+
def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) -> None:
|
33
33
|
"""Bulk save records.
|
34
34
|
|
35
35
|
Note:
|
@@ -42,7 +42,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
|
|
42
42
|
existing records! Use ``record.save()`` for these use cases.
|
43
43
|
|
44
44
|
Args:
|
45
|
-
records: Multiple :class:`~lamindb.models.
|
45
|
+
records: Multiple :class:`~lamindb.models.SQLRecord` objects.
|
46
46
|
ignore_conflicts: If ``True``, do not error if some records violate a
|
47
47
|
unique or another constraint. However, it won't inplace update the id
|
48
48
|
fields of records. If you need records with ids, you need to query
|
@@ -69,7 +69,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
|
|
69
69
|
"""
|
70
70
|
from .artifact import Artifact
|
71
71
|
|
72
|
-
if isinstance(records,
|
72
|
+
if isinstance(records, SQLRecord):
|
73
73
|
raise ValueError("Please use record.save() if saving a single record.")
|
74
74
|
|
75
75
|
# previously, this was all set based,
|
@@ -107,7 +107,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
|
|
107
107
|
return None
|
108
108
|
|
109
109
|
|
110
|
-
def bulk_create(records: Iterable[
|
110
|
+
def bulk_create(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False):
|
111
111
|
records_by_orm = defaultdict(list)
|
112
112
|
for record in records:
|
113
113
|
records_by_orm[record.__class__].append(record)
|
@@ -116,7 +116,7 @@ def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False
|
|
116
116
|
# records[:] = created # In-place list update; does not seem to be necessary
|
117
117
|
|
118
118
|
|
119
|
-
def bulk_update(records: Iterable[
|
119
|
+
def bulk_update(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False):
|
120
120
|
records_by_orm = defaultdict(list)
|
121
121
|
for record in records:
|
122
122
|
records_by_orm[record.__class__].append(record)
|
lamindb/models/schema.py
CHANGED
@@ -35,16 +35,16 @@ from .feature import (
|
|
35
35
|
serialize_dtype,
|
36
36
|
serialize_pandas_dtype,
|
37
37
|
)
|
38
|
-
from .
|
39
|
-
|
40
|
-
|
41
|
-
|
38
|
+
from .run import TracksRun, TracksUpdates
|
39
|
+
from .sqlrecord import (
|
40
|
+
BaseSQLRecord,
|
41
|
+
IsLink,
|
42
42
|
Registry,
|
43
|
+
SQLRecord,
|
43
44
|
_get_record_kwargs,
|
44
45
|
init_self_from_db,
|
45
46
|
update_attributes,
|
46
47
|
)
|
47
|
-
from .run import Param, TracksRun, TracksUpdates
|
48
48
|
|
49
49
|
if TYPE_CHECKING:
|
50
50
|
import pandas as pd
|
@@ -59,7 +59,7 @@ NUMBER_TYPE = "num"
|
|
59
59
|
DICT_KEYS_TYPE = type({}.keys()) # type: ignore
|
60
60
|
|
61
61
|
|
62
|
-
def validate_features(features: list[
|
62
|
+
def validate_features(features: list[SQLRecord]) -> SQLRecord:
|
63
63
|
"""Validate and return feature type."""
|
64
64
|
try:
|
65
65
|
if len(features) == 0:
|
@@ -70,7 +70,7 @@ def validate_features(features: list[Record]) -> Record:
|
|
70
70
|
) from None
|
71
71
|
if not hasattr(features, "__getitem__"):
|
72
72
|
raise TypeError("features has to be list-like")
|
73
|
-
if not isinstance(features[0],
|
73
|
+
if not isinstance(features[0], SQLRecord):
|
74
74
|
raise TypeError(
|
75
75
|
"features has to store feature records! use .from_values() otherwise"
|
76
76
|
)
|
@@ -84,8 +84,8 @@ def validate_features(features: list[Record]) -> Record:
|
|
84
84
|
|
85
85
|
|
86
86
|
def get_features_config(
|
87
|
-
features: list[
|
88
|
-
) -> tuple[list[
|
87
|
+
features: list[SQLRecord] | tuple[SQLRecord, dict],
|
88
|
+
) -> tuple[list[SQLRecord], list[tuple[SQLRecord, dict]]]:
|
89
89
|
"""Get features and their config from the return of feature.with_config()."""
|
90
90
|
features_list = []
|
91
91
|
configs = []
|
@@ -251,13 +251,13 @@ KNOWN_SCHEMAS = {
|
|
251
251
|
}
|
252
252
|
|
253
253
|
|
254
|
-
class Schema(
|
254
|
+
class Schema(SQLRecord, CanCurate, TracksRun):
|
255
255
|
"""Schemas of a dataset such as the set of columns of a `DataFrame`.
|
256
256
|
|
257
257
|
Composite schemas can have multiple slots, e.g., for an `AnnData`, one schema for slot `obs` and another one for `var`.
|
258
258
|
|
259
259
|
Args:
|
260
|
-
features: `list[
|
260
|
+
features: `list[SQLRecord] | list[tuple[Feature, dict]] | None = None` Feature
|
261
261
|
records, e.g., `[Feature(...), Feature(...)]` or Features with their config, e.g., `[Feature(...).with_config(optional=True)]`.
|
262
262
|
index: `Feature | None = None` A :class:`~lamindb.Feature` record to validate an index of a `DataFrame` and therefore also, e.g., `AnnData` obs and var indices.
|
263
263
|
slots: `dict[str, Schema] | None = None` A dictionary mapping slot names to :class:`~lamindb.Schema` objects.
|
@@ -350,7 +350,7 @@ class Schema(Record, CanCurate, TracksRun):
|
|
350
350
|
schema = ln.Schema.from_df(df)
|
351
351
|
"""
|
352
352
|
|
353
|
-
class Meta(
|
353
|
+
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
354
354
|
abstract = False
|
355
355
|
|
356
356
|
_name_field: str = "name"
|
@@ -363,18 +363,16 @@ class Schema(Record, CanCurate, TracksRun):
|
|
363
363
|
|
364
364
|
id: int = models.AutoField(primary_key=True)
|
365
365
|
"""Internal id, valid only in one DB instance."""
|
366
|
+
# Before lamindb 1.5, it was 20 char long. Since lamindb 1.5, it is 16 char long.
|
366
367
|
uid: str = CharField(editable=False, unique=True, db_index=True, max_length=20)
|
367
|
-
"""A universal id.
|
368
|
-
|
369
|
-
Before lamindb 1.5, it was 20 char long. Since lamindb 1.5, it is 16 char long.
|
370
|
-
"""
|
368
|
+
"""A universal id."""
|
371
369
|
name: str | None = CharField(max_length=150, null=True, db_index=True)
|
372
370
|
"""A name."""
|
373
371
|
description: str | None = CharField(null=True, db_index=True)
|
374
372
|
"""A description."""
|
375
373
|
n: int = IntegerField()
|
376
374
|
"""Number of features in the schema."""
|
377
|
-
type: Schema | None = ForeignKey("self", PROTECT, null=True, related_name="
|
375
|
+
type: Schema | None = ForeignKey("self", PROTECT, null=True, related_name="schemas")
|
378
376
|
"""Type of schema.
|
379
377
|
|
380
378
|
Allows to group schemas by type, e.g., all measurements evaluating gene expression vs. protein expression vs. multi modal.
|
@@ -383,8 +381,8 @@ class Schema(Record, CanCurate, TracksRun):
|
|
383
381
|
|
384
382
|
Here are a few more examples for type names: `'ExpressionPanel'`, `'ProteinPanel'`, `'Multimodal'`, `'Metadata'`, `'Embedding'`.
|
385
383
|
"""
|
386
|
-
|
387
|
-
"""
|
384
|
+
instances: Schema
|
385
|
+
"""Schemas of this type (can only be non-empty if `is_type` is `True`)."""
|
388
386
|
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
389
387
|
"""Distinguish types from instances of the type."""
|
390
388
|
itype: str | None = CharField(
|
@@ -434,8 +432,6 @@ class Schema(Record, CanCurate, TracksRun):
|
|
434
432
|
"""
|
435
433
|
features: Feature
|
436
434
|
"""The features contained in the schema."""
|
437
|
-
params: Param
|
438
|
-
"""The params contained in the schema."""
|
439
435
|
artifacts: Artifact
|
440
436
|
"""The artifacts that measure a feature set that matches this schema."""
|
441
437
|
validated_artifacts: Artifact
|
@@ -468,7 +464,7 @@ class Schema(Record, CanCurate, TracksRun):
|
|
468
464
|
@overload
|
469
465
|
def __init__(
|
470
466
|
self,
|
471
|
-
features: list[
|
467
|
+
features: list[SQLRecord] | list[tuple[Feature, dict]] | None = None,
|
472
468
|
index: Feature | None = None,
|
473
469
|
slots: dict[str, Schema] | None = None,
|
474
470
|
name: str | None = None,
|
@@ -503,12 +499,14 @@ class Schema(Record, CanCurate, TracksRun):
|
|
503
499
|
if len(args) > 1:
|
504
500
|
raise ValueError("Only one non-keyword arg allowed: features")
|
505
501
|
|
506
|
-
features: list[
|
502
|
+
features: list[SQLRecord] | None = (
|
503
|
+
args[0] if args else kwargs.pop("features", [])
|
504
|
+
)
|
507
505
|
index: Feature | None = kwargs.pop("index", None)
|
508
506
|
slots: dict[str, Schema] = kwargs.pop("slots", {})
|
509
507
|
name: str | None = kwargs.pop("name", None)
|
510
508
|
description: str | None = kwargs.pop("description", None)
|
511
|
-
itype: str |
|
509
|
+
itype: str | SQLRecord | DeferredAttribute | None = kwargs.pop("itype", None)
|
512
510
|
flexible: bool | None = kwargs.pop("flexible", None)
|
513
511
|
type: Feature | None = kwargs.pop("type", None)
|
514
512
|
is_type: bool = kwargs.pop("is_type", False)
|
@@ -590,12 +588,12 @@ class Schema(Record, CanCurate, TracksRun):
|
|
590
588
|
|
591
589
|
def _validate_kwargs_calculate_hash(
|
592
590
|
self,
|
593
|
-
features: list[
|
591
|
+
features: list[SQLRecord],
|
594
592
|
index: Feature | None,
|
595
593
|
slots: dict[str, Schema],
|
596
594
|
name: str | None,
|
597
595
|
description: str | None,
|
598
|
-
itype: str |
|
596
|
+
itype: str | SQLRecord | DeferredAttribute | None,
|
599
597
|
flexible: bool | None,
|
600
598
|
type: Feature | None,
|
601
599
|
is_type: bool,
|
@@ -737,8 +735,8 @@ class Schema(Record, CanCurate, TracksRun):
|
|
737
735
|
type: str | None = None,
|
738
736
|
name: str | None = None,
|
739
737
|
mute: bool = False,
|
740
|
-
organism:
|
741
|
-
source:
|
738
|
+
organism: SQLRecord | str | None = None,
|
739
|
+
source: SQLRecord | None = None,
|
742
740
|
raise_validation_error: bool = True,
|
743
741
|
) -> Schema:
|
744
742
|
"""Create feature set for validated features.
|
@@ -772,7 +770,7 @@ class Schema(Record, CanCurate, TracksRun):
|
|
772
770
|
"""
|
773
771
|
if not isinstance(field, FieldAttr):
|
774
772
|
raise TypeError(
|
775
|
-
"Argument `field` must be a
|
773
|
+
"Argument `field` must be a SQLRecord field, e.g., `Feature.name`"
|
776
774
|
)
|
777
775
|
if len(values) == 0:
|
778
776
|
raise ValueError("Provide a list of at least one value")
|
@@ -815,8 +813,8 @@ class Schema(Record, CanCurate, TracksRun):
|
|
815
813
|
field: FieldAttr = Feature.name,
|
816
814
|
name: str | None = None,
|
817
815
|
mute: bool = False,
|
818
|
-
organism:
|
819
|
-
source:
|
816
|
+
organism: SQLRecord | str | None = None,
|
817
|
+
source: SQLRecord | None = None,
|
820
818
|
) -> Schema | None:
|
821
819
|
"""Create schema for valid columns."""
|
822
820
|
registry = field.field.model
|
@@ -1147,7 +1145,7 @@ def _get_related_name(self: Schema) -> str:
|
|
1147
1145
|
return related_name
|
1148
1146
|
|
1149
1147
|
|
1150
|
-
class SchemaFeature(
|
1148
|
+
class SchemaFeature(BaseSQLRecord, IsLink):
|
1151
1149
|
id: int = models.BigAutoField(primary_key=True)
|
1152
1150
|
schema: Schema = ForeignKey(Schema, CASCADE, related_name="links_feature")
|
1153
1151
|
feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_schema")
|
@@ -1156,16 +1154,7 @@ class SchemaFeature(BasicRecord, LinkORM):
|
|
1156
1154
|
unique_together = ("schema", "feature")
|
1157
1155
|
|
1158
1156
|
|
1159
|
-
class
|
1160
|
-
id: int = models.BigAutoField(primary_key=True)
|
1161
|
-
schema: Schema = ForeignKey(Schema, CASCADE, related_name="+")
|
1162
|
-
param: Param = ForeignKey(Param, PROTECT, related_name="+")
|
1163
|
-
|
1164
|
-
class Meta:
|
1165
|
-
unique_together = ("schema", "param")
|
1166
|
-
|
1167
|
-
|
1168
|
-
class ArtifactSchema(BasicRecord, LinkORM, TracksRun):
|
1157
|
+
class ArtifactSchema(BaseSQLRecord, IsLink, TracksRun):
|
1169
1158
|
id: int = models.BigAutoField(primary_key=True)
|
1170
1159
|
artifact: Artifact = ForeignKey("Artifact", CASCADE, related_name="_links_schema")
|
1171
1160
|
schema: Schema = ForeignKey(Schema, PROTECT, related_name="_links_artifact")
|
@@ -1176,7 +1165,7 @@ class ArtifactSchema(BasicRecord, LinkORM, TracksRun):
|
|
1176
1165
|
unique_together = (("artifact", "schema"), ("artifact", "slot"))
|
1177
1166
|
|
1178
1167
|
|
1179
|
-
class SchemaComponent(
|
1168
|
+
class SchemaComponent(BaseSQLRecord, IsLink, TracksRun):
|
1180
1169
|
id: int = models.BigAutoField(primary_key=True)
|
1181
1170
|
composite: Schema = ForeignKey(Schema, CASCADE, related_name="links_composite")
|
1182
1171
|
component: Schema = ForeignKey(Schema, PROTECT, related_name="links_component")
|