lamindb 1.5.2__py3-none-any.whl → 1.6a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. lamindb/__init__.py +24 -6
  2. lamindb/_finish.py +5 -5
  3. lamindb/_tracked.py +1 -1
  4. lamindb/_view.py +4 -4
  5. lamindb/core/_context.py +32 -6
  6. lamindb/core/_settings.py +1 -1
  7. lamindb/core/datasets/mini_immuno.py +8 -0
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +9 -9
  10. lamindb/core/storage/_valid_suffixes.py +1 -0
  11. lamindb/core/storage/_zarr.py +32 -107
  12. lamindb/curators/__init__.py +19 -2
  13. lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
  14. lamindb/curators/_legacy.py +15 -19
  15. lamindb/curators/core.py +247 -80
  16. lamindb/errors.py +2 -2
  17. lamindb/migrations/0069_squashed.py +8 -8
  18. lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
  19. lamindb/migrations/0073_merge_ourprojects.py +7 -7
  20. lamindb/migrations/0075_lamindbv1_part5.py +1 -1
  21. lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
  22. lamindb/migrations/0080_polish_lamindbv1.py +2 -2
  23. lamindb/migrations/0088_schema_components.py +1 -1
  24. lamindb/migrations/0090_runproject_project_runs.py +2 -2
  25. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
  26. lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
  27. lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
  28. lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
  29. lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
  30. lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
  31. lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
  32. lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
  33. lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
  34. lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
  35. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
  36. lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
  37. lamindb/models/__init__.py +35 -18
  38. lamindb/models/_describe.py +4 -4
  39. lamindb/models/_django.py +38 -4
  40. lamindb/models/_feature_manager.py +66 -123
  41. lamindb/models/_from_values.py +13 -13
  42. lamindb/models/_label_manager.py +8 -6
  43. lamindb/models/_relations.py +7 -7
  44. lamindb/models/artifact.py +166 -156
  45. lamindb/models/can_curate.py +25 -25
  46. lamindb/models/collection.py +48 -18
  47. lamindb/models/core.py +3 -3
  48. lamindb/models/feature.py +88 -60
  49. lamindb/models/has_parents.py +17 -17
  50. lamindb/models/project.py +52 -24
  51. lamindb/models/query_manager.py +5 -5
  52. lamindb/models/query_set.py +61 -37
  53. lamindb/models/record.py +158 -1583
  54. lamindb/models/run.py +39 -176
  55. lamindb/models/save.py +6 -6
  56. lamindb/models/schema.py +33 -44
  57. lamindb/models/sqlrecord.py +1743 -0
  58. lamindb/models/transform.py +17 -33
  59. lamindb/models/ulabel.py +21 -15
  60. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/METADATA +7 -11
  61. lamindb-1.6a2.dist-info/RECORD +118 -0
  62. lamindb/core/storage/_anndata_sizes.py +0 -41
  63. lamindb/models/flextable.py +0 -163
  64. lamindb-1.5.2.dist-info/RECORD +0 -109
  65. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/LICENSE +0 -0
  66. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/WHEEL +0 -0
lamindb/models/can_curate.py CHANGED
@@ -14,19 +14,19 @@ from ._from_values import (
     _from_values,
     get_organism_record_from_field,
 )
-from .record import Record, get_name_field
+from .sqlrecord import SQLRecord, get_name_field

 if TYPE_CHECKING:
     from lamin_utils._inspect import InspectResult

     from lamindb.base.types import ListLike, StrField

-    from .query_set import RecordList
+    from .query_set import SQLRecordList


-def _check_if_record_in_db(record: str | Record | None, using_key: str | None):
+def _check_if_record_in_db(record: str | SQLRecord | None, using_key: str | None):
     """Check if the record is from the using_key DB."""
-    if isinstance(record, Record):
+    if isinstance(record, SQLRecord):
         if using_key is not None and using_key != "default":
             if record._state.db != using_key:
                 raise ValueError(
@@ -55,8 +55,8 @@ def _inspect(
     field: StrField | None = None,
     *,
     mute: bool = False,
-    organism: str | Record | None = None,
-    source: Record | None = None,
+    organism: str | SQLRecord | None = None,
+    source: SQLRecord | None = None,
     from_source: bool = True,
     strict_source: bool = False,
 ) -> pd.DataFrame | dict[str, list[str]]:
@@ -69,7 +69,7 @@ def _inspect(
     queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
     registry = queryset.model
     model_name = registry._meta.model.__name__
-    if isinstance(source, Record):
+    if isinstance(source, SQLRecord):
         _check_if_record_in_db(source, queryset.db)
         # if strict_source mode, restrict the query to the passed ontology source
         # otherwise, inspect across records present in the DB from all ontology sources and no-source
@@ -158,8 +158,8 @@ def _validate(
     field: StrField | None = None,
     *,
     mute: bool = False,
-    organism: str | Record | None = None,
-    source: Record | None = None,
+    organism: str | SQLRecord | None = None,
+    source: SQLRecord | None = None,
     strict_source: bool = False,
 ) -> np.ndarray:
     """{}"""  # noqa: D415
@@ -172,7 +172,7 @@ def _validate(

     queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
     registry = queryset.model
-    if isinstance(source, Record):
+    if isinstance(source, SQLRecord):
         _check_if_record_in_db(source, queryset.db)
         if strict_source:
             queryset = queryset.filter(source=source)
@@ -224,8 +224,8 @@ def _standardize(
     source_aware: bool = True,
     keep: Literal["first", "last", False] = "first",
     synonyms_field: str = "synonyms",
-    organism: str | Record | None = None,
-    source: Record | None = None,
+    organism: str | SQLRecord | None = None,
+    source: SQLRecord | None = None,
     strict_source: bool = False,
 ) -> list[str] | dict[str, str]:
     """{}"""  # noqa: D415
@@ -240,7 +240,7 @@ def _standardize(
     )
     queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
     registry = queryset.model
-    if isinstance(source, Record):
+    if isinstance(source, SQLRecord):
         _check_if_record_in_db(source, queryset.db)
         if strict_source:
             queryset = queryset.filter(source=source)
@@ -431,7 +431,7 @@ def _check_synonyms_field_exist(record: CanCurate):

 def _filter_queryset_with_organism(
     queryset: QuerySet,
-    organism: Record | None = None,
+    organism: SQLRecord | None = None,
     values_list_field: str | None = None,
     values_list_fields: list[str] | None = None,
 ):
@@ -453,7 +453,7 @@ def _filter_queryset_with_organism(


 class CanCurate:
-    """Base class providing :class:`~lamindb.models.Record`-based validation."""
+    """Base class providing :class:`~lamindb.models.SQLRecord`-based validation."""

     @classmethod
     def inspect(
@@ -462,8 +462,8 @@ class CanCurate:
         field: StrField | None = None,
         *,
         mute: bool = False,
-        organism: Union[str, Record, None] = None,
-        source: Record | None = None,
+        organism: Union[str, SQLRecord, None] = None,
+        source: SQLRecord | None = None,
         from_source: bool = True,
         strict_source: bool = False,
     ) -> InspectResult:
@@ -518,8 +518,8 @@ class CanCurate:
         field: StrField | None = None,
         *,
         mute: bool = False,
-        organism: Union[str, Record, None] = None,
-        source: Record | None = None,
+        organism: Union[str, SQLRecord, None] = None,
+        source: SQLRecord | None = None,
         strict_source: bool = False,
     ) -> np.ndarray:
         """Validate values against existing values of a string field.
@@ -571,16 +571,16 @@ class CanCurate:
         values: ListLike,
         field: StrField | None = None,
         create: bool = False,
-        organism: Union[Record, str, None] = None,
-        source: Record | None = None,
+        organism: Union[SQLRecord, str, None] = None,
+        source: SQLRecord | None = None,
         mute: bool = False,
-    ) -> RecordList:
+    ) -> SQLRecordList:
         """Bulk create validated records by parsing values for an identifier such as a name or an id).

         Args:
             values: A list of values for an identifier, e.g.
                 `["name1", "name2"]`.
-            field: A `Record` field to look up, e.g., `bt.CellMarker.name`.
+            field: A `SQLRecord` field to look up, e.g., `bt.CellMarker.name`.
             create: Whether to create records if they don't exist.
             organism: A `bionty.Organism` name or record.
             source: A `bionty.Source` record to validate against to create records for.
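The hunks above only rename the validation base types (Record becomes SQLRecord, RecordList becomes SQLRecordList); the CanCurate method signatures themselves are unchanged. A minimal usage sketch under that assumption, with bionty's CellType registry as an illustrative example:

    import bionty as bt

    # validate values against existing records of a string field
    flags = bt.CellType.validate(["T cell", "my cell type"], field=bt.CellType.name)
    # inspect which values are already registered and which are not
    result = bt.CellType.inspect(["T cell", "my cell type"], field=bt.CellType.name)
    # bulk-create records from values; returns a SQLRecordList after this release
    records = bt.CellType.from_values(["T cell", "B cell"], field=bt.CellType.name)
    # map synonyms to standardized names
    names = bt.CellType.standardize(["T-cell"], field=bt.CellType.name)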
@@ -629,8 +629,8 @@ class CanCurate:
         source_aware: bool = True,
         keep: Literal["first", "last", False] = "first",
         synonyms_field: str = "synonyms",
-        organism: Union[str, Record, None] = None,
-        source: Record | None = None,
+        organism: Union[str, SQLRecord, None] = None,
+        source: SQLRecord | None = None,
         strict_source: bool = False,
     ) -> list[str] | dict[str, str]:
         """Maps input synonyms to standardized names.
lamindb/models/collection.py CHANGED
@@ -37,15 +37,15 @@ from .artifact import (
     save_schema_links,
 )
 from .has_parents import view_lineage
-from .record import (
-    BasicRecord,
-    LinkORM,
-    Record,
+from .run import Run, TracksRun, TracksUpdates
+from .sqlrecord import (
+    BaseSQLRecord,
+    IsLink,
+    SQLRecord,
     _get_record_kwargs,
     init_self_from_db,
     update_attributes,
 )
-from .run import Run, TracksRun, TracksUpdates

 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator
@@ -128,7 +128,7 @@ def _load_concat_artifacts(
     return concat_object


-class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
+class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
     """Collections of artifacts.

     Collections provide a simple way of versioning collections of artifacts.
@@ -158,7 +158,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):

     """

-    class Meta(Record.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
+    class Meta(SQLRecord.Meta, IsVersioned.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False

     _len_full_uid: int = 20
@@ -272,7 +272,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
         run: Run | None = kwargs.pop("run", None)
         revises: Collection | None = kwargs.pop("revises", None)
         version: str | None = kwargs.pop("version", None)
-        _branch_code: int | None = kwargs.pop("_branch_code", 1)
+        branch_id: int | None = kwargs.pop("branch_id", 1)
         key: str
         if "name" in kwargs:
             key = kwargs.pop("name")
@@ -340,7 +340,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
             hash=hash,
             run=run,
             version=version,
-            _branch_code=_branch_code,
+            branch_id=branch_id,
             revises=revises,
             _skip_validation=_skip_validation,
         )
@@ -349,6 +349,38 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
             _track_run_input(revises, run=run)
         _track_run_input(artifacts, run=run)

+    @classmethod
+    def get(
+        cls,
+        idlike: int | str | None = None,
+        *,
+        is_run_input: bool | Run = False,
+        **expressions,
+    ) -> Artifact:
+        """Get a single collection.
+
+        Args:
+            idlike: Either a uid stub, uid or an integer id.
+            is_run_input: Whether to track this collection as run input.
+            expressions: Fields and values passed as Django query expressions.
+
+        Raises:
+            :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
+
+        See Also:
+            - Method in `SQLRecord` base class: :meth:`~lamindb.models.SQLRecord.get`
+
+        Examples:
+
+            ::
+
+                collection = ln.Collection.get("okxPW6GIKBfRBE3B0000")
+                collection = ln.Collection.get(key="scrna/collection1")
+        """
+        from .query_set import QuerySet
+
+        return QuerySet(model=cls).get(idlike, is_run_input=is_run_input, **expressions)
+
     def append(self, artifact: Artifact, run: Run | None = None) -> Collection:
         """Append an artifact to the collection.

@@ -557,14 +589,12 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):

         >>> collection.delete()
         """
-        # change _branch_code to trash
-        trash__branch_code = -1
-        if self._branch_code > trash__branch_code and permanent is not True:
-            self._branch_code = trash__branch_code
+        # change branch_id to trash
+        trash_branch_id = -1
+        if self.branch_id > trash_branch_id and permanent is not True:
+            self.branch_id = trash_branch_id
             self.save()
-            logger.warning(
-                f"moved collection to trash (_branch_code = {trash__branch_code})"
-            )
+            logger.warning(f"moved collection to trash (branch_id = {trash_branch_id})")
             return

         # permanent delete
@@ -619,7 +649,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):

         >>> collection.restore()
         """
-        self._branch_code = 1
+        self.branch_id = 1
         self.save()

     @property
@@ -691,7 +721,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
     return hash


-class CollectionArtifact(BasicRecord, LinkORM, TracksRun):
+class CollectionArtifact(BaseSQLRecord, IsLink, TracksRun):
     id: int = models.BigAutoField(primary_key=True)
     collection: Collection = ForeignKey(
         Collection, CASCADE, related_name="links_artifact"
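As the delete/restore hunks above show, trashing a collection now means setting the new branch_id field to -1 instead of the removed _branch_code. A minimal sketch of that behavior, assuming an existing collection (the key below is hypothetical):

    import lamindb as ln

    collection = ln.Collection.get(key="scrna/collection1")  # hypothetical key
    collection.delete()   # moves to trash: sets branch_id = -1 and saves (unless permanent=True)
    collection.restore()  # back from trash: sets branch_id = 1 and saves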
lamindb/models/core.py CHANGED
@@ -12,8 +12,8 @@ from lamindb.base.fields import (
 )

 from ..base.ids import base62_12
-from .record import Record
 from .run import TracksRun, TracksUpdates
+from .sqlrecord import SQLRecord

 if TYPE_CHECKING:
     from pathlib import Path
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
     from .artifact import Artifact


-class Storage(Record, TracksRun, TracksUpdates):
+class Storage(SQLRecord, TracksRun, TracksUpdates):
     """Storage locations of artifacts such as S3 buckets or local directories.

     A storage location is either a directory/folder (local or in the cloud) or
@@ -68,7 +68,7 @@ class Storage(Record, TracksRun, TracksUpdates):
     >>> ln.settings.storage = "./storage_2"  # or a cloud bucket
     """

-    class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
+    class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False

     _name_field: str = "root"
lamindb/models/feature.py CHANGED
@@ -6,12 +6,12 @@ from typing import TYPE_CHECKING, Any, get_args, overload
 import numpy as np
 import pandas as pd
 from django.db import models
-from django.db.models import CASCADE, PROTECT, Q
+from django.db.models import CASCADE, PROTECT
 from django.db.models.query_utils import DeferredAttribute
 from django.db.utils import IntegrityError
 from lamin_utils import logger
 from lamindb_setup._init_instance import get_schema_module_name
-from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict
+from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict, hash_string
 from pandas.api.types import CategoricalDtype, is_string_dtype
 from pandas.core.dtypes.base import ExtensionDtype

@@ -28,12 +28,12 @@ from lamindb.errors import FieldValidationError, ValidationError
 from ..base.ids import base62_12
 from ._relations import dict_module_name_to_model_name
 from .can_curate import CanCurate
-from .query_set import RecordList
-from .record import BasicRecord, Record, Registry, _get_record_kwargs
+from .query_set import SQLRecordList
 from .run import (
     TracksRun,
     TracksUpdates,
 )
+from .sqlrecord import BaseSQLRecord, Registry, SQLRecord, _get_record_kwargs

 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -50,6 +50,18 @@ def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, str]]:
     allowed_dtypes = FEATURE_DTYPES
     if is_param:
         allowed_dtypes.add("dict")
+
+    # Handle list[...] types
+    if dtype_str.startswith("list[") and dtype_str.endswith("]"):
+        inner_dtype_str = dtype_str[5:-1]  # Remove "list[" and "]"
+        # Recursively parse the inner type
+        inner_result = parse_dtype(inner_dtype_str, is_param)
+        # Add "list": True to each component
+        for component in inner_result:
+            if isinstance(component, dict):
+                component["list"] = True  # type: ignore
+        return inner_result
+
     is_composed_cat = dtype_str.startswith("cat[") and dtype_str.endswith("]")
     result = []
     if is_composed_cat:
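The new branch above unwraps a list[...] dtype string, parses the inner dtype recursively, and marks every returned component with "list": True. A standalone sketch of just that unwrapping step (not the real parse_dtype):

    def unwrap_list(dtype_str: str) -> tuple[str, bool]:
        # mirrors the check added above: strip the "list[" prefix and trailing "]"
        if dtype_str.startswith("list[") and dtype_str.endswith("]"):
            return dtype_str[5:-1], True
        return dtype_str, False

    assert unwrap_list("list[cat[ULabel]]") == ("cat[ULabel]", True)
    assert unwrap_list("float") == ("float", False)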
@@ -71,7 +83,7 @@ def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, str]]:

 def parse_cat_dtype(
     dtype_str: str,
-    related_registries: dict[str, Record] | None = None,
+    related_registries: dict[str, SQLRecord] | None = None,
     is_itype: bool = False,
 ) -> dict[str, Any]:
     """Parses a categorical dtype string into its components (registry, field, subtypes)."""
@@ -119,8 +131,17 @@ def parse_cat_dtype(
     if "." in registry_str:
         registry_str_split = registry_str.split(".")
         assert len(registry_str_split) == 2, registry_str  # noqa: S101
-        module_name, class_name = registry_str_split
-        module_name = get_schema_module_name(module_name)
+        module_name_attempt, class_name = registry_str_split
+        module_name = get_schema_module_name(
+            module_name_attempt, raise_import_error=False
+        )
+        if module_name is None:
+            raise ImportError(
+                f"Can not parse dtype {dtype_str} because {module_name_attempt} "
+                f"was not found.\nInstall the module with `pip install {module_name_attempt}`\n"
+                "and also add the module to this instance via instance settings page "
+                "under 'schema modules'."
+            )
     else:
         module_name, class_name = "lamindb", registry_str
     module = importlib.import_module(module_name)
@@ -143,12 +164,30 @@ def parse_cat_dtype(


 def serialize_dtype(
-    dtype: Registry | Record | FieldAttr | list[Record] | list[Registry] | str,
+    dtype: Registry
+    | SQLRecord
+    | FieldAttr
+    | list[SQLRecord]
+    | list[Registry]
+    | list[str]
+    | list[float]
+    | str
+    | type,
     is_itype: bool = False,
 ) -> str:
     """Converts a data type object into its string representation."""
+    from .record import Record
     from .ulabel import ULabel

+    # Handle generic types like list[str], list[Registry], etc.
+    if hasattr(dtype, "__origin__") and dtype.__origin__ is list:
+        # Get the inner type from list[T]
+        inner_type = dtype.__args__[0] if dtype.__args__ else None  # type: ignore
+        if inner_type is not None:
+            # Recursively serialize the inner type
+            inner_dtype_str = serialize_dtype(inner_type, is_itype=is_itype)
+            return f"list[{inner_dtype_str}]"
+

     if (
         not isinstance(dtype, list)
@@ -167,21 +206,24 @@ def serialize_dtype(
         dtype_str = serialize_pandas_dtype(dtype)
     else:
         error_message = "dtype has to be a registry, a ulabel subtype, a registry field, or a list of registries or fields, not {}"
-        if isinstance(dtype, (Registry, DeferredAttribute, ULabel)):
+        if isinstance(dtype, (Registry, DeferredAttribute, ULabel, Record)):
             dtype = [dtype]
         elif not isinstance(dtype, list):
             raise ValueError(error_message.format(dtype))
         dtype_str = ""
         for one_dtype in dtype:
-            if not isinstance(one_dtype, (Registry, DeferredAttribute, ULabel)):
+            if not isinstance(one_dtype, (Registry, DeferredAttribute, ULabel, Record)):
                 raise ValueError(error_message.format(one_dtype))
             if isinstance(one_dtype, Registry):
                 dtype_str += one_dtype.__get_name_with_module__() + "|"
-            elif isinstance(one_dtype, ULabel):
+            elif isinstance(one_dtype, (ULabel, Record)):
                 assert one_dtype.is_type, (  # noqa: S101
                     f"ulabel has to be a type if acting as dtype, {one_dtype} has `is_type` False"
                 )
-                dtype_str += f"ULabel[{one_dtype.name}]"
+                if isinstance(one_dtype, ULabel):
+                    dtype_str += f"ULabel[{one_dtype.name}]"
+                else:
+                    dtype_str += f"Record[{one_dtype.name}]"
             else:
                 name = one_dtype.field.name
                 field_ext = f".{name}" if name != "name" else ""
@@ -247,10 +289,10 @@ def process_init_feature_param(args, kwargs, is_param: bool = False):
     return kwargs


-class Feature(Record, CanCurate, TracksRun, TracksUpdates):
-    """Dataset dimensions.
+class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
+    """Variables, such as dataframe columns or run parameters.

-    A feature represents a dimension of a dataset, such as a column in a
+    A feature often represents a dimension of a dataset, such as a column in a
     `DataFrame`. The `Feature` registry organizes metadata of features.

     The `Feature` registry helps you organize and query datasets based on their
@@ -317,6 +359,13 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
        ...     dtype=[ln.ULabel, bt.CellType],
        ... ).save()

+       A multivalue feature with a list of cell types.
+
+       >>> ln.Feature(
+       ...     name="cell_types",
+       ...     dtype=list[bt.CellType],  # or list[str] for a list of strings
+       ... ).save()
+

     Hint:

@@ -337,7 +386,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):

     """

-    class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
+    class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
         abstract = False

     _name_field: str = "name"
@@ -353,19 +402,19 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         editable=False, unique=True, db_index=True, max_length=12, default=base62_12
     )
     """Universal id, valid across DB instances."""
-    name: str = CharField(max_length=150, db_index=True, unique=True)
-    """Name of feature (hard unique constraint `unique=True`)."""
+    name: str = CharField(max_length=150, db_index=True)
+    """Name of feature."""
     dtype: Dtype | None = CharField(db_index=True, null=True)
     """Data type (:class:`~lamindb.base.types.Dtype`)."""
     type: Feature | None = ForeignKey(
-        "self", PROTECT, null=True, related_name="records"
+        "self", PROTECT, null=True, related_name="features"
     )
     """Type of feature (e.g., 'Readout', 'Metric', 'Metadata', 'ExpertAnnotation', 'ModelPrediction').

     Allows to group features by type, e.g., all read outs, all metrics, etc.
     """
-    records: Feature
-    """Records of this type."""
+    features: Feature
+    """Features of this type (can only be non-empty if `is_type` is `True`)."""
     is_type: bool = BooleanField(default=False, db_index=True, null=True)
     """Distinguish types from instances of the type."""
     unit: str | None = CharField(max_length=30, db_index=True, null=True)
@@ -413,10 +462,10 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         "Schema", through="SchemaFeature", related_name="features"
     )
     """Feature sets linked to this feature."""
-    _expect_many: bool = models.BooleanField(default=True, db_default=True)
-    """Indicates whether values for this feature are expected to occur a single or multiple times for an artifact (default `True`).
+    _expect_many: bool = models.BooleanField(default=None, db_default=None, null=True)
+    """Indicates whether values for this feature are expected to occur a single or multiple times for an artifact (default `None`).

-    - if it's `True` (default), the values come from an observation-level aggregation and a dtype of `datetime` on the observation-level mean `set[datetime]` on the artifact-level
+    - if it's `True` (default), the values come from an observation-level aggregation and a dtype of `datetime` on the observation-level means `set[datetime]` on the artifact-level
     - if it's `False` it's an artifact-level value and datetime means datetime; this is an edge case because an arbitrary artifact would always be a set of arbitrary measurements that would need to be aggregated ("one just happens to measure a single cell line in that artifact")
     """
     _curation: dict[str, Any] = JSONField(default=None, db_default=None, null=True)
@@ -484,7 +533,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         )

     @classmethod
-    def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordList:
+    def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> SQLRecordList:
         """Create Feature records for columns."""
         field = Feature.name if field is None else field
         registry = field.field.model  # type: ignore
@@ -502,7 +551,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
             Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()
         ]  # type: ignore
         assert len(features) == len(df.columns)  # noqa: S101
-        return RecordList(features)
+        return SQLRecordList(features)

     def save(self, *args, **kwargs) -> Feature:
         """Save."""
@@ -606,7 +655,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         # return "Artifact"


-class FeatureValue(Record, TracksRun):
+class FeatureValue(SQLRecord, TracksRun):
     """Non-categorical features values.

     Categorical feature values are stored in their respective registries:
@@ -634,44 +683,23 @@ class FeatureValue(Record, TracksRun):
     hash: str = CharField(max_length=HASH_LENGTH, null=True, db_index=True)
     """Value hash."""

-    class Meta(BasicRecord.Meta, TracksRun.Meta):
-        constraints = [
-            # For simple types, use direct value comparison
-            models.UniqueConstraint(
-                fields=["feature", "value"],
-                name="unique_simple_feature_value",
-                condition=Q(hash__isnull=True),
-            ),
-            # For complex types (dictionaries), use hash
-            models.UniqueConstraint(
-                fields=["feature", "hash"],
-                name="unique_complex_feature_value",
-                condition=Q(hash__isnull=False),
-            ),
-        ]
+    class Meta(BaseSQLRecord.Meta, TracksRun.Meta):
+        unique_together = ("feature", "hash")

     @classmethod
     def get_or_create(cls, feature, value):
-        # Simple types: int, float, str, bool
-        if isinstance(value, (int, float, str, bool)):
-            try:
-                return (
-                    cls.objects.create(feature=feature, value=value, hash=None),
-                    False,
-                )
-            except IntegrityError:
-                return cls.objects.get(feature=feature, value=value), True
-
-        # Complex types: dict, list
+        # simple values: (int, float, str, bool, datetime)
+        if not isinstance(value, dict):
+            hash = hash_string(str(value))
         else:
             hash = hash_dict(value)
-            try:
-                return (
-                    cls.objects.create(feature=feature, value=value, hash=hash),
-                    False,
-                )
-            except IntegrityError:
-                return cls.objects.get(feature=feature, hash=hash), True
+        try:
+            return (
+                cls.objects.create(feature=feature, value=value, hash=hash),
+                False,
+            )
+        except IntegrityError:
+            return cls.objects.get(feature=feature, hash=hash), True


 def suggest_categorical_for_str_iterable(
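FeatureValue deduplication is now uniformly hash-based: scalar values are hashed via hash_string(str(value)), dicts via hash_dict, and uniqueness is enforced by the new unique_together = ("feature", "hash"). A standalone sketch of that hashing rule (not the real get_or_create), assuming lamindb_setup is installed:

    from lamindb_setup.core.hashing import hash_dict, hash_string  # imports as added above

    def compute_value_hash(value):
        # scalars (int, float, str, bool, datetime) hash via their string form
        if not isinstance(value, dict):
            return hash_string(str(value))
        # dicts keep the dedicated dict hash
        return hash_dict(value)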