lamindb 1.5.2__py3-none-any.whl → 1.6a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. lamindb/__init__.py +24 -6
  2. lamindb/_finish.py +5 -5
  3. lamindb/_tracked.py +1 -1
  4. lamindb/_view.py +4 -4
  5. lamindb/core/_context.py +32 -6
  6. lamindb/core/_settings.py +1 -1
  7. lamindb/core/datasets/mini_immuno.py +8 -0
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +9 -9
  10. lamindb/core/storage/_valid_suffixes.py +1 -0
  11. lamindb/core/storage/_zarr.py +32 -107
  12. lamindb/curators/__init__.py +19 -2
  13. lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
  14. lamindb/curators/_legacy.py +15 -19
  15. lamindb/curators/core.py +247 -80
  16. lamindb/errors.py +2 -2
  17. lamindb/migrations/0069_squashed.py +8 -8
  18. lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
  19. lamindb/migrations/0073_merge_ourprojects.py +7 -7
  20. lamindb/migrations/0075_lamindbv1_part5.py +1 -1
  21. lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
  22. lamindb/migrations/0080_polish_lamindbv1.py +2 -2
  23. lamindb/migrations/0088_schema_components.py +1 -1
  24. lamindb/migrations/0090_runproject_project_runs.py +2 -2
  25. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
  26. lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
  27. lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
  28. lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
  29. lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
  30. lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
  31. lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
  32. lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
  33. lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
  34. lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
  35. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
  36. lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
  37. lamindb/models/__init__.py +35 -18
  38. lamindb/models/_describe.py +4 -4
  39. lamindb/models/_django.py +38 -4
  40. lamindb/models/_feature_manager.py +66 -123
  41. lamindb/models/_from_values.py +13 -13
  42. lamindb/models/_label_manager.py +8 -6
  43. lamindb/models/_relations.py +7 -7
  44. lamindb/models/artifact.py +166 -156
  45. lamindb/models/can_curate.py +25 -25
  46. lamindb/models/collection.py +48 -18
  47. lamindb/models/core.py +3 -3
  48. lamindb/models/feature.py +88 -60
  49. lamindb/models/has_parents.py +17 -17
  50. lamindb/models/project.py +52 -24
  51. lamindb/models/query_manager.py +5 -5
  52. lamindb/models/query_set.py +61 -37
  53. lamindb/models/record.py +158 -1583
  54. lamindb/models/run.py +39 -176
  55. lamindb/models/save.py +6 -6
  56. lamindb/models/schema.py +33 -44
  57. lamindb/models/sqlrecord.py +1743 -0
  58. lamindb/models/transform.py +17 -33
  59. lamindb/models/ulabel.py +21 -15
  60. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/METADATA +7 -11
  61. lamindb-1.6a2.dist-info/RECORD +118 -0
  62. lamindb/core/storage/_anndata_sizes.py +0 -41
  63. lamindb/models/flextable.py +0 -163
  64. lamindb-1.5.2.dist-info/RECORD +0 -109
  65. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/LICENSE +0 -0
  66. {lamindb-1.5.2.dist-info → lamindb-1.6a2.dist-info}/WHEEL +0 -0
lamindb/models/run.py CHANGED
@@ -1,18 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any, overload
3
+ from typing import TYPE_CHECKING, overload
4
4
 
5
5
  import numpy as np
6
6
  from django.db import models
7
7
  from django.db.models import (
8
8
  CASCADE,
9
9
  PROTECT,
10
- Q,
11
10
  )
12
- from django.db.utils import IntegrityError
13
11
  from lamindb_setup import _check_instance_setup
14
- from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict
15
12
 
13
+ from lamindb.base import deprecated
16
14
  from lamindb.base.fields import (
17
15
  BooleanField,
18
16
  CharField,
@@ -20,22 +18,20 @@ from lamindb.base.fields import (
20
18
  ForeignKey,
21
19
  )
22
20
  from lamindb.base.users import current_user_id
23
- from lamindb.errors import InvalidArgument, ValidationError
21
+ from lamindb.errors import InvalidArgument
24
22
 
25
- from ..base.ids import base62_20
23
+ from ..base.ids import base62_16
26
24
  from .can_curate import CanCurate
27
- from .record import BasicRecord, LinkORM, Record, Registry
25
+ from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord
28
26
 
29
27
  if TYPE_CHECKING:
30
28
  from datetime import datetime
31
29
 
32
- from lamindb.base.types import Dtype, FieldAttr
33
-
34
30
  from .artifact import Artifact
35
31
  from .collection import Collection
32
+ from .feature import FeatureValue
36
33
  from .project import Project
37
34
  from .query_set import QuerySet
38
- from .schema import Schema
39
35
  from .transform import Transform
40
36
  from .ulabel import ULabel
41
37
 
@@ -43,14 +39,14 @@ if TYPE_CHECKING:
43
39
  _TRACKING_READY: bool | None = None
44
40
 
45
41
 
46
- class ParamManager:
47
- """Param manager."""
42
+ class FeatureManager:
43
+ """Feature manager."""
48
44
 
49
45
  pass
50
46
 
51
47
 
52
- class ParamManagerRun(ParamManager):
53
- """Param manager."""
48
+ class FeatureManagerRun(FeatureManager):
49
+ """Feature manager."""
54
50
 
55
51
  pass
56
52
 
@@ -140,7 +136,7 @@ class TracksUpdates(models.Model):
140
136
  super().__init__(*args, **kwargs)
141
137
 
142
138
 
143
- class User(BasicRecord, CanCurate):
139
+ class User(BaseSQLRecord, CanCurate):
144
140
  """Users.
145
141
 
146
142
  All data in this registry is synced from `lamin.ai` to ensure a universal
@@ -201,152 +197,7 @@ class User(BasicRecord, CanCurate):
201
197
  super().__init__(*args, **kwargs)
202
198
 
203
199
 
204
- class Param(Record, CanCurate, TracksRun, TracksUpdates):
205
- """Parameters of runs & models."""
206
-
207
- class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
208
- abstract = False
209
-
210
- _name_field: str = "name"
211
-
212
- name: str = CharField(max_length=100, db_index=True)
213
- dtype: Dtype | None = CharField(db_index=True, null=True)
214
- """Data type (:class:`~lamindb.base.types.Dtype`)."""
215
- type: Param | None = ForeignKey("self", PROTECT, null=True, related_name="records")
216
- """Type of param (e.g., 'Pipeline', 'ModelTraining', 'PostProcessing').
217
-
218
- Allows to group features by type, e.g., all read outs, all metrics, etc.
219
- """
220
- records: Param
221
- """Records of this type."""
222
- is_type: bool = BooleanField(default=False, db_index=True, null=True)
223
- """Distinguish types from instances of the type."""
224
- _expect_many: bool = models.BooleanField(default=False, db_default=False)
225
- """Indicates whether values for this param are expected to occur a single or multiple times for an artifact/run (default `False`).
226
-
227
- - if it's `False` (default), the values mean artifact/run-level values and a dtype of `datetime` means `datetime`
228
- - if it's `True`, the values are from an aggregation, which this seems like an edge case but when characterizing a model ensemble trained with different parameters it could be relevant
229
- """
230
- schemas: Schema = models.ManyToManyField(
231
- "Schema", through="SchemaParam", related_name="params"
232
- )
233
- """Feature sets linked to this feature."""
234
- # backward fields
235
- values: ParamValue
236
- """Values for this parameter."""
237
-
238
- @overload
239
- def __init__(
240
- self,
241
- name: str,
242
- dtype: Dtype | Registry | list[Registry] | FieldAttr,
243
- type: Param | None = None,
244
- is_type: bool = False,
245
- ): ...
246
-
247
- @overload
248
- def __init__(
249
- self,
250
- *db_args,
251
- ): ...
252
-
253
- def __init__(self, *args, **kwargs):
254
- from .feature import process_init_feature_param
255
-
256
- if len(args) == len(self._meta.concrete_fields):
257
- super().__init__(*args, **kwargs)
258
- return None
259
-
260
- dtype = kwargs.get("dtype", None)
261
- kwargs = process_init_feature_param(args, kwargs, is_param=True)
262
- super().__init__(*args, **kwargs)
263
- dtype_str = kwargs.pop("dtype", None)
264
- if not self._state.adding:
265
- if not (
266
- self.dtype.startswith("cat")
267
- if dtype == "cat"
268
- else self.dtype == dtype_str
269
- ):
270
- raise ValidationError(
271
- f"Feature {self.name} already exists with dtype {self.dtype}, you passed {dtype_str}"
272
- )
273
-
274
-
275
- # FeatureValue behaves in many ways like a link in a LinkORM
276
- # in particular, we don't want a _public field on it
277
- # Also, we don't inherit from TracksRun because a ParamValue
278
- # is typically created before a run is created and we want to
279
- # avoid delete cycles (for Model params though it might be helpful)
280
- class ParamValue(Record):
281
- """Parameter values.
282
-
283
- Is largely analogous to `FeatureValue`.
284
- """
285
-
286
- # we do not have a unique constraint on param & value because it leads to hashing errors
287
- # for large dictionaries: https://lamin.ai/laminlabs/lamindata/transform/jgTrkoeuxAfs0000
288
- # we do not hash values because we have `get_or_create` logic all over the place
289
- # and also for checking whether the (param, value) combination exists
290
- # there does not seem an issue with querying for a dict-like value
291
- # https://lamin.ai/laminlabs/lamindata/transform/jgTrkoeuxAfs0001
292
- _name_field: str = "value"
293
-
294
- param: Param = ForeignKey(Param, CASCADE, related_name="values")
295
- """The dimension metadata."""
296
- value: Any = (
297
- models.JSONField()
298
- ) # stores float, integer, boolean, datetime or dictionaries
299
- """The JSON-like value."""
300
- # it'd be confusing and hard to populate a run here because these
301
- # values are typically created upon creating a run
302
- # hence, ParamValue does _not_ inherit from TracksRun but manually
303
- # adds created_at & created_by
304
- # because ParamValue cannot be updated, we don't need updated_at
305
- created_at: datetime = DateTimeField(
306
- editable=False, db_default=models.functions.Now(), db_index=True
307
- )
308
- """Time of creation of record."""
309
- created_by: User = ForeignKey(
310
- User, PROTECT, default=current_user_id, related_name="+"
311
- )
312
- """Creator of record."""
313
- hash: str = CharField(max_length=HASH_LENGTH, null=True, db_index=True)
314
-
315
- class Meta:
316
- constraints = [
317
- # For simple types, use direct value comparison
318
- models.UniqueConstraint(
319
- fields=["param", "value"],
320
- name="unique_simple_param_value",
321
- condition=Q(hash__isnull=True),
322
- ),
323
- # For complex types (dictionaries), use hash
324
- models.UniqueConstraint(
325
- fields=["param", "hash"],
326
- name="unique_complex_param_value",
327
- condition=Q(hash__isnull=False),
328
- ),
329
- ]
330
-
331
- @classmethod
332
- def get_or_create(cls, param, value):
333
- # Simple types: int, float, str, bool
334
- if isinstance(value, (int, float, str, bool)):
335
- try:
336
- return cls.objects.create(param=param, value=value, hash=None), False
337
- except IntegrityError:
338
- return cls.objects.get(param=param, value=value), True
339
-
340
- # Complex types: dict, list
341
- else:
342
- hash = hash_dict(value)
343
- try:
344
- return cls.objects.create(param=param, value=value, hash=hash), False
345
- except IntegrityError:
346
- return cls.objects.get(param=param, hash=hash), True
347
-
348
-
349
- class Run(Record):
200
+ class Run(SQLRecord):
350
201
  """Runs of transforms such as the execution of a script.
351
202
 
352
203
  A registry to store runs of transforms, such as an execution of a script.
@@ -381,14 +232,16 @@ class Run(Record):
381
232
 
382
233
  _name_field: str = "started_at"
383
234
 
384
- params: ParamManager = ParamManagerRun # type: ignore
385
- """Param manager.
235
+ features: FeatureManager = FeatureManagerRun # type: ignore
236
+ """Features manager.
237
+
238
+ Run parameters are tracked via the `Feature` registry, just like all other variables.
386
239
 
387
240
  Guide: :ref:`track-run-parameters`
388
241
 
389
242
  Example::
390
243
 
391
- run.params.add_values({
244
+ run.features.add_values({
392
245
  "learning_rate": 0.01,
393
246
  "input_dir": "s3://my-bucket/mydataset",
394
247
  "downsample": True,
@@ -401,8 +254,9 @@ class Run(Record):
401
254
 
402
255
  id: int = models.BigAutoField(primary_key=True)
403
256
  """Internal id, valid only in one DB instance."""
257
+ # default uid was changed from base62_20 to base62_16 in 1.6.0
404
258
  uid: str = CharField(
405
- editable=False, unique=True, db_index=True, max_length=20, default=base62_20
259
+ editable=False, unique=True, db_index=True, max_length=20, default=base62_16
406
260
  )
407
261
  """Universal id, valid across DB instances."""
408
262
  name: str | None = CharField(max_length=150, null=True)
@@ -446,10 +300,11 @@ class Run(Record):
446
300
  """The collections serving as input for this run."""
447
301
  output_collections: Collection
448
302
  """The collections generated by this run."""
449
- _param_values: ParamValue = models.ManyToManyField(
450
- ParamValue, through="RunParamValue", related_name="runs"
451
- )
452
303
  """Parameter values."""
304
+ _feature_values: FeatureValue = models.ManyToManyField(
305
+ "FeatureValue", through="RunFeatureValue", related_name="runs"
306
+ )
307
+ """Feature values."""
453
308
  reference: str | None = CharField(max_length=255, db_index=True, null=True)
454
309
  """A reference like a URL or external ID (such as from a workflow manager)."""
455
310
  reference_type: str | None = CharField(max_length=25, db_index=True, null=True)
@@ -510,7 +365,7 @@ class Run(Record):
510
365
  *args,
511
366
  **kwargs,
512
367
  ):
513
- self.params = ParamManager(self) # type: ignore
368
+ self.features = FeatureManager(self) # type: ignore
514
369
  if len(args) == len(self._meta.concrete_fields):
515
370
  super().__init__(*args, **kwargs)
516
371
  return None
@@ -540,6 +395,11 @@ class Run(Record):
540
395
  delete_run_artifacts(self)
541
396
  super().delete()
542
397
 
398
+ @property
399
+ @deprecated("features")
400
+ def params(self) -> FeatureManager:
401
+ return self.features
402
+
543
403
  @classmethod
544
404
  def filter(
545
405
  cls,
@@ -566,6 +426,7 @@ class Run(Record):
566
426
  ln.Run.filter(hyperparam_x=100)
567
427
  """
568
428
  from ._feature_manager import filter_base
429
+ from .feature import Feature
569
430
  from .query_set import QuerySet
570
431
 
571
432
  if expressions:
@@ -574,14 +435,14 @@ class Run(Record):
574
435
  if field_or_feature_or_param in Run.__get_available_fields__():
575
436
  return QuerySet(model=cls).filter(*queries, **expressions)
576
437
  elif all(
577
- params_validated := Param.validate(
438
+ params_validated := Feature.validate(
578
439
  keys_normalized, field="name", mute=True
579
440
  )
580
441
  ):
581
- return filter_base(ParamManagerRun, **expressions)
442
+ return filter_base(FeatureManagerRun, **expressions)
582
443
  else:
583
444
  params = ", ".join(sorted(np.array(keys_normalized)[~params_validated]))
584
- message = f"param names: {params}"
445
+ message = f"feature names: {params}"
585
446
  fields = ", ".join(sorted(cls.__get_available_fields__()))
586
447
  raise InvalidArgument(
587
448
  f"You can query either by available fields: {fields}\n"
@@ -612,11 +473,13 @@ def delete_run_artifacts(run: Run) -> None:
612
473
  report.delete(permanent=True)
613
474
 
614
475
 
615
- class RunParamValue(BasicRecord, LinkORM):
476
+ class RunFeatureValue(BaseSQLRecord, IsLink):
616
477
  id: int = models.BigAutoField(primary_key=True)
617
- run: Run = ForeignKey(Run, CASCADE, related_name="links_paramvalue")
478
+ run: Run = ForeignKey(Run, CASCADE, related_name="links_featurevalue")
618
479
  # we follow the lower() case convention rather than snake case for link models
619
- paramvalue: ParamValue = ForeignKey(ParamValue, PROTECT, related_name="links_run")
480
+ featurevalue: FeatureValue = ForeignKey(
481
+ "FeatureValue", PROTECT, related_name="links_run"
482
+ )
620
483
  created_at: datetime = DateTimeField(
621
484
  editable=False, db_default=models.functions.Now(), db_index=True
622
485
  )
@@ -627,4 +490,4 @@ class RunParamValue(BasicRecord, LinkORM):
627
490
  """Creator of record."""
628
491
 
629
492
  class Meta:
630
- unique_together = ("run", "paramvalue")
493
+ unique_together = ("run", "featurevalue")
lamindb/models/save.py CHANGED
@@ -21,7 +21,7 @@ from ..core.storage.paths import (
21
21
  delete_storage_using_key,
22
22
  store_file_or_folder,
23
23
  )
24
- from .record import Record
24
+ from .sqlrecord import SQLRecord
25
25
 
26
26
  if TYPE_CHECKING:
27
27
  from collections.abc import Iterable
@@ -29,7 +29,7 @@ if TYPE_CHECKING:
29
29
  from .artifact import Artifact
30
30
 
31
31
 
32
- def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:
32
+ def save(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False) -> None:
33
33
  """Bulk save records.
34
34
 
35
35
  Note:
@@ -42,7 +42,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
42
42
  existing records! Use ``record.save()`` for these use cases.
43
43
 
44
44
  Args:
45
- records: Multiple :class:`~lamindb.models.Record` objects.
45
+ records: Multiple :class:`~lamindb.models.SQLRecord` objects.
46
46
  ignore_conflicts: If ``True``, do not error if some records violate a
47
47
  unique or another constraint. However, it won't inplace update the id
48
48
  fields of records. If you need records with ids, you need to query
@@ -69,7 +69,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
69
69
  """
70
70
  from .artifact import Artifact
71
71
 
72
- if isinstance(records, Record):
72
+ if isinstance(records, SQLRecord):
73
73
  raise ValueError("Please use record.save() if saving a single record.")
74
74
 
75
75
  # previously, this was all set based,
@@ -107,7 +107,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
107
107
  return None
108
108
 
109
109
 
110
- def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False):
110
+ def bulk_create(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False):
111
111
  records_by_orm = defaultdict(list)
112
112
  for record in records:
113
113
  records_by_orm[record.__class__].append(record)
@@ -116,7 +116,7 @@ def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False
116
116
  # records[:] = created # In-place list update; does not seem to be necessary
117
117
 
118
118
 
119
- def bulk_update(records: Iterable[Record], ignore_conflicts: bool | None = False):
119
+ def bulk_update(records: Iterable[SQLRecord], ignore_conflicts: bool | None = False):
120
120
  records_by_orm = defaultdict(list)
121
121
  for record in records:
122
122
  records_by_orm[record.__class__].append(record)
lamindb/models/schema.py CHANGED
@@ -35,16 +35,16 @@ from .feature import (
35
35
  serialize_dtype,
36
36
  serialize_pandas_dtype,
37
37
  )
38
- from .record import (
39
- BasicRecord,
40
- LinkORM,
41
- Record,
38
+ from .run import TracksRun, TracksUpdates
39
+ from .sqlrecord import (
40
+ BaseSQLRecord,
41
+ IsLink,
42
42
  Registry,
43
+ SQLRecord,
43
44
  _get_record_kwargs,
44
45
  init_self_from_db,
45
46
  update_attributes,
46
47
  )
47
- from .run import Param, TracksRun, TracksUpdates
48
48
 
49
49
  if TYPE_CHECKING:
50
50
  import pandas as pd
@@ -59,7 +59,7 @@ NUMBER_TYPE = "num"
59
59
  DICT_KEYS_TYPE = type({}.keys()) # type: ignore
60
60
 
61
61
 
62
- def validate_features(features: list[Record]) -> Record:
62
+ def validate_features(features: list[SQLRecord]) -> SQLRecord:
63
63
  """Validate and return feature type."""
64
64
  try:
65
65
  if len(features) == 0:
@@ -70,7 +70,7 @@ def validate_features(features: list[Record]) -> Record:
70
70
  ) from None
71
71
  if not hasattr(features, "__getitem__"):
72
72
  raise TypeError("features has to be list-like")
73
- if not isinstance(features[0], Record):
73
+ if not isinstance(features[0], SQLRecord):
74
74
  raise TypeError(
75
75
  "features has to store feature records! use .from_values() otherwise"
76
76
  )
@@ -84,8 +84,8 @@ def validate_features(features: list[Record]) -> Record:
84
84
 
85
85
 
86
86
  def get_features_config(
87
- features: list[Record] | tuple[Record, dict],
88
- ) -> tuple[list[Record], list[tuple[Record, dict]]]:
87
+ features: list[SQLRecord] | tuple[SQLRecord, dict],
88
+ ) -> tuple[list[SQLRecord], list[tuple[SQLRecord, dict]]]:
89
89
  """Get features and their config from the return of feature.with_config()."""
90
90
  features_list = []
91
91
  configs = []
@@ -251,13 +251,13 @@ KNOWN_SCHEMAS = {
251
251
  }
252
252
 
253
253
 
254
- class Schema(Record, CanCurate, TracksRun):
254
+ class Schema(SQLRecord, CanCurate, TracksRun):
255
255
  """Schemas of a dataset such as the set of columns of a `DataFrame`.
256
256
 
257
257
  Composite schemas can have multiple slots, e.g., for an `AnnData`, one schema for slot `obs` and another one for `var`.
258
258
 
259
259
  Args:
260
- features: `list[Record] | list[tuple[Feature, dict]] | None = None` Feature
260
+ features: `list[SQLRecord] | list[tuple[Feature, dict]] | None = None` Feature
261
261
  records, e.g., `[Feature(...), Feature(...)]` or Features with their config, e.g., `[Feature(...).with_config(optional=True)]`.
262
262
  index: `Feature | None = None` A :class:`~lamindb.Feature` record to validate an index of a `DataFrame` and therefore also, e.g., `AnnData` obs and var indices.
263
263
  slots: `dict[str, Schema] | None = None` A dictionary mapping slot names to :class:`~lamindb.Schema` objects.
@@ -350,7 +350,7 @@ class Schema(Record, CanCurate, TracksRun):
350
350
  schema = ln.Schema.from_df(df)
351
351
  """
352
352
 
353
- class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
353
+ class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
354
354
  abstract = False
355
355
 
356
356
  _name_field: str = "name"
@@ -363,18 +363,16 @@ class Schema(Record, CanCurate, TracksRun):
363
363
 
364
364
  id: int = models.AutoField(primary_key=True)
365
365
  """Internal id, valid only in one DB instance."""
366
+ # Before lamindb 1.5, the uid was 20 characters long. Since lamindb 1.5, it is 16 characters long.
366
367
  uid: str = CharField(editable=False, unique=True, db_index=True, max_length=20)
367
- """A universal id.
368
-
369
- Before lamindb 1.5, it was 20 char long. Since lamindb 1.5, it is 16 char long.
370
- """
368
+ """A universal id."""
371
369
  name: str | None = CharField(max_length=150, null=True, db_index=True)
372
370
  """A name."""
373
371
  description: str | None = CharField(null=True, db_index=True)
374
372
  """A description."""
375
373
  n: int = IntegerField()
376
374
  """Number of features in the schema."""
377
- type: Schema | None = ForeignKey("self", PROTECT, null=True, related_name="records")
375
+ type: Schema | None = ForeignKey("self", PROTECT, null=True, related_name="schemas")
378
376
  """Type of schema.
379
377
 
380
378
  Allows grouping schemas by type, e.g., all measurements evaluating gene expression vs. protein expression vs. multimodal.
@@ -383,8 +381,8 @@ class Schema(Record, CanCurate, TracksRun):
383
381
 
384
382
  Here are a few more examples for type names: `'ExpressionPanel'`, `'ProteinPanel'`, `'Multimodal'`, `'Metadata'`, `'Embedding'`.
385
383
  """
386
- records: Schema
387
- """Records of this type."""
384
+ instances: Schema
385
+ """Schemas of this type (can only be non-empty if `is_type` is `True`)."""
388
386
  is_type: bool = BooleanField(default=False, db_index=True, null=True)
389
387
  """Distinguish types from instances of the type."""
390
388
  itype: str | None = CharField(
@@ -434,8 +432,6 @@ class Schema(Record, CanCurate, TracksRun):
434
432
  """
435
433
  features: Feature
436
434
  """The features contained in the schema."""
437
- params: Param
438
- """The params contained in the schema."""
439
435
  artifacts: Artifact
440
436
  """The artifacts that measure a feature set that matches this schema."""
441
437
  validated_artifacts: Artifact
@@ -468,7 +464,7 @@ class Schema(Record, CanCurate, TracksRun):
468
464
  @overload
469
465
  def __init__(
470
466
  self,
471
- features: list[Record] | list[tuple[Feature, dict]] | None = None,
467
+ features: list[SQLRecord] | list[tuple[Feature, dict]] | None = None,
472
468
  index: Feature | None = None,
473
469
  slots: dict[str, Schema] | None = None,
474
470
  name: str | None = None,
@@ -503,12 +499,14 @@ class Schema(Record, CanCurate, TracksRun):
503
499
  if len(args) > 1:
504
500
  raise ValueError("Only one non-keyword arg allowed: features")
505
501
 
506
- features: list[Record] | None = args[0] if args else kwargs.pop("features", [])
502
+ features: list[SQLRecord] | None = (
503
+ args[0] if args else kwargs.pop("features", [])
504
+ )
507
505
  index: Feature | None = kwargs.pop("index", None)
508
506
  slots: dict[str, Schema] = kwargs.pop("slots", {})
509
507
  name: str | None = kwargs.pop("name", None)
510
508
  description: str | None = kwargs.pop("description", None)
511
- itype: str | Record | DeferredAttribute | None = kwargs.pop("itype", None)
509
+ itype: str | SQLRecord | DeferredAttribute | None = kwargs.pop("itype", None)
512
510
  flexible: bool | None = kwargs.pop("flexible", None)
513
511
  type: Feature | None = kwargs.pop("type", None)
514
512
  is_type: bool = kwargs.pop("is_type", False)
@@ -590,12 +588,12 @@ class Schema(Record, CanCurate, TracksRun):
590
588
 
591
589
  def _validate_kwargs_calculate_hash(
592
590
  self,
593
- features: list[Record],
591
+ features: list[SQLRecord],
594
592
  index: Feature | None,
595
593
  slots: dict[str, Schema],
596
594
  name: str | None,
597
595
  description: str | None,
598
- itype: str | Record | DeferredAttribute | None,
596
+ itype: str | SQLRecord | DeferredAttribute | None,
599
597
  flexible: bool | None,
600
598
  type: Feature | None,
601
599
  is_type: bool,
@@ -737,8 +735,8 @@ class Schema(Record, CanCurate, TracksRun):
737
735
  type: str | None = None,
738
736
  name: str | None = None,
739
737
  mute: bool = False,
740
- organism: Record | str | None = None,
741
- source: Record | None = None,
738
+ organism: SQLRecord | str | None = None,
739
+ source: SQLRecord | None = None,
742
740
  raise_validation_error: bool = True,
743
741
  ) -> Schema:
744
742
  """Create feature set for validated features.
@@ -772,7 +770,7 @@ class Schema(Record, CanCurate, TracksRun):
772
770
  """
773
771
  if not isinstance(field, FieldAttr):
774
772
  raise TypeError(
775
- "Argument `field` must be a Record field, e.g., `Feature.name`"
773
+ "Argument `field` must be a SQLRecord field, e.g., `Feature.name`"
776
774
  )
777
775
  if len(values) == 0:
778
776
  raise ValueError("Provide a list of at least one value")
@@ -815,8 +813,8 @@ class Schema(Record, CanCurate, TracksRun):
815
813
  field: FieldAttr = Feature.name,
816
814
  name: str | None = None,
817
815
  mute: bool = False,
818
- organism: Record | str | None = None,
819
- source: Record | None = None,
816
+ organism: SQLRecord | str | None = None,
817
+ source: SQLRecord | None = None,
820
818
  ) -> Schema | None:
821
819
  """Create schema for valid columns."""
822
820
  registry = field.field.model
@@ -856,7 +854,7 @@ class Schema(Record, CanCurate, TracksRun):
856
854
  """Save."""
857
855
  from .save import bulk_create
858
856
 
859
- if not self._state.adding:
857
+ if self.pk is not None:
860
858
  features = (
861
859
  self._features[1]
862
860
  if hasattr(self, "_features")
@@ -1147,7 +1145,7 @@ def _get_related_name(self: Schema) -> str:
1147
1145
  return related_name
1148
1146
 
1149
1147
 
1150
- class SchemaFeature(BasicRecord, LinkORM):
1148
+ class SchemaFeature(BaseSQLRecord, IsLink):
1151
1149
  id: int = models.BigAutoField(primary_key=True)
1152
1150
  schema: Schema = ForeignKey(Schema, CASCADE, related_name="links_feature")
1153
1151
  feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_schema")
@@ -1156,16 +1154,7 @@ class SchemaFeature(BasicRecord, LinkORM):
1156
1154
  unique_together = ("schema", "feature")
1157
1155
 
1158
1156
 
1159
- class SchemaParam(BasicRecord, LinkORM):
1160
- id: int = models.BigAutoField(primary_key=True)
1161
- schema: Schema = ForeignKey(Schema, CASCADE, related_name="+")
1162
- param: Param = ForeignKey(Param, PROTECT, related_name="+")
1163
-
1164
- class Meta:
1165
- unique_together = ("schema", "param")
1166
-
1167
-
1168
- class ArtifactSchema(BasicRecord, LinkORM, TracksRun):
1157
+ class ArtifactSchema(BaseSQLRecord, IsLink, TracksRun):
1169
1158
  id: int = models.BigAutoField(primary_key=True)
1170
1159
  artifact: Artifact = ForeignKey("Artifact", CASCADE, related_name="_links_schema")
1171
1160
  schema: Schema = ForeignKey(Schema, PROTECT, related_name="_links_artifact")
@@ -1176,7 +1165,7 @@ class ArtifactSchema(BasicRecord, LinkORM, TracksRun):
1176
1165
  unique_together = (("artifact", "schema"), ("artifact", "slot"))
1177
1166
 
1178
1167
 
1179
- class SchemaComponent(BasicRecord, LinkORM, TracksRun):
1168
+ class SchemaComponent(BaseSQLRecord, IsLink, TracksRun):
1180
1169
  id: int = models.BigAutoField(primary_key=True)
1181
1170
  composite: Schema = ForeignKey(Schema, CASCADE, related_name="links_composite")
1182
1171
  component: Schema = ForeignKey(Schema, PROTECT, related_name="links_component")