lamindb 1.10.2__py3-none-any.whl → 1.11a1__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (47)
  1. lamindb/__init__.py +89 -49
  2. lamindb/_finish.py +14 -12
  3. lamindb/_tracked.py +2 -4
  4. lamindb/_view.py +1 -1
  5. lamindb/base/__init__.py +2 -1
  6. lamindb/base/dtypes.py +76 -0
  7. lamindb/core/_settings.py +2 -2
  8. lamindb/core/storage/_anndata_accessor.py +29 -9
  9. lamindb/curators/_legacy.py +16 -3
  10. lamindb/curators/core.py +432 -186
  11. lamindb/examples/cellxgene/__init__.py +8 -3
  12. lamindb/examples/cellxgene/_cellxgene.py +127 -13
  13. lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
  14. lamindb/examples/croissant/__init__.py +12 -2
  15. lamindb/examples/datasets/__init__.py +2 -2
  16. lamindb/examples/datasets/_core.py +1 -1
  17. lamindb/examples/datasets/_small.py +66 -22
  18. lamindb/examples/datasets/mini_immuno.py +1 -0
  19. lamindb/migrations/0119_squashed.py +5 -2
  20. lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
  21. lamindb/migrations/0121_recorduser.py +53 -0
  22. lamindb/models/__init__.py +3 -1
  23. lamindb/models/_describe.py +2 -2
  24. lamindb/models/_feature_manager.py +53 -53
  25. lamindb/models/_from_values.py +2 -2
  26. lamindb/models/_is_versioned.py +4 -4
  27. lamindb/models/_label_manager.py +4 -4
  28. lamindb/models/artifact.py +305 -116
  29. lamindb/models/artifact_set.py +36 -1
  30. lamindb/models/can_curate.py +1 -2
  31. lamindb/models/collection.py +3 -34
  32. lamindb/models/feature.py +111 -7
  33. lamindb/models/has_parents.py +11 -11
  34. lamindb/models/project.py +18 -0
  35. lamindb/models/query_manager.py +16 -7
  36. lamindb/models/query_set.py +59 -34
  37. lamindb/models/record.py +25 -4
  38. lamindb/models/run.py +8 -6
  39. lamindb/models/schema.py +54 -26
  40. lamindb/models/sqlrecord.py +123 -25
  41. lamindb/models/storage.py +59 -14
  42. lamindb/models/transform.py +17 -17
  43. lamindb/models/ulabel.py +6 -1
  44. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/METADATA +4 -5
  45. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/RECORD +47 -44
  46. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/WHEEL +1 -1
  47. {lamindb-1.10.2.dist-info/licenses → lamindb-1.11a1.dist-info}/LICENSE +0 -0
@@ -13,6 +13,7 @@ from django.db import models
13
13
  from django.db.models import F, ForeignKey, ManyToManyField, Q, Subquery
14
14
  from django.db.models.fields.related import ForeignObjectRel
15
15
  from lamin_utils import logger
16
+ from lamindb_setup.core import deprecated
16
17
  from lamindb_setup.core._docs import doc_args
17
18
 
18
19
  from ..errors import DoesNotExist
@@ -144,7 +145,6 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
144
145
  queryset,
145
146
  expressions,
146
147
  )
147
-
148
148
  if issubclass(queryset.model, SQLRecord):
149
149
  # branch_id is set to 1 unless expressions contains id or uid
150
150
  if not (
@@ -173,32 +173,28 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
173
173
 
174
174
 
175
175
  def get(
176
- registry_or_queryset: Union[type[SQLRecord], QuerySet],
176
+ registry_or_queryset: Union[type[SQLRecord], BasicQuerySet],
177
177
  idlike: int | str | None = None,
178
178
  **expressions,
179
179
  ) -> SQLRecord:
180
- if isinstance(registry_or_queryset, QuerySet):
180
+ if isinstance(registry_or_queryset, BasicQuerySet):
181
181
  qs = registry_or_queryset
182
182
  registry = qs.model
183
183
  else:
184
- qs = QuerySet(model=registry_or_queryset)
184
+ qs = BasicQuerySet(model=registry_or_queryset)
185
185
  registry = registry_or_queryset
186
186
  if isinstance(idlike, int):
187
- return super(QuerySet, qs).get(id=idlike) # type: ignore
187
+ return BasicQuerySet.get(qs, id=idlike)
188
188
  elif isinstance(idlike, str):
189
- qs = qs.filter(uid__startswith=idlike)
190
-
191
189
  NAME_FIELD = (
192
190
  registry._name_field if hasattr(registry, "_name_field") else "name"
193
191
  )
194
192
  DOESNOTEXIST_MSG = f"No record found with uid '{idlike}'. Did you forget a keyword as in {registry.__name__}.get({NAME_FIELD}='{idlike}')?"
195
-
196
- if issubclass(registry, IsVersioned):
197
- if len(idlike) <= registry._len_stem_uid:
198
- return one_helper(qs.latest_version(), DOESNOTEXIST_MSG)
199
- else:
200
- return one_helper(qs, DOESNOTEXIST_MSG)
193
+ if issubclass(registry, IsVersioned) and len(idlike) <= registry._len_stem_uid:
194
+ qs = BasicQuerySet.filter(qs, uid__startswith=idlike, is_latest=True)
195
+ return one_helper(qs, DOESNOTEXIST_MSG)
201
196
  else:
197
+ qs = BasicQuerySet.filter(qs, uid__startswith=idlike)
202
198
  return one_helper(qs, DOESNOTEXIST_MSG)
203
199
  else:
204
200
  assert idlike is None # noqa: S101
@@ -210,24 +206,23 @@ def get(
210
206
  if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
211
207
  expressions["is_latest"] = True
212
208
  try:
213
- return registry.objects.using(qs.db).get(**expressions)
214
- except registry.DoesNotExist:
209
+ return BasicQuerySet.get(qs, **expressions)
210
+ except registry.DoesNotExist as e:
215
211
  # handle the case in which the is_latest injection led to a missed query
216
212
  if "is_latest" in expressions and is_latest_was_not_in_expressions:
217
213
  expressions.pop("is_latest")
218
214
  result = (
219
- registry.objects.using(qs.db)
220
- .filter(**expressions)
215
+ BasicQuerySet.filter(qs, **expressions)
221
216
  .order_by("-created_at")
222
217
  .first()
223
218
  )
224
219
  if result is not None:
225
220
  return result
226
- raise registry.DoesNotExist from registry.DoesNotExist
221
+ raise registry.DoesNotExist from e
227
222
 
228
223
 
229
224
  class SQLRecordList(UserList, Generic[T]):
230
- """Is ordered, can't be queried, but has `.df()`."""
225
+ """Is ordered, can't be queried, but has `.to_dataframe()`."""
231
226
 
232
227
  def __init__(self, records: Iterable[T]):
233
228
  if isinstance(records, list):
@@ -235,16 +230,24 @@ class SQLRecordList(UserList, Generic[T]):
235
230
  else:
236
231
  super().__init__(records) # Let UserList handle the conversion
237
232
 
238
- def df(self) -> pd.DataFrame:
233
+ def to_dataframe(self) -> pd.DataFrame:
239
234
  keys = get_keys_from_df(self.data, self.data[0].__class__)
240
235
  values = [record.__dict__ for record in self.data]
241
236
  return pd.DataFrame(values, columns=keys)
242
237
 
243
- def list(
238
+ @deprecated(new_name="to_dataframe")
239
+ def df(self) -> pd.DataFrame:
240
+ return self.to_dataframe()
241
+
242
+ def to_list(
244
243
  self, field: str
245
- ) -> list[str]: # meaningful to be parallel with list() in QuerySet
244
+ ) -> list[str]: # meaningful to be parallel with to_list() in QuerySet
246
245
  return [getattr(record, field) for record in self.data]
247
246
 
247
+ @deprecated(new_name="to_list")
248
+ def list(self, field: str) -> list[str]:
249
+ return self.to_list(field)
250
+
248
251
  def one(self) -> T:
249
252
  """Exactly one result. Throws error if there are more or none."""
250
253
  return one_helper(self)
@@ -348,7 +351,7 @@ def get_feature_annotate_kwargs(
348
351
  | Q(dtype__startswith="cat[ULabel")
349
352
  | Q(dtype__startswith="cat[Record")
350
353
  )
351
- feature_names = feature_qs.list("name")
354
+ feature_names = feature_qs.to_list("name")
352
355
  logger.important(
353
356
  f"queried for all categorical features with dtype ULabel or Record and non-categorical features: ({len(feature_names)}) {feature_names}"
354
357
  )
@@ -671,8 +674,8 @@ class BasicQuerySet(models.QuerySet):
671
674
  new_cls = cls
672
675
  return object.__new__(new_cls)
673
676
 
674
- @doc_args(SQLRecord.df.__doc__)
675
- def df(
677
+ @doc_args(SQLRecord.to_dataframe.__doc__)
678
+ def to_dataframe(
676
679
  self,
677
680
  include: str | list[str] | None = None,
678
681
  features: bool | list[str] | str | None = None,
@@ -706,7 +709,7 @@ class BasicQuerySet(models.QuerySet):
706
709
  id_subquery = self.values("id")
707
710
  time = logger.debug("finished get id values", time=time)
708
711
  # for annotate, we want the queryset without filters so that joins don't affect the annotations
709
- query_set_without_filters = self.model.objects.filter(
712
+ query_set_without_filters = self.model.objects.using(self._db).filter(
710
713
  id__in=Subquery(id_subquery)
711
714
  )
712
715
  time = logger.debug("finished get query_set_without_filters", time=time)
@@ -739,26 +742,34 @@ class BasicQuerySet(models.QuerySet):
739
742
  time = logger.debug("finished", time=time)
740
743
  return df_reshaped
741
744
 
745
+ @deprecated(new_name="to_dataframe")
746
+ def df(
747
+ self,
748
+ include: str | list[str] | None = None,
749
+ features: bool | list[str] | str | None = None,
750
+ ) -> pd.DataFrame:
751
+ return self.to_dataframe(include, features)
752
+
742
753
  def delete(self, *args, **kwargs):
743
754
  """Delete all records in the query set."""
744
- from lamindb.models import Artifact, Collection, Run, Transform
755
+ from lamindb.models import Artifact, Collection, Run, Storage, Transform
745
756
 
746
757
  # both Transform & Run might reference artifacts
747
- if self.model in {Artifact, Collection, Transform, Run}:
758
+ if self.model in {Artifact, Collection, Transform, Run, Storage}:
748
759
  for record in self:
749
760
  logger.important(f"deleting {record}")
750
761
  record.delete(*args, **kwargs)
751
762
  else:
752
763
  super().delete(*args, **kwargs)
753
764
 
754
- def list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
765
+ def to_list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
755
766
  """Populate an (unordered) list with the results.
756
767
 
757
768
  Note that the order in this list is only meaningful if you ordered the underlying query set with `.order_by()`.
758
769
 
759
770
  Examples:
760
- >>> queryset.list() # list of records
761
- >>> queryset.list("name") # list of values
771
+ >>> queryset.to_list() # list of records
772
+ >>> queryset.to_list("name") # list of values
762
773
  """
763
774
  if field is None:
764
775
  return list(self)
@@ -766,6 +777,10 @@ class BasicQuerySet(models.QuerySet):
766
777
  # list casting is necessary because values_list does not return a list
767
778
  return list(self.values_list(field, flat=True))
768
779
 
780
+ @deprecated(new_name="to_list")
781
+ def list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
782
+ return self.to_list(field)
783
+
769
784
  def first(self) -> SQLRecord | None:
770
785
  """If non-empty, the first result in the query set, otherwise ``None``.
771
786
 
@@ -869,8 +884,18 @@ class QuerySet(BasicQuerySet):
869
884
  """Query a single record. Raises error if there are more or none."""
870
885
  is_run_input = expressions.pop("is_run_input", False)
871
886
 
887
+ if path := expressions.pop("path", None):
888
+ from .artifact_set import ArtifactSet, artifacts_from_path
889
+
890
+ if not isinstance(self, ArtifactSet):
891
+ raise ValueError("Querying by path is only possible for artifacts.")
892
+
893
+ qs = artifacts_from_path(self, path)
894
+ else:
895
+ qs = self
896
+
872
897
  try:
873
- record = get(self, idlike, **expressions)
898
+ record = get(qs, idlike, **expressions) # type: ignore
874
899
  except ValueError as e:
875
900
  # Pass through original error for explicit id lookups
876
901
  if "Field 'id' expected a number" in str(e):
@@ -886,8 +911,8 @@ class QuerySet(BasicQuerySet):
886
911
  raise # pragma: no cover
887
912
 
888
913
  if is_run_input is not False: # might be None or True or Run
889
- from lamindb.models.artifact import Artifact, _track_run_input
890
- from lamindb.models.collection import Collection
914
+ from .artifact import Artifact, _track_run_input
915
+ from .collection import Collection
891
916
 
892
917
  if isinstance(record, (Artifact, Collection)):
893
918
  _track_run_input(record, is_run_input)
lamindb/models/record.py CHANGED
@@ -20,7 +20,7 @@ from .can_curate import CanCurate
20
20
  from .feature import Feature
21
21
  from .has_parents import _query_relatives
22
22
  from .query_set import reorder_subset_columns_in_df
23
- from .run import Run, TracksRun, TracksUpdates
23
+ from .run import Run, TracksRun, TracksUpdates, User
24
24
  from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, _get_record_kwargs
25
25
  from .transform import Transform
26
26
  from .ulabel import ULabel
@@ -54,6 +54,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
54
54
 
55
55
  class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
56
56
  abstract = False
57
+ app_label = "lamindb"
57
58
 
58
59
  _name_field: str = "name"
59
60
 
@@ -207,11 +208,13 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
207
208
  def to_pandas(self) -> pd.DataFrame:
208
209
  """Export all children of a record type recursively to a pandas DataFrame."""
209
210
  assert self.is_type, "Only types can be exported as dataframes" # noqa: S101
210
- df = self.query_children().df(features="queryset")
211
+ df = self.query_children().to_dataframe(features="queryset")
211
212
  df.columns.values[0] = "__lamindb_record_uid__"
212
213
  df.columns.values[1] = "__lamindb_record_name__"
213
214
  if self.schema is not None:
214
- desired_order = self.schema.members.list("name") # only members is ordered!
215
+ desired_order = self.schema.members.to_list(
216
+ "name"
217
+ ) # only members is ordered!
215
218
  else:
216
219
  # sort alphabetically for now
217
220
  desired_order = df.columns[2:].tolist()
@@ -235,7 +238,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
235
238
  )
236
239
  run = Run(transform, initiated_by_run=context.run).save()
237
240
  run.input_records.add(self)
238
- return Artifact.from_df(
241
+ return Artifact.from_dataframe(
239
242
  self.to_pandas(),
240
243
  key=key,
241
244
  description=f"Export of sheet {self.uid}{description}",
@@ -252,6 +255,7 @@ class RecordJson(BaseSQLRecord, IsLink):
252
255
  value: Any = JSONField(default=None, db_default=None)
253
256
 
254
257
  class Meta:
258
+ app_label = "lamindb"
255
259
  unique_together = ("record", "feature") # a list is modeled as a list in json
256
260
 
257
261
 
@@ -266,6 +270,7 @@ class RecordRecord(SQLRecord, IsLink):
266
270
  ) # component
267
271
 
268
272
  class Meta:
273
+ app_label = "lamindb"
269
274
  unique_together = ("record", "feature", "value")
270
275
 
271
276
 
@@ -277,6 +282,19 @@ class RecordULabel(BaseSQLRecord, IsLink):
277
282
 
278
283
  class Meta:
279
284
  # allows linking exactly one record to one ulabel per feature, because we likely don't want to have Many
285
+ app_label = "lamindb"
286
+ unique_together = ("record", "feature", "value")
287
+
288
+
289
+ class RecordUser(BaseSQLRecord, IsLink):
290
+ id: int = models.BigAutoField(primary_key=True)
291
+ record: Record = ForeignKey(Record, CASCADE, related_name="values_user")
292
+ feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recorduser")
293
+ value: User = ForeignKey(User, PROTECT, related_name="links_record")
294
+
295
+ class Meta:
296
+ # allows linking exactly one record to one user per feature, because we likely don't want to have Many
297
+ app_label = "lamindb"
280
298
  unique_together = ("record", "feature", "value")
281
299
 
282
300
 
@@ -288,6 +306,7 @@ class RecordRun(BaseSQLRecord, IsLink):
288
306
 
289
307
  class Meta:
290
308
  # allows linking several records to a single run for the same feature because we'll likely need this
309
+ app_label = "lamindb"
291
310
  unique_together = ("record", "feature", "value")
292
311
 
293
312
 
@@ -299,6 +318,7 @@ class RecordArtifact(BaseSQLRecord, IsLink):
299
318
 
300
319
  class Meta:
301
320
  # allows linking several records to a single artifact for the same feature because we'll likely need this
321
+ app_label = "lamindb"
302
322
  unique_together = ("record", "feature", "value")
303
323
 
304
324
 
@@ -315,4 +335,5 @@ class ArtifactRecord(BaseSQLRecord, IsLink):
315
335
 
316
336
  class Meta:
317
337
  # allows linking several records to a single artifact for the same feature because we'll likely need this
338
+ app_label = "lamindb"
318
339
  unique_together = ("artifact", "record", "feature")
lamindb/models/run.py CHANGED
@@ -142,6 +142,9 @@ class User(BaseSQLRecord, CanCurate):
142
142
  >>> user
143
143
  """
144
144
 
145
+ class Meta:
146
+ app_label = "lamindb"
147
+
145
148
  _name_field: str = "handle"
146
149
 
147
150
  id: int = models.AutoField(primary_key=True)
@@ -223,6 +226,9 @@ class Run(SQLRecord):
223
226
  >>> ln.context.run
224
227
  """
225
228
 
229
+ class Meta:
230
+ app_label = "lamindb"
231
+
226
232
  _name_field: str = "started_at"
227
233
 
228
234
  id: int = models.BigAutoField(primary_key=True)
@@ -368,11 +374,6 @@ class Run(SQLRecord):
368
374
  reference_type=reference_type,
369
375
  )
370
376
 
371
- def delete(self) -> None:
372
- """Delete."""
373
- delete_run_artifacts(self)
374
- super().delete()
375
-
376
377
  @property
377
378
  @deprecated("features")
378
379
  def params(self) -> FeatureManager:
@@ -470,7 +471,7 @@ def delete_run_artifacts(run: Run) -> None:
470
471
  if environment._environment_of.count() == 0:
471
472
  environment.delete(permanent=True)
472
473
  if report is not None:
473
- # only delete if there are no other runs attached to this environment
474
+ # only delete if there are no other runs attached to this report
474
475
  if report._report_of.count() == 0:
475
476
  report.delete(permanent=True)
476
477
 
@@ -492,4 +493,5 @@ class RunFeatureValue(BaseSQLRecord, IsLink):
492
493
  """Creator of record."""
493
494
 
494
495
  class Meta:
496
+ app_label = "lamindb"
495
497
  unique_together = ("run", "featurevalue")
lamindb/models/schema.py CHANGED
@@ -6,6 +6,7 @@ import numpy as np
6
6
  from django.db import models
7
7
  from django.db.models import CASCADE, PROTECT, ManyToManyField
8
8
  from lamin_utils import logger
9
+ from lamindb_setup.core import deprecated
9
10
  from lamindb_setup.core.hashing import HASH_LENGTH, hash_string
10
11
  from rich.table import Table
11
12
  from rich.text import Text
@@ -348,11 +349,12 @@ class Schema(SQLRecord, CanCurate, TracksRun):
348
349
 
349
350
  # from a dataframe
350
351
  df = pd.DataFrame({"feat1": [1, 2], "feat2": [3.1, 4.2], "feat3": ["cond1", "cond2"]})
351
- schema = ln.Schema.from_df(df)
352
+ schema = ln.Schema.from_dataframe(df)
352
353
  """
353
354
 
354
355
  class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
355
356
  abstract = False
357
+ app_label = "lamindb"
356
358
 
357
359
  _name_field: str = "name"
358
360
  _aux_fields: dict[str, tuple[str, type]] = {
@@ -576,19 +578,22 @@ class Schema(SQLRecord, CanCurate, TracksRun):
576
578
  self.optionals.set(optional_features)
577
579
  return None
578
580
  self._slots: dict[str, Schema] = {}
581
+
579
582
  if features:
580
583
  self._features = (get_related_name(features_registry), features) # type: ignore
581
- elif slots:
584
+ if slots:
582
585
  for slot_key, component in slots.items():
583
586
  if component._state.adding:
584
587
  raise InvalidArgument(
585
588
  f"schema for {slot_key} {component} must be saved before use"
586
589
  )
587
590
  self._slots = slots
591
+
588
592
  if validated_kwargs["hash"] in KNOWN_SCHEMAS:
589
593
  validated_kwargs["uid"] = KNOWN_SCHEMAS[validated_kwargs["hash"]]
590
594
  else:
591
595
  validated_kwargs["uid"] = ids.base62_16()
596
+
592
597
  super().__init__(**validated_kwargs)
593
598
 
594
599
  def _validate_kwargs_calculate_hash(
@@ -623,14 +628,20 @@ class Schema(SQLRecord, CanCurate, TracksRun):
623
628
  raise TypeError("index must be a Feature")
624
629
  features.insert(0, index)
625
630
 
631
+ if slots:
632
+ itype = "Composite"
633
+ if otype is None:
634
+ raise InvalidArgument("Please pass otype != None for composite schemas")
635
+
626
636
  if features:
627
637
  features, configs = get_features_config(features)
628
638
  features_registry = validate_features(features)
629
- itype_compare = features_registry.__get_name_with_module__()
630
- if itype is not None:
631
- assert itype.startswith(itype_compare), str(itype_compare) # noqa: S101
632
- else:
633
- itype = itype_compare
639
+ if itype != "Composite":
640
+ itype_compare = features_registry.__get_name_with_module__()
641
+ if itype is not None:
642
+ assert itype.startswith(itype_compare), str(itype_compare) # noqa: S101
643
+ else:
644
+ itype = itype_compare
634
645
  if n_features is not None:
635
646
  if n_features != len(features):
636
647
  logger.important(f"updating to n {len(features)} features")
@@ -654,11 +665,6 @@ class Schema(SQLRecord, CanCurate, TracksRun):
654
665
  if flexible is None:
655
666
  flexible = flexible_default
656
667
 
657
- if slots:
658
- itype = "Composite"
659
- if otype is None:
660
- raise InvalidArgument("Please pass otype != None for composite schemas")
661
-
662
668
  if itype is not None and not isinstance(itype, str):
663
669
  itype_str = serialize_dtype(itype, is_itype=True)
664
670
  else:
@@ -771,7 +777,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
771
777
  cls,
772
778
  values: ListLike,
773
779
  field: FieldAttr = Feature.name,
774
- type: str | None = None,
780
+ dtype: str | None = None,
775
781
  name: str | None = None,
776
782
  mute: bool = False,
777
783
  organism: SQLRecord | str | None = None,
@@ -783,7 +789,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
783
789
  Args:
784
790
  values: A list of values, like feature names or ids.
785
791
  field: The field of a reference registry to map values.
786
- type: The simple type.
792
+ dtype: The simple dtype.
787
793
  Defaults to `None` if reference registry is :class:`~lamindb.Feature`,
788
794
  defaults to `"float"` otherwise.
789
795
  name: A name.
@@ -816,8 +822,8 @@ class Schema(SQLRecord, CanCurate, TracksRun):
816
822
  if isinstance(values, DICT_KEYS_TYPE):
817
823
  values = list(values)
818
824
  registry = field.field.model
819
- if registry != Feature and type is None:
820
- type = NUMBER_TYPE
825
+ if registry != Feature and dtype is None:
826
+ dtype = NUMBER_TYPE
821
827
  logger.debug("setting feature set to 'number'")
822
828
  validated = registry.validate(values, field=field, mute=mute, organism=organism)
823
829
  values_array = np.array(values)
@@ -841,12 +847,12 @@ class Schema(SQLRecord, CanCurate, TracksRun):
841
847
  schema = Schema(
842
848
  features=validated_features,
843
849
  name=name,
844
- dtype=get_type_str(type),
850
+ dtype=get_type_str(dtype),
845
851
  )
846
852
  return schema
847
853
 
848
854
  @classmethod
849
- def from_df(
855
+ def from_dataframe(
850
856
  cls,
851
857
  df: pd.DataFrame,
852
858
  field: FieldAttr = Feature.name,
@@ -889,15 +895,28 @@ class Schema(SQLRecord, CanCurate, TracksRun):
889
895
  )
890
896
  return schema
891
897
 
898
+ @classmethod
899
+ @deprecated("from_dataframe")
900
+ def from_df(
901
+ cls,
902
+ df: pd.DataFrame,
903
+ field: FieldAttr = Feature.name,
904
+ name: str | None = None,
905
+ mute: bool = False,
906
+ organism: SQLRecord | str | None = None,
907
+ source: SQLRecord | None = None,
908
+ ) -> Schema | None:
909
+ return cls.from_dataframe(df, field, name, mute, organism, source)
910
+
892
911
  def save(self, *args, **kwargs) -> Schema:
893
- """Save."""
912
+ """Save schema."""
894
913
  from .save import bulk_create
895
914
 
896
915
  if self.pk is not None:
897
916
  features = (
898
917
  self._features[1]
899
918
  if hasattr(self, "_features")
900
- else (self.members.list() if self.members.exists() else [])
919
+ else (self.members.to_list() if self.members.exists() else [])
901
920
  )
902
921
  index_feature = self.index
903
922
  _, validated_kwargs, _, _, _ = self._validate_kwargs_calculate_hash(
@@ -925,7 +944,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
925
944
  datasets = Artifact.filter(schema=self).all()
926
945
  if datasets.exists():
927
946
  logger.warning(
928
- f"you updated the schema hash and might invalidate datasets that were previously validated with this schema: {datasets.list('uid')}"
947
+ f"you updated the schema hash and might invalidate datasets that were previously validated with this schema: {datasets.to_list('uid')}"
929
948
  )
930
949
  self.hash = validated_kwargs["hash"]
931
950
  self.n = validated_kwargs["n"]
@@ -947,13 +966,16 @@ class Schema(SQLRecord, CanCurate, TracksRun):
947
966
  assert self.n > 0 # noqa: S101
948
967
  using: bool | None = kwargs.pop("using", None)
949
968
  related_name, records = self._features
969
+
970
+ # .set() does not preserve the order but orders by the feature primary key
950
971
  # only the following method preserves the order
951
- # .set() does not preserve the order but orders by
952
- # the feature primary key
953
972
  through_model = getattr(self, related_name).through
954
- related_model_split = parse_cat_dtype(self.itype, is_itype=True)[
955
- "registry_str"
956
- ].split(".")
973
+ if self.itype == "Composite":
974
+ related_model_split = ["Feature"]
975
+ else:
976
+ related_model_split = parse_cat_dtype(self.itype, is_itype=True)[
977
+ "registry_str"
978
+ ].split(".")
957
979
  if len(related_model_split) == 1:
958
980
  related_field = related_model_split[0].lower()
959
981
  else:
@@ -965,6 +987,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
965
987
  ]
966
988
  through_model.objects.using(using).bulk_create(links, ignore_conflicts=True)
967
989
  delattr(self, "_features")
990
+
968
991
  return self
969
992
 
970
993
  @property
@@ -978,6 +1001,8 @@ class Schema(SQLRecord, CanCurate, TracksRun):
978
1001
  # this should return a queryset and not a list...
979
1002
  # need to fix this
980
1003
  return self._features[1]
1004
+ if len(self.features.all()) > 0:
1005
+ return self.features.order_by("links_schema__id")
981
1006
  if self.itype == "Composite" or self.is_type:
982
1007
  return Feature.objects.none()
983
1008
  related_name = self._get_related_name()
@@ -1200,6 +1225,7 @@ class SchemaFeature(BaseSQLRecord, IsLink):
1200
1225
  feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_schema")
1201
1226
 
1202
1227
  class Meta:
1228
+ app_label = "lamindb"
1203
1229
  unique_together = ("schema", "feature")
1204
1230
 
1205
1231
 
@@ -1211,6 +1237,7 @@ class ArtifactSchema(BaseSQLRecord, IsLink, TracksRun):
1211
1237
  feature_ref_is_semantic: bool | None = BooleanField(null=True)
1212
1238
 
1213
1239
  class Meta:
1240
+ app_label = "lamindb"
1214
1241
  unique_together = (("artifact", "schema"), ("artifact", "slot"))
1215
1242
 
1216
1243
 
@@ -1221,6 +1248,7 @@ class SchemaComponent(BaseSQLRecord, IsLink, TracksRun):
1221
1248
  slot: str | None = CharField(null=True)
1222
1249
 
1223
1250
  class Meta:
1251
+ app_label = "lamindb"
1224
1252
  unique_together = (("composite", "slot", "component"), ("composite", "slot"))
1225
1253
 
1226
1254