lamindb 1.10.2__py3-none-any.whl → 1.11a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +89 -49
- lamindb/_finish.py +14 -12
- lamindb/_tracked.py +2 -4
- lamindb/_view.py +1 -1
- lamindb/base/__init__.py +2 -1
- lamindb/base/dtypes.py +76 -0
- lamindb/core/_settings.py +2 -2
- lamindb/core/storage/_anndata_accessor.py +29 -9
- lamindb/curators/_legacy.py +16 -3
- lamindb/curators/core.py +432 -186
- lamindb/examples/cellxgene/__init__.py +8 -3
- lamindb/examples/cellxgene/_cellxgene.py +127 -13
- lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
- lamindb/examples/croissant/__init__.py +12 -2
- lamindb/examples/datasets/__init__.py +2 -2
- lamindb/examples/datasets/_core.py +1 -1
- lamindb/examples/datasets/_small.py +66 -22
- lamindb/examples/datasets/mini_immuno.py +1 -0
- lamindb/migrations/0119_squashed.py +5 -2
- lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
- lamindb/migrations/0121_recorduser.py +53 -0
- lamindb/models/__init__.py +3 -1
- lamindb/models/_describe.py +2 -2
- lamindb/models/_feature_manager.py +53 -53
- lamindb/models/_from_values.py +2 -2
- lamindb/models/_is_versioned.py +4 -4
- lamindb/models/_label_manager.py +4 -4
- lamindb/models/artifact.py +305 -116
- lamindb/models/artifact_set.py +36 -1
- lamindb/models/can_curate.py +1 -2
- lamindb/models/collection.py +3 -34
- lamindb/models/feature.py +111 -7
- lamindb/models/has_parents.py +11 -11
- lamindb/models/project.py +18 -0
- lamindb/models/query_manager.py +16 -7
- lamindb/models/query_set.py +59 -34
- lamindb/models/record.py +25 -4
- lamindb/models/run.py +8 -6
- lamindb/models/schema.py +54 -26
- lamindb/models/sqlrecord.py +123 -25
- lamindb/models/storage.py +59 -14
- lamindb/models/transform.py +17 -17
- lamindb/models/ulabel.py +6 -1
- {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/METADATA +4 -5
- {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/RECORD +47 -44
- {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/WHEEL +1 -1
- {lamindb-1.10.2.dist-info/licenses → lamindb-1.11a1.dist-info}/LICENSE +0 -0
lamindb/models/query_set.py
CHANGED
@@ -13,6 +13,7 @@ from django.db import models
|
|
13
13
|
from django.db.models import F, ForeignKey, ManyToManyField, Q, Subquery
|
14
14
|
from django.db.models.fields.related import ForeignObjectRel
|
15
15
|
from lamin_utils import logger
|
16
|
+
from lamindb_setup.core import deprecated
|
16
17
|
from lamindb_setup.core._docs import doc_args
|
17
18
|
|
18
19
|
from ..errors import DoesNotExist
|
@@ -144,7 +145,6 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
144
145
|
queryset,
|
145
146
|
expressions,
|
146
147
|
)
|
147
|
-
|
148
148
|
if issubclass(queryset.model, SQLRecord):
|
149
149
|
# branch_id is set to 1 unless expressions contains id or uid
|
150
150
|
if not (
|
@@ -173,32 +173,28 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
173
173
|
|
174
174
|
|
175
175
|
def get(
|
176
|
-
registry_or_queryset: Union[type[SQLRecord],
|
176
|
+
registry_or_queryset: Union[type[SQLRecord], BasicQuerySet],
|
177
177
|
idlike: int | str | None = None,
|
178
178
|
**expressions,
|
179
179
|
) -> SQLRecord:
|
180
|
-
if isinstance(registry_or_queryset,
|
180
|
+
if isinstance(registry_or_queryset, BasicQuerySet):
|
181
181
|
qs = registry_or_queryset
|
182
182
|
registry = qs.model
|
183
183
|
else:
|
184
|
-
qs =
|
184
|
+
qs = BasicQuerySet(model=registry_or_queryset)
|
185
185
|
registry = registry_or_queryset
|
186
186
|
if isinstance(idlike, int):
|
187
|
-
return
|
187
|
+
return BasicQuerySet.get(qs, id=idlike)
|
188
188
|
elif isinstance(idlike, str):
|
189
|
-
qs = qs.filter(uid__startswith=idlike)
|
190
|
-
|
191
189
|
NAME_FIELD = (
|
192
190
|
registry._name_field if hasattr(registry, "_name_field") else "name"
|
193
191
|
)
|
194
192
|
DOESNOTEXIST_MSG = f"No record found with uid '{idlike}'. Did you forget a keyword as in {registry.__name__}.get({NAME_FIELD}='{idlike}')?"
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
return one_helper(qs.latest_version(), DOESNOTEXIST_MSG)
|
199
|
-
else:
|
200
|
-
return one_helper(qs, DOESNOTEXIST_MSG)
|
193
|
+
if issubclass(registry, IsVersioned) and len(idlike) <= registry._len_stem_uid:
|
194
|
+
qs = BasicQuerySet.filter(qs, uid__startswith=idlike, is_latest=True)
|
195
|
+
return one_helper(qs, DOESNOTEXIST_MSG)
|
201
196
|
else:
|
197
|
+
qs = BasicQuerySet.filter(qs, uid__startswith=idlike)
|
202
198
|
return one_helper(qs, DOESNOTEXIST_MSG)
|
203
199
|
else:
|
204
200
|
assert idlike is None # noqa: S101
|
@@ -210,24 +206,23 @@ def get(
|
|
210
206
|
if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
|
211
207
|
expressions["is_latest"] = True
|
212
208
|
try:
|
213
|
-
return
|
214
|
-
except registry.DoesNotExist:
|
209
|
+
return BasicQuerySet.get(qs, **expressions)
|
210
|
+
except registry.DoesNotExist as e:
|
215
211
|
# handle the case in which the is_latest injection led to a missed query
|
216
212
|
if "is_latest" in expressions and is_latest_was_not_in_expressions:
|
217
213
|
expressions.pop("is_latest")
|
218
214
|
result = (
|
219
|
-
|
220
|
-
.filter(**expressions)
|
215
|
+
BasicQuerySet.filter(qs, **expressions)
|
221
216
|
.order_by("-created_at")
|
222
217
|
.first()
|
223
218
|
)
|
224
219
|
if result is not None:
|
225
220
|
return result
|
226
|
-
raise registry.DoesNotExist from
|
221
|
+
raise registry.DoesNotExist from e
|
227
222
|
|
228
223
|
|
229
224
|
class SQLRecordList(UserList, Generic[T]):
|
230
|
-
"""Is ordered, can't be queried, but has `.
|
225
|
+
"""Is ordered, can't be queried, but has `.to_dataframe()`."""
|
231
226
|
|
232
227
|
def __init__(self, records: Iterable[T]):
|
233
228
|
if isinstance(records, list):
|
@@ -235,16 +230,24 @@ class SQLRecordList(UserList, Generic[T]):
|
|
235
230
|
else:
|
236
231
|
super().__init__(records) # Let UserList handle the conversion
|
237
232
|
|
238
|
-
def
|
233
|
+
def to_dataframe(self) -> pd.DataFrame:
|
239
234
|
keys = get_keys_from_df(self.data, self.data[0].__class__)
|
240
235
|
values = [record.__dict__ for record in self.data]
|
241
236
|
return pd.DataFrame(values, columns=keys)
|
242
237
|
|
243
|
-
|
238
|
+
@deprecated(new_name="to_dataframe")
|
239
|
+
def df(self) -> pd.DataFrame:
|
240
|
+
return self.to_dataframe()
|
241
|
+
|
242
|
+
def to_list(
|
244
243
|
self, field: str
|
245
|
-
) -> list[str]: # meaningful to be parallel with
|
244
|
+
) -> list[str]: # meaningful to be parallel with to_list() in QuerySet
|
246
245
|
return [getattr(record, field) for record in self.data]
|
247
246
|
|
247
|
+
@deprecated(new_name="to_list")
|
248
|
+
def list(self, field: str) -> list[str]:
|
249
|
+
return self.to_list(field)
|
250
|
+
|
248
251
|
def one(self) -> T:
|
249
252
|
"""Exactly one result. Throws error if there are more or none."""
|
250
253
|
return one_helper(self)
|
@@ -348,7 +351,7 @@ def get_feature_annotate_kwargs(
|
|
348
351
|
| Q(dtype__startswith="cat[ULabel")
|
349
352
|
| Q(dtype__startswith="cat[Record")
|
350
353
|
)
|
351
|
-
feature_names = feature_qs.
|
354
|
+
feature_names = feature_qs.to_list("name")
|
352
355
|
logger.important(
|
353
356
|
f"queried for all categorical features with dtype ULabel or Record and non-categorical features: ({len(feature_names)}) {feature_names}"
|
354
357
|
)
|
@@ -671,8 +674,8 @@ class BasicQuerySet(models.QuerySet):
|
|
671
674
|
new_cls = cls
|
672
675
|
return object.__new__(new_cls)
|
673
676
|
|
674
|
-
@doc_args(SQLRecord.
|
675
|
-
def
|
677
|
+
@doc_args(SQLRecord.to_dataframe.__doc__)
|
678
|
+
def to_dataframe(
|
676
679
|
self,
|
677
680
|
include: str | list[str] | None = None,
|
678
681
|
features: bool | list[str] | str | None = None,
|
@@ -706,7 +709,7 @@ class BasicQuerySet(models.QuerySet):
|
|
706
709
|
id_subquery = self.values("id")
|
707
710
|
time = logger.debug("finished get id values", time=time)
|
708
711
|
# for annotate, we want the queryset without filters so that joins don't affect the annotations
|
709
|
-
query_set_without_filters = self.model.objects.filter(
|
712
|
+
query_set_without_filters = self.model.objects.using(self._db).filter(
|
710
713
|
id__in=Subquery(id_subquery)
|
711
714
|
)
|
712
715
|
time = logger.debug("finished get query_set_without_filters", time=time)
|
@@ -739,26 +742,34 @@ class BasicQuerySet(models.QuerySet):
|
|
739
742
|
time = logger.debug("finished", time=time)
|
740
743
|
return df_reshaped
|
741
744
|
|
745
|
+
@deprecated(new_name="to_dataframe")
|
746
|
+
def df(
|
747
|
+
self,
|
748
|
+
include: str | list[str] | None = None,
|
749
|
+
features: bool | list[str] | str | None = None,
|
750
|
+
) -> pd.DataFrame:
|
751
|
+
return self.to_dataframe(include, features)
|
752
|
+
|
742
753
|
def delete(self, *args, **kwargs):
|
743
754
|
"""Delete all records in the query set."""
|
744
|
-
from lamindb.models import Artifact, Collection, Run, Transform
|
755
|
+
from lamindb.models import Artifact, Collection, Run, Storage, Transform
|
745
756
|
|
746
757
|
# both Transform & Run might reference artifacts
|
747
|
-
if self.model in {Artifact, Collection, Transform, Run}:
|
758
|
+
if self.model in {Artifact, Collection, Transform, Run, Storage}:
|
748
759
|
for record in self:
|
749
760
|
logger.important(f"deleting {record}")
|
750
761
|
record.delete(*args, **kwargs)
|
751
762
|
else:
|
752
763
|
super().delete(*args, **kwargs)
|
753
764
|
|
754
|
-
def
|
765
|
+
def to_list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
|
755
766
|
"""Populate an (unordered) list with the results.
|
756
767
|
|
757
768
|
Note that the order in this list is only meaningful if you ordered the underlying query set with `.order_by()`.
|
758
769
|
|
759
770
|
Examples:
|
760
|
-
>>> queryset.
|
761
|
-
>>> queryset.
|
771
|
+
>>> queryset.to_list() # list of records
|
772
|
+
>>> queryset.to_list("name") # list of values
|
762
773
|
"""
|
763
774
|
if field is None:
|
764
775
|
return list(self)
|
@@ -766,6 +777,10 @@ class BasicQuerySet(models.QuerySet):
|
|
766
777
|
# list casting is necessary because values_list does not return a list
|
767
778
|
return list(self.values_list(field, flat=True))
|
768
779
|
|
780
|
+
@deprecated(new_name="to_list")
|
781
|
+
def list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
|
782
|
+
return self.to_list(field)
|
783
|
+
|
769
784
|
def first(self) -> SQLRecord | None:
|
770
785
|
"""If non-empty, the first result in the query set, otherwise ``None``.
|
771
786
|
|
@@ -869,8 +884,18 @@ class QuerySet(BasicQuerySet):
|
|
869
884
|
"""Query a single record. Raises error if there are more or none."""
|
870
885
|
is_run_input = expressions.pop("is_run_input", False)
|
871
886
|
|
887
|
+
if path := expressions.pop("path", None):
|
888
|
+
from .artifact_set import ArtifactSet, artifacts_from_path
|
889
|
+
|
890
|
+
if not isinstance(self, ArtifactSet):
|
891
|
+
raise ValueError("Querying by path is only possible for artifacts.")
|
892
|
+
|
893
|
+
qs = artifacts_from_path(self, path)
|
894
|
+
else:
|
895
|
+
qs = self
|
896
|
+
|
872
897
|
try:
|
873
|
-
record = get(
|
898
|
+
record = get(qs, idlike, **expressions) # type: ignore
|
874
899
|
except ValueError as e:
|
875
900
|
# Pass through original error for explicit id lookups
|
876
901
|
if "Field 'id' expected a number" in str(e):
|
@@ -886,8 +911,8 @@ class QuerySet(BasicQuerySet):
|
|
886
911
|
raise # pragma: no cover
|
887
912
|
|
888
913
|
if is_run_input is not False: # might be None or True or Run
|
889
|
-
from
|
890
|
-
from
|
914
|
+
from .artifact import Artifact, _track_run_input
|
915
|
+
from .collection import Collection
|
891
916
|
|
892
917
|
if isinstance(record, (Artifact, Collection)):
|
893
918
|
_track_run_input(record, is_run_input)
|
lamindb/models/record.py
CHANGED
@@ -20,7 +20,7 @@ from .can_curate import CanCurate
|
|
20
20
|
from .feature import Feature
|
21
21
|
from .has_parents import _query_relatives
|
22
22
|
from .query_set import reorder_subset_columns_in_df
|
23
|
-
from .run import Run, TracksRun, TracksUpdates
|
23
|
+
from .run import Run, TracksRun, TracksUpdates, User
|
24
24
|
from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, _get_record_kwargs
|
25
25
|
from .transform import Transform
|
26
26
|
from .ulabel import ULabel
|
@@ -54,6 +54,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
54
54
|
|
55
55
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
56
56
|
abstract = False
|
57
|
+
app_label = "lamindb"
|
57
58
|
|
58
59
|
_name_field: str = "name"
|
59
60
|
|
@@ -207,11 +208,13 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
207
208
|
def to_pandas(self) -> pd.DataFrame:
|
208
209
|
"""Export all children of a record type recursively to a pandas DataFrame."""
|
209
210
|
assert self.is_type, "Only types can be exported as dataframes" # noqa: S101
|
210
|
-
df = self.query_children().
|
211
|
+
df = self.query_children().to_dataframe(features="queryset")
|
211
212
|
df.columns.values[0] = "__lamindb_record_uid__"
|
212
213
|
df.columns.values[1] = "__lamindb_record_name__"
|
213
214
|
if self.schema is not None:
|
214
|
-
desired_order = self.schema.members.
|
215
|
+
desired_order = self.schema.members.to_list(
|
216
|
+
"name"
|
217
|
+
) # only members is ordered!
|
215
218
|
else:
|
216
219
|
# sort alphabetically for now
|
217
220
|
desired_order = df.columns[2:].tolist()
|
@@ -235,7 +238,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
235
238
|
)
|
236
239
|
run = Run(transform, initiated_by_run=context.run).save()
|
237
240
|
run.input_records.add(self)
|
238
|
-
return Artifact.
|
241
|
+
return Artifact.from_dataframe(
|
239
242
|
self.to_pandas(),
|
240
243
|
key=key,
|
241
244
|
description=f"Export of sheet {self.uid}{description}",
|
@@ -252,6 +255,7 @@ class RecordJson(BaseSQLRecord, IsLink):
|
|
252
255
|
value: Any = JSONField(default=None, db_default=None)
|
253
256
|
|
254
257
|
class Meta:
|
258
|
+
app_label = "lamindb"
|
255
259
|
unique_together = ("record", "feature") # a list is modeled as a list in json
|
256
260
|
|
257
261
|
|
@@ -266,6 +270,7 @@ class RecordRecord(SQLRecord, IsLink):
|
|
266
270
|
) # component
|
267
271
|
|
268
272
|
class Meta:
|
273
|
+
app_label = "lamindb"
|
269
274
|
unique_together = ("record", "feature", "value")
|
270
275
|
|
271
276
|
|
@@ -277,6 +282,19 @@ class RecordULabel(BaseSQLRecord, IsLink):
|
|
277
282
|
|
278
283
|
class Meta:
|
279
284
|
# allows linking exactly one record to one ulabel per feature, because we likely don't want to have Many
|
285
|
+
app_label = "lamindb"
|
286
|
+
unique_together = ("record", "feature", "value")
|
287
|
+
|
288
|
+
|
289
|
+
class RecordUser(BaseSQLRecord, IsLink):
|
290
|
+
id: int = models.BigAutoField(primary_key=True)
|
291
|
+
record: Record = ForeignKey(Record, CASCADE, related_name="values_user")
|
292
|
+
feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recorduser")
|
293
|
+
value: User = ForeignKey(User, PROTECT, related_name="links_record")
|
294
|
+
|
295
|
+
class Meta:
|
296
|
+
# allows linking exactly one record to one user per feature, because we likely don't want to have Many
|
297
|
+
app_label = "lamindb"
|
280
298
|
unique_together = ("record", "feature", "value")
|
281
299
|
|
282
300
|
|
@@ -288,6 +306,7 @@ class RecordRun(BaseSQLRecord, IsLink):
|
|
288
306
|
|
289
307
|
class Meta:
|
290
308
|
# allows linking several records to a single run for the same feature because we'll likely need this
|
309
|
+
app_label = "lamindb"
|
291
310
|
unique_together = ("record", "feature", "value")
|
292
311
|
|
293
312
|
|
@@ -299,6 +318,7 @@ class RecordArtifact(BaseSQLRecord, IsLink):
|
|
299
318
|
|
300
319
|
class Meta:
|
301
320
|
# allows linking several records to a single artifact for the same feature because we'll likely need this
|
321
|
+
app_label = "lamindb"
|
302
322
|
unique_together = ("record", "feature", "value")
|
303
323
|
|
304
324
|
|
@@ -315,4 +335,5 @@ class ArtifactRecord(BaseSQLRecord, IsLink):
|
|
315
335
|
|
316
336
|
class Meta:
|
317
337
|
# allows linking several records to a single artifact for the same feature because we'll likely need this
|
338
|
+
app_label = "lamindb"
|
318
339
|
unique_together = ("artifact", "record", "feature")
|
lamindb/models/run.py
CHANGED
@@ -142,6 +142,9 @@ class User(BaseSQLRecord, CanCurate):
|
|
142
142
|
>>> user
|
143
143
|
"""
|
144
144
|
|
145
|
+
class Meta:
|
146
|
+
app_label = "lamindb"
|
147
|
+
|
145
148
|
_name_field: str = "handle"
|
146
149
|
|
147
150
|
id: int = models.AutoField(primary_key=True)
|
@@ -223,6 +226,9 @@ class Run(SQLRecord):
|
|
223
226
|
>>> ln.context.run
|
224
227
|
"""
|
225
228
|
|
229
|
+
class Meta:
|
230
|
+
app_label = "lamindb"
|
231
|
+
|
226
232
|
_name_field: str = "started_at"
|
227
233
|
|
228
234
|
id: int = models.BigAutoField(primary_key=True)
|
@@ -368,11 +374,6 @@ class Run(SQLRecord):
|
|
368
374
|
reference_type=reference_type,
|
369
375
|
)
|
370
376
|
|
371
|
-
def delete(self) -> None:
|
372
|
-
"""Delete."""
|
373
|
-
delete_run_artifacts(self)
|
374
|
-
super().delete()
|
375
|
-
|
376
377
|
@property
|
377
378
|
@deprecated("features")
|
378
379
|
def params(self) -> FeatureManager:
|
@@ -470,7 +471,7 @@ def delete_run_artifacts(run: Run) -> None:
|
|
470
471
|
if environment._environment_of.count() == 0:
|
471
472
|
environment.delete(permanent=True)
|
472
473
|
if report is not None:
|
473
|
-
# only delete if there are no other runs attached to this
|
474
|
+
# only delete if there are no other runs attached to this report
|
474
475
|
if report._report_of.count() == 0:
|
475
476
|
report.delete(permanent=True)
|
476
477
|
|
@@ -492,4 +493,5 @@ class RunFeatureValue(BaseSQLRecord, IsLink):
|
|
492
493
|
"""Creator of record."""
|
493
494
|
|
494
495
|
class Meta:
|
496
|
+
app_label = "lamindb"
|
495
497
|
unique_together = ("run", "featurevalue")
|
lamindb/models/schema.py
CHANGED
@@ -6,6 +6,7 @@ import numpy as np
|
|
6
6
|
from django.db import models
|
7
7
|
from django.db.models import CASCADE, PROTECT, ManyToManyField
|
8
8
|
from lamin_utils import logger
|
9
|
+
from lamindb_setup.core import deprecated
|
9
10
|
from lamindb_setup.core.hashing import HASH_LENGTH, hash_string
|
10
11
|
from rich.table import Table
|
11
12
|
from rich.text import Text
|
@@ -348,11 +349,12 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
348
349
|
|
349
350
|
# from a dataframe
|
350
351
|
df = pd.DataFrame({"feat1": [1, 2], "feat2": [3.1, 4.2], "feat3": ["cond1", "cond2"]})
|
351
|
-
schema = ln.Schema.
|
352
|
+
schema = ln.Schema.from_dataframe(df)
|
352
353
|
"""
|
353
354
|
|
354
355
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
355
356
|
abstract = False
|
357
|
+
app_label = "lamindb"
|
356
358
|
|
357
359
|
_name_field: str = "name"
|
358
360
|
_aux_fields: dict[str, tuple[str, type]] = {
|
@@ -576,19 +578,22 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
576
578
|
self.optionals.set(optional_features)
|
577
579
|
return None
|
578
580
|
self._slots: dict[str, Schema] = {}
|
581
|
+
|
579
582
|
if features:
|
580
583
|
self._features = (get_related_name(features_registry), features) # type: ignore
|
581
|
-
|
584
|
+
if slots:
|
582
585
|
for slot_key, component in slots.items():
|
583
586
|
if component._state.adding:
|
584
587
|
raise InvalidArgument(
|
585
588
|
f"schema for {slot_key} {component} must be saved before use"
|
586
589
|
)
|
587
590
|
self._slots = slots
|
591
|
+
|
588
592
|
if validated_kwargs["hash"] in KNOWN_SCHEMAS:
|
589
593
|
validated_kwargs["uid"] = KNOWN_SCHEMAS[validated_kwargs["hash"]]
|
590
594
|
else:
|
591
595
|
validated_kwargs["uid"] = ids.base62_16()
|
596
|
+
|
592
597
|
super().__init__(**validated_kwargs)
|
593
598
|
|
594
599
|
def _validate_kwargs_calculate_hash(
|
@@ -623,14 +628,20 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
623
628
|
raise TypeError("index must be a Feature")
|
624
629
|
features.insert(0, index)
|
625
630
|
|
631
|
+
if slots:
|
632
|
+
itype = "Composite"
|
633
|
+
if otype is None:
|
634
|
+
raise InvalidArgument("Please pass otype != None for composite schemas")
|
635
|
+
|
626
636
|
if features:
|
627
637
|
features, configs = get_features_config(features)
|
628
638
|
features_registry = validate_features(features)
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
639
|
+
if itype != "Composite":
|
640
|
+
itype_compare = features_registry.__get_name_with_module__()
|
641
|
+
if itype is not None:
|
642
|
+
assert itype.startswith(itype_compare), str(itype_compare) # noqa: S101
|
643
|
+
else:
|
644
|
+
itype = itype_compare
|
634
645
|
if n_features is not None:
|
635
646
|
if n_features != len(features):
|
636
647
|
logger.important(f"updating to n {len(features)} features")
|
@@ -654,11 +665,6 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
654
665
|
if flexible is None:
|
655
666
|
flexible = flexible_default
|
656
667
|
|
657
|
-
if slots:
|
658
|
-
itype = "Composite"
|
659
|
-
if otype is None:
|
660
|
-
raise InvalidArgument("Please pass otype != None for composite schemas")
|
661
|
-
|
662
668
|
if itype is not None and not isinstance(itype, str):
|
663
669
|
itype_str = serialize_dtype(itype, is_itype=True)
|
664
670
|
else:
|
@@ -771,7 +777,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
771
777
|
cls,
|
772
778
|
values: ListLike,
|
773
779
|
field: FieldAttr = Feature.name,
|
774
|
-
|
780
|
+
dtype: str | None = None,
|
775
781
|
name: str | None = None,
|
776
782
|
mute: bool = False,
|
777
783
|
organism: SQLRecord | str | None = None,
|
@@ -783,7 +789,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
783
789
|
Args:
|
784
790
|
values: A list of values, like feature names or ids.
|
785
791
|
field: The field of a reference registry to map values.
|
786
|
-
|
792
|
+
dtype: The simple dtype.
|
787
793
|
Defaults to `None` if reference registry is :class:`~lamindb.Feature`,
|
788
794
|
defaults to `"float"` otherwise.
|
789
795
|
name: A name.
|
@@ -816,8 +822,8 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
816
822
|
if isinstance(values, DICT_KEYS_TYPE):
|
817
823
|
values = list(values)
|
818
824
|
registry = field.field.model
|
819
|
-
if registry != Feature and
|
820
|
-
|
825
|
+
if registry != Feature and dtype is None:
|
826
|
+
dtype = NUMBER_TYPE
|
821
827
|
logger.debug("setting feature set to 'number'")
|
822
828
|
validated = registry.validate(values, field=field, mute=mute, organism=organism)
|
823
829
|
values_array = np.array(values)
|
@@ -841,12 +847,12 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
841
847
|
schema = Schema(
|
842
848
|
features=validated_features,
|
843
849
|
name=name,
|
844
|
-
dtype=get_type_str(
|
850
|
+
dtype=get_type_str(dtype),
|
845
851
|
)
|
846
852
|
return schema
|
847
853
|
|
848
854
|
@classmethod
|
849
|
-
def
|
855
|
+
def from_dataframe(
|
850
856
|
cls,
|
851
857
|
df: pd.DataFrame,
|
852
858
|
field: FieldAttr = Feature.name,
|
@@ -889,15 +895,28 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
889
895
|
)
|
890
896
|
return schema
|
891
897
|
|
898
|
+
@classmethod
|
899
|
+
@deprecated("from_dataframe")
|
900
|
+
def from_df(
|
901
|
+
cls,
|
902
|
+
df: pd.DataFrame,
|
903
|
+
field: FieldAttr = Feature.name,
|
904
|
+
name: str | None = None,
|
905
|
+
mute: bool = False,
|
906
|
+
organism: SQLRecord | str | None = None,
|
907
|
+
source: SQLRecord | None = None,
|
908
|
+
) -> Schema | None:
|
909
|
+
return cls.from_dataframe(df, field, name, mute, organism, source)
|
910
|
+
|
892
911
|
def save(self, *args, **kwargs) -> Schema:
|
893
|
-
"""Save."""
|
912
|
+
"""Save schema."""
|
894
913
|
from .save import bulk_create
|
895
914
|
|
896
915
|
if self.pk is not None:
|
897
916
|
features = (
|
898
917
|
self._features[1]
|
899
918
|
if hasattr(self, "_features")
|
900
|
-
else (self.members.
|
919
|
+
else (self.members.to_list() if self.members.exists() else [])
|
901
920
|
)
|
902
921
|
index_feature = self.index
|
903
922
|
_, validated_kwargs, _, _, _ = self._validate_kwargs_calculate_hash(
|
@@ -925,7 +944,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
925
944
|
datasets = Artifact.filter(schema=self).all()
|
926
945
|
if datasets.exists():
|
927
946
|
logger.warning(
|
928
|
-
f"you updated the schema hash and might invalidate datasets that were previously validated with this schema: {datasets.
|
947
|
+
f"you updated the schema hash and might invalidate datasets that were previously validated with this schema: {datasets.to_list('uid')}"
|
929
948
|
)
|
930
949
|
self.hash = validated_kwargs["hash"]
|
931
950
|
self.n = validated_kwargs["n"]
|
@@ -947,13 +966,16 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
947
966
|
assert self.n > 0 # noqa: S101
|
948
967
|
using: bool | None = kwargs.pop("using", None)
|
949
968
|
related_name, records = self._features
|
969
|
+
|
970
|
+
# .set() does not preserve the order but orders by the feature primary key
|
950
971
|
# only the following method preserves the order
|
951
|
-
# .set() does not preserve the order but orders by
|
952
|
-
# the feature primary key
|
953
972
|
through_model = getattr(self, related_name).through
|
954
|
-
|
955
|
-
"
|
956
|
-
|
973
|
+
if self.itype == "Composite":
|
974
|
+
related_model_split = ["Feature"]
|
975
|
+
else:
|
976
|
+
related_model_split = parse_cat_dtype(self.itype, is_itype=True)[
|
977
|
+
"registry_str"
|
978
|
+
].split(".")
|
957
979
|
if len(related_model_split) == 1:
|
958
980
|
related_field = related_model_split[0].lower()
|
959
981
|
else:
|
@@ -965,6 +987,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
965
987
|
]
|
966
988
|
through_model.objects.using(using).bulk_create(links, ignore_conflicts=True)
|
967
989
|
delattr(self, "_features")
|
990
|
+
|
968
991
|
return self
|
969
992
|
|
970
993
|
@property
|
@@ -978,6 +1001,8 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
978
1001
|
# this should return a queryset and not a list...
|
979
1002
|
# need to fix this
|
980
1003
|
return self._features[1]
|
1004
|
+
if len(self.features.all()) > 0:
|
1005
|
+
return self.features.order_by("links_schema__id")
|
981
1006
|
if self.itype == "Composite" or self.is_type:
|
982
1007
|
return Feature.objects.none()
|
983
1008
|
related_name = self._get_related_name()
|
@@ -1200,6 +1225,7 @@ class SchemaFeature(BaseSQLRecord, IsLink):
|
|
1200
1225
|
feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_schema")
|
1201
1226
|
|
1202
1227
|
class Meta:
|
1228
|
+
app_label = "lamindb"
|
1203
1229
|
unique_together = ("schema", "feature")
|
1204
1230
|
|
1205
1231
|
|
@@ -1211,6 +1237,7 @@ class ArtifactSchema(BaseSQLRecord, IsLink, TracksRun):
|
|
1211
1237
|
feature_ref_is_semantic: bool | None = BooleanField(null=True)
|
1212
1238
|
|
1213
1239
|
class Meta:
|
1240
|
+
app_label = "lamindb"
|
1214
1241
|
unique_together = (("artifact", "schema"), ("artifact", "slot"))
|
1215
1242
|
|
1216
1243
|
|
@@ -1221,6 +1248,7 @@ class SchemaComponent(BaseSQLRecord, IsLink, TracksRun):
|
|
1221
1248
|
slot: str | None = CharField(null=True)
|
1222
1249
|
|
1223
1250
|
class Meta:
|
1251
|
+
app_label = "lamindb"
|
1224
1252
|
unique_together = (("composite", "slot", "component"), ("composite", "slot"))
|
1225
1253
|
|
1226
1254
|
|