lamindb 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +30 -25
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +2 -3
- lamindb/base/__init__.py +1 -1
- lamindb/base/ids.py +1 -10
- lamindb/core/__init__.py +7 -65
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +43 -20
- lamindb/core/_settings.py +6 -6
- lamindb/core/_sync_git.py +1 -1
- lamindb/core/loaders.py +30 -19
- lamindb/core/storage/_backed_access.py +4 -2
- lamindb/core/storage/_tiledbsoma.py +8 -6
- lamindb/core/storage/_zarr.py +104 -25
- lamindb/core/storage/objects.py +63 -28
- lamindb/core/storage/paths.py +4 -1
- lamindb/core/types.py +10 -0
- lamindb/curators/__init__.py +100 -85
- lamindb/errors.py +1 -1
- lamindb/integrations/_vitessce.py +4 -4
- lamindb/migrations/0089_subsequent_runs.py +159 -0
- lamindb/migrations/0090_runproject_project_runs.py +73 -0
- lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
- lamindb/models/__init__.py +79 -0
- lamindb/{core → models}/_describe.py +3 -3
- lamindb/{core → models}/_django.py +8 -5
- lamindb/{core → models}/_feature_manager.py +103 -87
- lamindb/{_from_values.py → models/_from_values.py} +5 -2
- lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
- lamindb/{core → models}/_label_manager.py +10 -17
- lamindb/{core/relations.py → models/_relations.py} +8 -1
- lamindb/models/artifact.py +2602 -0
- lamindb/{_can_curate.py → models/can_curate.py} +349 -180
- lamindb/models/collection.py +683 -0
- lamindb/models/core.py +135 -0
- lamindb/models/feature.py +643 -0
- lamindb/models/flextable.py +163 -0
- lamindb/{_parents.py → models/has_parents.py} +55 -49
- lamindb/models/project.py +384 -0
- lamindb/{_query_manager.py → models/query_manager.py} +10 -8
- lamindb/{_query_set.py → models/query_set.py} +40 -26
- lamindb/models/record.py +1762 -0
- lamindb/models/run.py +563 -0
- lamindb/{_save.py → models/save.py} +9 -7
- lamindb/models/schema.py +732 -0
- lamindb/models/transform.py +360 -0
- lamindb/models/ulabel.py +249 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/RECORD +51 -51
- lamindb/_artifact.py +0 -1379
- lamindb/_collection.py +0 -440
- lamindb/_feature.py +0 -316
- lamindb/_is_versioned.py +0 -40
- lamindb/_record.py +0 -1064
- lamindb/_run.py +0 -60
- lamindb/_schema.py +0 -347
- lamindb/_storage.py +0 -15
- lamindb/_transform.py +0 -170
- lamindb/_ulabel.py +0 -56
- lamindb/_utils.py +0 -9
- lamindb/base/validation.py +0 -63
- lamindb/core/_data.py +0 -491
- lamindb/core/fields.py +0 -12
- lamindb/models.py +0 -4475
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
lamindb/curators/__init__.py
CHANGED
@@ -9,12 +9,23 @@
|
|
9
9
|
DataFrameCurator
|
10
10
|
AnnDataCurator
|
11
11
|
|
12
|
+
CatManager:
|
13
|
+
|
14
|
+
.. autosummary::
|
15
|
+
:toctree: .
|
16
|
+
|
17
|
+
CatManager
|
18
|
+
DataFrameCatManager
|
19
|
+
AnnDataCatManager
|
20
|
+
MuDataCatManager
|
21
|
+
TiledbsomaCatManager
|
22
|
+
CurateLookup
|
23
|
+
|
12
24
|
"""
|
13
25
|
|
14
26
|
from __future__ import annotations
|
15
27
|
|
16
28
|
import copy
|
17
|
-
import random
|
18
29
|
import re
|
19
30
|
from importlib import resources
|
20
31
|
from itertools import chain
|
@@ -40,14 +51,10 @@ if TYPE_CHECKING:
|
|
40
51
|
|
41
52
|
from lamindb.base.types import FieldAttr
|
42
53
|
from lamindb.models import Record
|
43
|
-
from lamindb._feature import parse_dtype, parse_dtype_single_cat
|
44
54
|
from lamindb.base.types import FieldAttr # noqa
|
45
|
-
from lamindb.core._data import add_labels
|
46
|
-
from lamindb.core._feature_manager import parse_staged_feature_sets_from_anndata
|
47
55
|
from lamindb.core._settings import settings
|
48
56
|
from lamindb.models import (
|
49
57
|
Artifact,
|
50
|
-
CanCurate,
|
51
58
|
Collection,
|
52
59
|
Feature,
|
53
60
|
Record,
|
@@ -55,9 +62,11 @@ from lamindb.models import (
|
|
55
62
|
Schema,
|
56
63
|
ULabel,
|
57
64
|
)
|
65
|
+
from lamindb.models._feature_manager import parse_staged_feature_sets_from_anndata
|
66
|
+
from lamindb.models.artifact import add_labels, data_is_anndata
|
67
|
+
from lamindb.models.feature import parse_dtype, parse_dtype_single_cat
|
68
|
+
from lamindb.models._from_values import _format_values
|
58
69
|
|
59
|
-
from .._artifact import data_is_anndata
|
60
|
-
from .._from_values import _format_values
|
61
70
|
from ..errors import InvalidArgument, ValidationError
|
62
71
|
|
63
72
|
if TYPE_CHECKING:
|
@@ -68,7 +77,7 @@ if TYPE_CHECKING:
|
|
68
77
|
from mudata import MuData
|
69
78
|
from spatialdata import SpatialData
|
70
79
|
|
71
|
-
from lamindb.
|
80
|
+
from lamindb.models.query_set import RecordList
|
72
81
|
|
73
82
|
|
74
83
|
def strip_ansi_codes(text):
|
@@ -141,7 +150,7 @@ class CurateLookup:
|
|
141
150
|
" → categories.alveolar_type_1_fibroblast_cell\n\n"
|
142
151
|
"To look up public ontologies, use .lookup(public=True)"
|
143
152
|
)
|
144
|
-
else: #
|
153
|
+
else: # pragma: no cover
|
145
154
|
return colors.warning("No fields are found!")
|
146
155
|
|
147
156
|
|
@@ -199,7 +208,7 @@ class Curator:
|
|
199
208
|
@doc_args(VALIDATE_DOCSTRING)
|
200
209
|
def validate(self) -> bool | str:
|
201
210
|
"""{}""" # noqa: D415
|
202
|
-
pass #
|
211
|
+
pass # pragma: no cover
|
203
212
|
|
204
213
|
@doc_args(SAVE_ARTIFACT_DOCSTRING)
|
205
214
|
def save_artifact(
|
@@ -453,24 +462,31 @@ class AnnDataCurator(Curator):
|
|
453
462
|
raise InvalidArgument("dataset must be AnnData-like.")
|
454
463
|
if schema.otype != "AnnData":
|
455
464
|
raise InvalidArgument("Schema otype must be 'AnnData'.")
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
465
|
+
# TODO: also support slots other than obs and var
|
466
|
+
self._slots = {
|
467
|
+
slot: DataFrameCurator(
|
468
|
+
(
|
469
|
+
self._dataset.__getattribute__(slot).T
|
470
|
+
if slot == "var"
|
471
|
+
else self._dataset.__getattribute__(slot)
|
472
|
+
),
|
473
|
+
slot_schema,
|
474
|
+
)
|
475
|
+
for slot, slot_schema in schema.slots.items()
|
476
|
+
if slot in {"obs", "var"}
|
477
|
+
}
|
462
478
|
|
463
479
|
@property
|
464
480
|
@doc_args(SLOTS_DOCSTRING)
|
465
481
|
def slots(self) -> dict[str, DataFrameCurator]:
|
466
482
|
"""{}""" # noqa: D415
|
467
|
-
return
|
483
|
+
return self._slots
|
468
484
|
|
469
485
|
@doc_args(VALIDATE_DOCSTRING)
|
470
486
|
def validate(self) -> None:
|
471
487
|
"""{}""" # noqa: D415
|
472
|
-
self.
|
473
|
-
|
488
|
+
for _, curator in self._slots.items():
|
489
|
+
curator.validate()
|
474
490
|
|
475
491
|
@doc_args(SAVE_ARTIFACT_DOCSTRING)
|
476
492
|
def save_artifact(
|
@@ -483,13 +499,18 @@ class AnnDataCurator(Curator):
|
|
483
499
|
):
|
484
500
|
"""{}""" # noqa: D415
|
485
501
|
if not self._is_validated:
|
486
|
-
self.validate()
|
487
|
-
result = parse_dtype_single_cat(self._var_curator._schema.itype, is_itype=True)
|
502
|
+
self.validate()
|
488
503
|
return save_artifact( # type: ignore
|
489
504
|
self._dataset,
|
490
505
|
description=description,
|
491
|
-
fields=self.
|
492
|
-
columns_field=
|
506
|
+
fields=self.slots["obs"]._cat_manager.categoricals,
|
507
|
+
columns_field=(
|
508
|
+
parse_dtype_single_cat(self.slots["var"]._schema.itype, is_itype=True)[
|
509
|
+
"field"
|
510
|
+
]
|
511
|
+
if "var" in self._slots
|
512
|
+
else None
|
513
|
+
),
|
493
514
|
key=key,
|
494
515
|
artifact=self._artifact,
|
495
516
|
revises=revises,
|
@@ -519,8 +540,8 @@ class CatManager:
|
|
519
540
|
|
520
541
|
If you find non-validated values, you have several options:
|
521
542
|
|
522
|
-
- new values found in the data can be registered using :meth:`~lamindb.
|
523
|
-
- non-validated values can be accessed using :meth:`~lamindb.
|
543
|
+
- new values found in the data can be registered using :meth:`~lamindb.curators.DataFrameCatManager.add_new_from`
|
544
|
+
- non-validated values can be accessed using :meth:`~lamindb.curators.DataFrameCatManager.non_validated` and addressed manually
|
524
545
|
"""
|
525
546
|
|
526
547
|
def __init__(
|
@@ -599,7 +620,7 @@ class CatManager:
|
|
599
620
|
Returns:
|
600
621
|
None
|
601
622
|
"""
|
602
|
-
pass #
|
623
|
+
pass # pragma: no cover
|
603
624
|
|
604
625
|
@doc_args(SAVE_ARTIFACT_DOCSTRING)
|
605
626
|
def save_artifact(
|
@@ -891,7 +912,7 @@ class AnnDataCatManager(CatManager):
|
|
891
912
|
def __init__(
|
892
913
|
self,
|
893
914
|
data: ad.AnnData | Artifact,
|
894
|
-
var_index: FieldAttr,
|
915
|
+
var_index: FieldAttr | None = None,
|
895
916
|
categoricals: dict[str, FieldAttr] | None = None,
|
896
917
|
obs_columns: FieldAttr = Feature.name,
|
897
918
|
verbosity: str = "hint",
|
@@ -960,15 +981,16 @@ class AnnDataCatManager(CatManager):
|
|
960
981
|
validated_only: bool = True,
|
961
982
|
):
|
962
983
|
"""Save variable records."""
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
984
|
+
if self.var_index is not None:
|
985
|
+
update_registry(
|
986
|
+
values=list(self._adata.var.index),
|
987
|
+
field=self.var_index,
|
988
|
+
key="var_index",
|
989
|
+
validated_only=validated_only,
|
990
|
+
organism=self._organism,
|
991
|
+
source=self._sources.get("var_index"),
|
992
|
+
exclude=self._exclude.get("var_index"),
|
993
|
+
)
|
972
994
|
|
973
995
|
def add_new_from(self, key: str, **kwargs):
|
974
996
|
"""Add validated & new categories.
|
@@ -1004,15 +1026,19 @@ class AnnDataCatManager(CatManager):
|
|
1004
1026
|
|
1005
1027
|
# add all validated records to the current instance
|
1006
1028
|
self._save_from_var_index(validated_only=True)
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1029
|
+
if self.var_index is not None:
|
1030
|
+
validated_var, non_validated_var = validate_categories(
|
1031
|
+
self._adata.var.index,
|
1032
|
+
field=self._var_field,
|
1033
|
+
key="var_index",
|
1034
|
+
source=self._sources.get("var_index"),
|
1035
|
+
hint_print=".add_new_from_var_index()",
|
1036
|
+
exclude=self._exclude.get("var_index"),
|
1037
|
+
organism=self._organism, # type: ignore
|
1038
|
+
)
|
1039
|
+
else:
|
1040
|
+
validated_var = True
|
1041
|
+
non_validated_var = []
|
1016
1042
|
validated_obs = self._obs_df_curator.validate()
|
1017
1043
|
self._non_validated = self._obs_df_curator._non_validated # type: ignore
|
1018
1044
|
if len(non_validated_var) > 0:
|
@@ -1711,7 +1737,7 @@ class TiledbsomaCatManager(CatManager):
|
|
1711
1737
|
Returns:
|
1712
1738
|
A saved artifact record.
|
1713
1739
|
"""
|
1714
|
-
from lamindb.
|
1740
|
+
from lamindb.models.artifact import add_labels
|
1715
1741
|
|
1716
1742
|
if not self._is_validated:
|
1717
1743
|
self.validate()
|
@@ -1848,11 +1874,11 @@ class SpatialDataCatManager(CatManager):
|
|
1848
1874
|
exclude=exclude,
|
1849
1875
|
)
|
1850
1876
|
if isinstance(sdata, Artifact):
|
1851
|
-
# TODO: load() doesn't yet work
|
1852
1877
|
self._sdata = sdata.load()
|
1853
1878
|
else:
|
1854
1879
|
self._sdata = self._dataset
|
1855
1880
|
self._sample_metadata_key = sample_metadata_key
|
1881
|
+
self._write_path = None
|
1856
1882
|
self._var_fields = var_index
|
1857
1883
|
self._verify_accessor_exists(self._var_fields.keys())
|
1858
1884
|
self._categoricals = categoricals
|
@@ -2134,26 +2160,14 @@ class SpatialDataCatManager(CatManager):
|
|
2134
2160
|
try:
|
2135
2161
|
settings.verbosity = "warning"
|
2136
2162
|
|
2137
|
-
|
2138
|
-
|
2139
|
-
|
2140
|
-
|
2141
|
-
|
2142
|
-
|
2143
|
-
|
2144
|
-
|
2145
|
-
# Create the Artifact and associate Artifact metadata
|
2146
|
-
self._artifact = Artifact(
|
2147
|
-
write_path,
|
2148
|
-
description=description,
|
2149
|
-
key=key,
|
2150
|
-
revises=revises,
|
2151
|
-
run=run,
|
2152
|
-
)
|
2153
|
-
# According to Tim it is not easy to calculate the number of observations.
|
2154
|
-
# We would have to write custom code to iterate over labels (which might not even exist at that point)
|
2155
|
-
self._artifact.otype = "spatialdata"
|
2156
|
-
self._artifact.save()
|
2163
|
+
self._artifact = Artifact.from_spatialdata(
|
2164
|
+
self._sdata,
|
2165
|
+
key=key,
|
2166
|
+
description=description,
|
2167
|
+
revises=revises,
|
2168
|
+
run=run,
|
2169
|
+
)
|
2170
|
+
self._artifact.save()
|
2157
2171
|
|
2158
2172
|
# Link schemas
|
2159
2173
|
feature_kwargs = check_registry_organism(
|
@@ -2171,7 +2185,7 @@ class SpatialDataCatManager(CatManager):
|
|
2171
2185
|
"""Add Schemas from SpatialData."""
|
2172
2186
|
if obs_fields is None:
|
2173
2187
|
obs_fields = {}
|
2174
|
-
assert host.otype == "
|
2188
|
+
assert host.otype == "SpatialData" # noqa: S101
|
2175
2189
|
|
2176
2190
|
feature_sets = {}
|
2177
2191
|
|
@@ -2799,7 +2813,7 @@ class DoseHandler:
|
|
2799
2813
|
return cls.UNIT_MAP.get(unit, unit)
|
2800
2814
|
|
2801
2815
|
@classmethod
|
2802
|
-
def validate_values(cls, values: pd.Series) -> list:
|
2816
|
+
def validate_values(cls, values: pd.Series) -> list[str]:
|
2803
2817
|
"""Validate pert_dose values with strict case checking."""
|
2804
2818
|
errors = []
|
2805
2819
|
|
@@ -2843,7 +2857,7 @@ class TimeHandler:
|
|
2843
2857
|
return unit[0].lower()
|
2844
2858
|
|
2845
2859
|
@classmethod
|
2846
|
-
def validate_values(cls, values: pd.Series) -> list:
|
2860
|
+
def validate_values(cls, values: pd.Series) -> list[str]:
|
2847
2861
|
"""Validate pert_time values."""
|
2848
2862
|
errors = []
|
2849
2863
|
|
@@ -3197,8 +3211,8 @@ def validate_categories(
|
|
3197
3211
|
exclude: str | list | None = None,
|
3198
3212
|
hint_print: str | None = None,
|
3199
3213
|
curator: CatManager | None = None,
|
3200
|
-
) -> tuple[bool, list]:
|
3201
|
-
"""Validate ontology terms
|
3214
|
+
) -> tuple[bool, list[str]]:
|
3215
|
+
"""Validate ontology terms using LaminDB registries.
|
3202
3216
|
|
3203
3217
|
Args:
|
3204
3218
|
values: The values to validate.
|
@@ -3210,8 +3224,8 @@ def validate_categories(
|
|
3210
3224
|
standardize: Whether to standardize the values.
|
3211
3225
|
hint_print: The hint to print that suggests fixing non-validated values.
|
3212
3226
|
"""
|
3213
|
-
from lamindb._from_values import _format_values
|
3214
3227
|
from lamindb.core._settings import settings
|
3228
|
+
from lamindb.models._from_values import _format_values
|
3215
3229
|
|
3216
3230
|
model_field = f"{field.field.model.__name__}.{field.field.name}"
|
3217
3231
|
|
@@ -3346,7 +3360,7 @@ def validate_categories_in_df(
|
|
3346
3360
|
def save_artifact(
|
3347
3361
|
data: pd.DataFrame | ad.AnnData | MuData,
|
3348
3362
|
fields: dict[str, FieldAttr] | dict[str, dict[str, FieldAttr]],
|
3349
|
-
columns_field: FieldAttr | dict[str, FieldAttr],
|
3363
|
+
columns_field: FieldAttr | dict[str, FieldAttr] | None = None,
|
3350
3364
|
description: str | None = None,
|
3351
3365
|
organism: str | None = None,
|
3352
3366
|
key: str | None = None,
|
@@ -3372,8 +3386,7 @@ def save_artifact(
|
|
3372
3386
|
Returns:
|
3373
3387
|
The saved Artifact.
|
3374
3388
|
"""
|
3375
|
-
from ..
|
3376
|
-
from ..core._data import add_labels
|
3389
|
+
from ..models.artifact import add_labels, data_is_anndata, data_is_mudata
|
3377
3390
|
|
3378
3391
|
if artifact is None:
|
3379
3392
|
if data_is_anndata(data):
|
@@ -3395,7 +3408,7 @@ def save_artifact(
|
|
3395
3408
|
artifact.schema = schema
|
3396
3409
|
artifact.save()
|
3397
3410
|
|
3398
|
-
if organism is not None:
|
3411
|
+
if organism is not None and columns_field is not None:
|
3399
3412
|
feature_kwargs = check_registry_organism(
|
3400
3413
|
(
|
3401
3414
|
list(columns_field.values())[0].field.model
|
@@ -3532,8 +3545,8 @@ def update_registry(
|
|
3532
3545
|
exclude: Values to exclude from inspect.
|
3533
3546
|
kwargs: Additional keyword arguments to pass to the registry model to create new records.
|
3534
3547
|
"""
|
3535
|
-
from lamindb._save import save as ln_save
|
3536
3548
|
from lamindb.core._settings import settings
|
3549
|
+
from lamindb.models.save import save as ln_save
|
3537
3550
|
|
3538
3551
|
registry = field.field.model
|
3539
3552
|
filter_kwargs = check_registry_organism(registry, organism)
|
@@ -3621,7 +3634,7 @@ def log_saved_labels(
|
|
3621
3634
|
validated_only: bool = True,
|
3622
3635
|
) -> None:
|
3623
3636
|
"""Log the saved labels."""
|
3624
|
-
from .._from_values import _format_values
|
3637
|
+
from ..models._from_values import _format_values
|
3625
3638
|
|
3626
3639
|
model_field = colors.italic(model_field)
|
3627
3640
|
for k, labels in labels_saved.items():
|
@@ -3667,12 +3680,14 @@ def _save_organism(name: str):
|
|
3667
3680
|
return organism
|
3668
3681
|
|
3669
3682
|
|
3670
|
-
def _ref_is_name(field: FieldAttr) -> bool | None:
|
3683
|
+
def _ref_is_name(field: FieldAttr | None) -> bool | None:
|
3671
3684
|
"""Check if the reference field is a name field."""
|
3672
|
-
from ..
|
3685
|
+
from ..models.can_curate import get_name_field
|
3673
3686
|
|
3674
|
-
|
3675
|
-
|
3687
|
+
if field is not None:
|
3688
|
+
name_field = get_name_field(field.field.model)
|
3689
|
+
return field.field.name == name_field
|
3690
|
+
return None
|
3676
3691
|
|
3677
3692
|
|
3678
3693
|
# backward compat constructors ------------------
|
@@ -3721,7 +3736,7 @@ def from_anndata(
|
|
3721
3736
|
@classmethod # type: ignore
|
3722
3737
|
def from_mudata(
|
3723
3738
|
cls,
|
3724
|
-
mdata: MuData,
|
3739
|
+
mdata: MuData | UPathStr,
|
3725
3740
|
var_index: dict[str, dict[str, FieldAttr]],
|
3726
3741
|
categoricals: dict[str, FieldAttr] | None = None,
|
3727
3742
|
verbosity: str = "hint",
|
@@ -3761,7 +3776,7 @@ def from_tiledbsoma(
|
|
3761
3776
|
@classmethod # type: ignore
|
3762
3777
|
def from_spatialdata(
|
3763
3778
|
cls,
|
3764
|
-
sdata,
|
3779
|
+
sdata: SpatialData | UPathStr,
|
3765
3780
|
var_index: dict[str, FieldAttr],
|
3766
3781
|
categoricals: dict[str, dict[str, FieldAttr]] | None = None,
|
3767
3782
|
organism: str | None = None,
|
lamindb/errors.py
CHANGED
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING
|
|
7
7
|
import lamindb_setup as ln_setup
|
8
8
|
from lamin_utils import logger
|
9
9
|
|
10
|
-
from lamindb.
|
11
|
-
from lamindb.
|
12
|
-
from lamindb.
|
13
|
-
from lamindb.
|
10
|
+
from lamindb.models.artifact import Artifact
|
11
|
+
from lamindb.models.collection import Collection
|
12
|
+
from lamindb.models.run import Run
|
13
|
+
from lamindb.models.transform import Transform
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
16
|
from vitessce import VitessceConfig
|
@@ -0,0 +1,159 @@
|
|
1
|
+
# ruff: noqa: S608
|
2
|
+
from django.db import migrations, models
|
3
|
+
|
4
|
+
|
5
|
+
def update_model_run_relationships(apps, schema_editor, model_name):
|
6
|
+
vendor = schema_editor.connection.vendor
|
7
|
+
|
8
|
+
# Define table names based on model_name
|
9
|
+
model_table = f"lamindb_{model_name}"
|
10
|
+
link_table = f"lamindb_{model_name}__previous_runs"
|
11
|
+
model_id_field = f"{model_name}_id"
|
12
|
+
|
13
|
+
with schema_editor.connection.cursor() as cursor:
|
14
|
+
# Step 1: Add the current run_id to the _previous_runs table if it doesn't exist
|
15
|
+
cursor.execute(f"""
|
16
|
+
INSERT INTO {link_table} ({model_id_field}, run_id)
|
17
|
+
SELECT a.id, a.run_id
|
18
|
+
FROM {model_table} a
|
19
|
+
WHERE a.run_id IS NOT NULL
|
20
|
+
AND NOT EXISTS (
|
21
|
+
SELECT 1
|
22
|
+
FROM {link_table} apr
|
23
|
+
WHERE apr.{model_id_field} = a.id
|
24
|
+
AND apr.run_id = a.run_id
|
25
|
+
);
|
26
|
+
""")
|
27
|
+
|
28
|
+
# Step 2: For each model, find the earliest run (lowest ID) and set it as the run_id
|
29
|
+
if vendor == "sqlite":
|
30
|
+
cursor.execute(f"""
|
31
|
+
UPDATE {model_table}
|
32
|
+
SET run_id = (
|
33
|
+
SELECT MIN(r.id)
|
34
|
+
FROM lamindb_run r
|
35
|
+
JOIN {link_table} apr ON r.id = apr.run_id
|
36
|
+
WHERE apr.{model_id_field} = {model_table}.id
|
37
|
+
)
|
38
|
+
WHERE EXISTS (
|
39
|
+
SELECT 1
|
40
|
+
FROM {link_table} apr
|
41
|
+
WHERE apr.{model_id_field} = {model_table}.id
|
42
|
+
);
|
43
|
+
""")
|
44
|
+
else: # PostgreSQL
|
45
|
+
cursor.execute(f"""
|
46
|
+
UPDATE {model_table} AS a
|
47
|
+
SET run_id = subquery.min_run_id
|
48
|
+
FROM (
|
49
|
+
SELECT {model_id_field}, MIN(run_id) as min_run_id
|
50
|
+
FROM {link_table}
|
51
|
+
GROUP BY {model_id_field}
|
52
|
+
) AS subquery
|
53
|
+
WHERE a.id = subquery.{model_id_field}
|
54
|
+
AND EXISTS (
|
55
|
+
SELECT 1
|
56
|
+
FROM {link_table} apr
|
57
|
+
WHERE apr.{model_id_field} = a.id
|
58
|
+
);
|
59
|
+
""")
|
60
|
+
|
61
|
+
# Step 3: Remove the earliest run from the link table
|
62
|
+
if vendor == "sqlite":
|
63
|
+
cursor.execute(f"""
|
64
|
+
DELETE FROM {link_table}
|
65
|
+
WHERE EXISTS (
|
66
|
+
SELECT 1 FROM {model_table} a
|
67
|
+
WHERE {link_table}.{model_id_field} = a.id
|
68
|
+
AND {link_table}.run_id = a.run_id
|
69
|
+
);
|
70
|
+
""")
|
71
|
+
else: # PostgreSQL
|
72
|
+
cursor.execute(f"""
|
73
|
+
DELETE FROM {link_table} AS apr
|
74
|
+
USING {model_table} AS a
|
75
|
+
WHERE apr.{model_id_field} = a.id
|
76
|
+
AND apr.run_id = a.run_id;
|
77
|
+
""")
|
78
|
+
|
79
|
+
|
80
|
+
def update_artifact_run_relationships(apps, schema_editor):
|
81
|
+
"""Migration function for artifacts."""
|
82
|
+
update_model_run_relationships(apps, schema_editor, "artifact")
|
83
|
+
|
84
|
+
|
85
|
+
def update_collection_run_relationships(apps, schema_editor):
|
86
|
+
"""Migration function for collections."""
|
87
|
+
update_model_run_relationships(apps, schema_editor, "collection")
|
88
|
+
|
89
|
+
|
90
|
+
class Migration(migrations.Migration):
|
91
|
+
dependencies = [
|
92
|
+
("lamindb", "0088_schema_components"),
|
93
|
+
]
|
94
|
+
|
95
|
+
operations = [
|
96
|
+
# unrelated to subsequent runs, but related to lamindb 1.2
|
97
|
+
# update the otype field in the artifact table
|
98
|
+
migrations.RunSQL(
|
99
|
+
sql="""
|
100
|
+
UPDATE lamindb_artifact
|
101
|
+
SET otype = 'SpatialData'
|
102
|
+
WHERE otype = 'spatialdata';
|
103
|
+
"""
|
104
|
+
),
|
105
|
+
# Migrate artifact relationships
|
106
|
+
migrations.RunPython(
|
107
|
+
update_artifact_run_relationships, migrations.RunPython.noop
|
108
|
+
),
|
109
|
+
# Update artifact model state
|
110
|
+
migrations.SeparateDatabaseAndState(
|
111
|
+
# Database operations (none, to keep tables intact)
|
112
|
+
[],
|
113
|
+
# State operations (to update Django model only)
|
114
|
+
[
|
115
|
+
# Remove the old field from model state
|
116
|
+
migrations.RemoveField(
|
117
|
+
model_name="artifact",
|
118
|
+
name="_previous_runs",
|
119
|
+
),
|
120
|
+
# Add the new field with the same underlying table
|
121
|
+
migrations.AddField(
|
122
|
+
model_name="artifact",
|
123
|
+
name="_subsequent_runs",
|
124
|
+
field=models.ManyToManyField(
|
125
|
+
"lamindb.run",
|
126
|
+
related_name="_recreated_artifacts",
|
127
|
+
db_table="lamindb_artifact__previous_runs", # Keep the original table name
|
128
|
+
),
|
129
|
+
),
|
130
|
+
],
|
131
|
+
),
|
132
|
+
# Migrate collection relationships
|
133
|
+
migrations.RunPython(
|
134
|
+
update_collection_run_relationships, migrations.RunPython.noop
|
135
|
+
),
|
136
|
+
# Update collection model state
|
137
|
+
migrations.SeparateDatabaseAndState(
|
138
|
+
# Database operations (none, to keep tables intact)
|
139
|
+
[],
|
140
|
+
# State operations (to update Django model only)
|
141
|
+
[
|
142
|
+
# Remove the old field from model state
|
143
|
+
migrations.RemoveField(
|
144
|
+
model_name="collection",
|
145
|
+
name="_previous_runs",
|
146
|
+
),
|
147
|
+
# Add the new field with the same underlying table
|
148
|
+
migrations.AddField(
|
149
|
+
model_name="collection",
|
150
|
+
name="_subsequent_runs",
|
151
|
+
field=models.ManyToManyField(
|
152
|
+
"lamindb.run",
|
153
|
+
related_name="_recreated_collections",
|
154
|
+
db_table="lamindb_collection__previous_runs", # Keep the original table name
|
155
|
+
),
|
156
|
+
),
|
157
|
+
],
|
158
|
+
),
|
159
|
+
]
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# Generated by Django 5.2 on 2025-03-05 10:20
|
2
|
+
|
3
|
+
import django.db.models.deletion
|
4
|
+
import django.db.models.functions.datetime
|
5
|
+
from django.db import migrations, models
|
6
|
+
|
7
|
+
import lamindb.base.fields
|
8
|
+
import lamindb.base.users
|
9
|
+
import lamindb.models.record
|
10
|
+
|
11
|
+
|
12
|
+
class Migration(migrations.Migration):
|
13
|
+
dependencies = [
|
14
|
+
("lamindb", "0089_subsequent_runs"),
|
15
|
+
]
|
16
|
+
|
17
|
+
operations = [
|
18
|
+
migrations.CreateModel(
|
19
|
+
name="RunProject",
|
20
|
+
fields=[
|
21
|
+
("id", models.BigAutoField(primary_key=True, serialize=False)),
|
22
|
+
(
|
23
|
+
"created_at",
|
24
|
+
lamindb.base.fields.DateTimeField(
|
25
|
+
blank=True,
|
26
|
+
db_default=django.db.models.functions.datetime.Now(),
|
27
|
+
db_index=True,
|
28
|
+
editable=False,
|
29
|
+
),
|
30
|
+
),
|
31
|
+
(
|
32
|
+
"created_by",
|
33
|
+
lamindb.base.fields.ForeignKey(
|
34
|
+
blank=True,
|
35
|
+
default=lamindb.base.users.current_user_id,
|
36
|
+
editable=False,
|
37
|
+
on_delete=django.db.models.deletion.PROTECT,
|
38
|
+
related_name="+",
|
39
|
+
to="lamindb.user",
|
40
|
+
),
|
41
|
+
),
|
42
|
+
(
|
43
|
+
"project",
|
44
|
+
lamindb.base.fields.ForeignKey(
|
45
|
+
blank=True,
|
46
|
+
on_delete=django.db.models.deletion.PROTECT,
|
47
|
+
related_name="links_run",
|
48
|
+
to="lamindb.project",
|
49
|
+
),
|
50
|
+
),
|
51
|
+
(
|
52
|
+
"run",
|
53
|
+
lamindb.base.fields.ForeignKey(
|
54
|
+
blank=True,
|
55
|
+
on_delete=django.db.models.deletion.CASCADE,
|
56
|
+
related_name="links_project",
|
57
|
+
to="lamindb.run",
|
58
|
+
),
|
59
|
+
),
|
60
|
+
],
|
61
|
+
options={
|
62
|
+
"unique_together": {("run", "project")},
|
63
|
+
},
|
64
|
+
bases=(models.Model, lamindb.models.record.LinkORM),
|
65
|
+
),
|
66
|
+
migrations.AddField(
|
67
|
+
model_name="project",
|
68
|
+
name="runs",
|
69
|
+
field=models.ManyToManyField(
|
70
|
+
related_name="projects", through="lamindb.RunProject", to="lamindb.run"
|
71
|
+
),
|
72
|
+
),
|
73
|
+
]
|