lamindb 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. lamindb/__init__.py +30 -25
  2. lamindb/_tracked.py +1 -1
  3. lamindb/_view.py +2 -3
  4. lamindb/base/__init__.py +1 -1
  5. lamindb/base/ids.py +1 -10
  6. lamindb/core/__init__.py +7 -65
  7. lamindb/core/_compat.py +60 -0
  8. lamindb/core/_context.py +43 -20
  9. lamindb/core/_settings.py +6 -6
  10. lamindb/core/_sync_git.py +1 -1
  11. lamindb/core/loaders.py +30 -19
  12. lamindb/core/storage/_backed_access.py +4 -2
  13. lamindb/core/storage/_tiledbsoma.py +8 -6
  14. lamindb/core/storage/_zarr.py +104 -25
  15. lamindb/core/storage/objects.py +63 -28
  16. lamindb/core/storage/paths.py +4 -1
  17. lamindb/core/types.py +10 -0
  18. lamindb/curators/__init__.py +100 -85
  19. lamindb/errors.py +1 -1
  20. lamindb/integrations/_vitessce.py +4 -4
  21. lamindb/migrations/0089_subsequent_runs.py +159 -0
  22. lamindb/migrations/0090_runproject_project_runs.py +73 -0
  23. lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
  24. lamindb/models/__init__.py +79 -0
  25. lamindb/{core → models}/_describe.py +3 -3
  26. lamindb/{core → models}/_django.py +8 -5
  27. lamindb/{core → models}/_feature_manager.py +103 -87
  28. lamindb/{_from_values.py → models/_from_values.py} +5 -2
  29. lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
  30. lamindb/{core → models}/_label_manager.py +10 -17
  31. lamindb/{core/relations.py → models/_relations.py} +8 -1
  32. lamindb/models/artifact.py +2602 -0
  33. lamindb/{_can_curate.py → models/can_curate.py} +349 -180
  34. lamindb/models/collection.py +683 -0
  35. lamindb/models/core.py +135 -0
  36. lamindb/models/feature.py +643 -0
  37. lamindb/models/flextable.py +163 -0
  38. lamindb/{_parents.py → models/has_parents.py} +55 -49
  39. lamindb/models/project.py +384 -0
  40. lamindb/{_query_manager.py → models/query_manager.py} +10 -8
  41. lamindb/{_query_set.py → models/query_set.py} +40 -26
  42. lamindb/models/record.py +1762 -0
  43. lamindb/models/run.py +563 -0
  44. lamindb/{_save.py → models/save.py} +9 -7
  45. lamindb/models/schema.py +732 -0
  46. lamindb/models/transform.py +360 -0
  47. lamindb/models/ulabel.py +249 -0
  48. {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
  49. {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/RECORD +51 -51
  50. lamindb/_artifact.py +0 -1379
  51. lamindb/_collection.py +0 -440
  52. lamindb/_feature.py +0 -316
  53. lamindb/_is_versioned.py +0 -40
  54. lamindb/_record.py +0 -1064
  55. lamindb/_run.py +0 -60
  56. lamindb/_schema.py +0 -347
  57. lamindb/_storage.py +0 -15
  58. lamindb/_transform.py +0 -170
  59. lamindb/_ulabel.py +0 -56
  60. lamindb/_utils.py +0 -9
  61. lamindb/base/validation.py +0 -63
  62. lamindb/core/_data.py +0 -491
  63. lamindb/core/fields.py +0 -12
  64. lamindb/models.py +0 -4475
  65. {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
  66. {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
@@ -9,12 +9,23 @@
9
9
  DataFrameCurator
10
10
  AnnDataCurator
11
11
 
12
+ CatManager:
13
+
14
+ .. autosummary::
15
+ :toctree: .
16
+
17
+ CatManager
18
+ DataFrameCatManager
19
+ AnnDataCatManager
20
+ MuDataCatManager
21
+ TiledbsomaCatManager
22
+ CurateLookup
23
+
12
24
  """
13
25
 
14
26
  from __future__ import annotations
15
27
 
16
28
  import copy
17
- import random
18
29
  import re
19
30
  from importlib import resources
20
31
  from itertools import chain
@@ -40,14 +51,10 @@ if TYPE_CHECKING:
40
51
 
41
52
  from lamindb.base.types import FieldAttr
42
53
  from lamindb.models import Record
43
- from lamindb._feature import parse_dtype, parse_dtype_single_cat
44
54
  from lamindb.base.types import FieldAttr # noqa
45
- from lamindb.core._data import add_labels
46
- from lamindb.core._feature_manager import parse_staged_feature_sets_from_anndata
47
55
  from lamindb.core._settings import settings
48
56
  from lamindb.models import (
49
57
  Artifact,
50
- CanCurate,
51
58
  Collection,
52
59
  Feature,
53
60
  Record,
@@ -55,9 +62,11 @@ from lamindb.models import (
55
62
  Schema,
56
63
  ULabel,
57
64
  )
65
+ from lamindb.models._feature_manager import parse_staged_feature_sets_from_anndata
66
+ from lamindb.models.artifact import add_labels, data_is_anndata
67
+ from lamindb.models.feature import parse_dtype, parse_dtype_single_cat
68
+ from lamindb.models._from_values import _format_values
58
69
 
59
- from .._artifact import data_is_anndata
60
- from .._from_values import _format_values
61
70
  from ..errors import InvalidArgument, ValidationError
62
71
 
63
72
  if TYPE_CHECKING:
@@ -68,7 +77,7 @@ if TYPE_CHECKING:
68
77
  from mudata import MuData
69
78
  from spatialdata import SpatialData
70
79
 
71
- from lamindb._query_set import RecordList
80
+ from lamindb.models.query_set import RecordList
72
81
 
73
82
 
74
83
  def strip_ansi_codes(text):
@@ -141,7 +150,7 @@ class CurateLookup:
141
150
  " → categories.alveolar_type_1_fibroblast_cell\n\n"
142
151
  "To look up public ontologies, use .lookup(public=True)"
143
152
  )
144
- else: # pdagma: no cover
153
+ else: # pragma: no cover
145
154
  return colors.warning("No fields are found!")
146
155
 
147
156
 
@@ -199,7 +208,7 @@ class Curator:
199
208
  @doc_args(VALIDATE_DOCSTRING)
200
209
  def validate(self) -> bool | str:
201
210
  """{}""" # noqa: D415
202
- pass # pdagma: no cover
211
+ pass # pragma: no cover
203
212
 
204
213
  @doc_args(SAVE_ARTIFACT_DOCSTRING)
205
214
  def save_artifact(
@@ -453,24 +462,31 @@ class AnnDataCurator(Curator):
453
462
  raise InvalidArgument("dataset must be AnnData-like.")
454
463
  if schema.otype != "AnnData":
455
464
  raise InvalidArgument("Schema otype must be 'AnnData'.")
456
- self._obs_curator = DataFrameCurator(
457
- self._dataset.obs, schema._get_component("obs")
458
- )
459
- self._var_curator = DataFrameCurator(
460
- self._dataset.var.T, schema._get_component("var")
461
- )
465
+ # TODO: also support slots other than obs and var
466
+ self._slots = {
467
+ slot: DataFrameCurator(
468
+ (
469
+ self._dataset.__getattribute__(slot).T
470
+ if slot == "var"
471
+ else self._dataset.__getattribute__(slot)
472
+ ),
473
+ slot_schema,
474
+ )
475
+ for slot, slot_schema in schema.slots.items()
476
+ if slot in {"obs", "var"}
477
+ }
462
478
 
463
479
  @property
464
480
  @doc_args(SLOTS_DOCSTRING)
465
481
  def slots(self) -> dict[str, DataFrameCurator]:
466
482
  """{}""" # noqa: D415
467
- return {"obs": self._obs_curator, "var": self._var_curator}
483
+ return self._slots
468
484
 
469
485
  @doc_args(VALIDATE_DOCSTRING)
470
486
  def validate(self) -> None:
471
487
  """{}""" # noqa: D415
472
- self._obs_curator.validate()
473
- self._var_curator.validate()
488
+ for _, curator in self._slots.items():
489
+ curator.validate()
474
490
 
475
491
  @doc_args(SAVE_ARTIFACT_DOCSTRING)
476
492
  def save_artifact(
@@ -483,13 +499,18 @@ class AnnDataCurator(Curator):
483
499
  ):
484
500
  """{}""" # noqa: D415
485
501
  if not self._is_validated:
486
- self.validate() # raises ValidationError if doesn't validate
487
- result = parse_dtype_single_cat(self._var_curator._schema.itype, is_itype=True)
502
+ self.validate()
488
503
  return save_artifact( # type: ignore
489
504
  self._dataset,
490
505
  description=description,
491
- fields=self._obs_curator._cat_manager.categoricals,
492
- columns_field=result["field"],
506
+ fields=self.slots["obs"]._cat_manager.categoricals,
507
+ columns_field=(
508
+ parse_dtype_single_cat(self.slots["var"]._schema.itype, is_itype=True)[
509
+ "field"
510
+ ]
511
+ if "var" in self._slots
512
+ else None
513
+ ),
493
514
  key=key,
494
515
  artifact=self._artifact,
495
516
  revises=revises,
@@ -519,8 +540,8 @@ class CatManager:
519
540
 
520
541
  If you find non-validated values, you have several options:
521
542
 
522
- - new values found in the data can be registered using :meth:`~lamindb.core.DataFrameCatManager.add_new_from`
523
- - non-validated values can be accessed using :meth:`~lamindb.core.DataFrameCatManager.non_validated` and addressed manually
543
+ - new values found in the data can be registered using :meth:`~lamindb.curators.DataFrameCatManager.add_new_from`
544
+ - non-validated values can be accessed using :meth:`~lamindb.curators.DataFrameCatManager.non_validated` and addressed manually
524
545
  """
525
546
 
526
547
  def __init__(
@@ -599,7 +620,7 @@ class CatManager:
599
620
  Returns:
600
621
  None
601
622
  """
602
- pass # pdagma: no cover
623
+ pass # pragma: no cover
603
624
 
604
625
  @doc_args(SAVE_ARTIFACT_DOCSTRING)
605
626
  def save_artifact(
@@ -891,7 +912,7 @@ class AnnDataCatManager(CatManager):
891
912
  def __init__(
892
913
  self,
893
914
  data: ad.AnnData | Artifact,
894
- var_index: FieldAttr,
915
+ var_index: FieldAttr | None = None,
895
916
  categoricals: dict[str, FieldAttr] | None = None,
896
917
  obs_columns: FieldAttr = Feature.name,
897
918
  verbosity: str = "hint",
@@ -960,15 +981,16 @@ class AnnDataCatManager(CatManager):
960
981
  validated_only: bool = True,
961
982
  ):
962
983
  """Save variable records."""
963
- update_registry(
964
- values=list(self._adata.var.index),
965
- field=self.var_index,
966
- key="var_index",
967
- validated_only=validated_only,
968
- organism=self._organism,
969
- source=self._sources.get("var_index"),
970
- exclude=self._exclude.get("var_index"),
971
- )
984
+ if self.var_index is not None:
985
+ update_registry(
986
+ values=list(self._adata.var.index),
987
+ field=self.var_index,
988
+ key="var_index",
989
+ validated_only=validated_only,
990
+ organism=self._organism,
991
+ source=self._sources.get("var_index"),
992
+ exclude=self._exclude.get("var_index"),
993
+ )
972
994
 
973
995
  def add_new_from(self, key: str, **kwargs):
974
996
  """Add validated & new categories.
@@ -1004,15 +1026,19 @@ class AnnDataCatManager(CatManager):
1004
1026
 
1005
1027
  # add all validated records to the current instance
1006
1028
  self._save_from_var_index(validated_only=True)
1007
- validated_var, non_validated_var = validate_categories(
1008
- self._adata.var.index,
1009
- field=self._var_field,
1010
- key="var_index",
1011
- source=self._sources.get("var_index"),
1012
- hint_print=".add_new_from_var_index()",
1013
- exclude=self._exclude.get("var_index"),
1014
- organism=self._organism, # type: ignore
1015
- )
1029
+ if self.var_index is not None:
1030
+ validated_var, non_validated_var = validate_categories(
1031
+ self._adata.var.index,
1032
+ field=self._var_field,
1033
+ key="var_index",
1034
+ source=self._sources.get("var_index"),
1035
+ hint_print=".add_new_from_var_index()",
1036
+ exclude=self._exclude.get("var_index"),
1037
+ organism=self._organism, # type: ignore
1038
+ )
1039
+ else:
1040
+ validated_var = True
1041
+ non_validated_var = []
1016
1042
  validated_obs = self._obs_df_curator.validate()
1017
1043
  self._non_validated = self._obs_df_curator._non_validated # type: ignore
1018
1044
  if len(non_validated_var) > 0:
@@ -1711,7 +1737,7 @@ class TiledbsomaCatManager(CatManager):
1711
1737
  Returns:
1712
1738
  A saved artifact record.
1713
1739
  """
1714
- from lamindb.core._data import add_labels
1740
+ from lamindb.models.artifact import add_labels
1715
1741
 
1716
1742
  if not self._is_validated:
1717
1743
  self.validate()
@@ -1848,11 +1874,11 @@ class SpatialDataCatManager(CatManager):
1848
1874
  exclude=exclude,
1849
1875
  )
1850
1876
  if isinstance(sdata, Artifact):
1851
- # TODO: load() doesn't yet work
1852
1877
  self._sdata = sdata.load()
1853
1878
  else:
1854
1879
  self._sdata = self._dataset
1855
1880
  self._sample_metadata_key = sample_metadata_key
1881
+ self._write_path = None
1856
1882
  self._var_fields = var_index
1857
1883
  self._verify_accessor_exists(self._var_fields.keys())
1858
1884
  self._categoricals = categoricals
@@ -2134,26 +2160,14 @@ class SpatialDataCatManager(CatManager):
2134
2160
  try:
2135
2161
  settings.verbosity = "warning"
2136
2162
 
2137
- if self._artifact is None:
2138
- # Write the SpatialData object to a random path in tmp directory
2139
- # The Artifact constructor will move it to the cache
2140
- write_path = (
2141
- f"{settings.cache_dir}/{random.randint(10**7, 10**8 - 1)}.zarr"
2142
- )
2143
- self._sdata.write(write_path)
2144
-
2145
- # Create the Artifact and associate Artifact metadata
2146
- self._artifact = Artifact(
2147
- write_path,
2148
- description=description,
2149
- key=key,
2150
- revises=revises,
2151
- run=run,
2152
- )
2153
- # According to Tim it is not easy to calculate the number of observations.
2154
- # We would have to write custom code to iterate over labels (which might not even exist at that point)
2155
- self._artifact.otype = "spatialdata"
2156
- self._artifact.save()
2163
+ self._artifact = Artifact.from_spatialdata(
2164
+ self._sdata,
2165
+ key=key,
2166
+ description=description,
2167
+ revises=revises,
2168
+ run=run,
2169
+ )
2170
+ self._artifact.save()
2157
2171
 
2158
2172
  # Link schemas
2159
2173
  feature_kwargs = check_registry_organism(
@@ -2171,7 +2185,7 @@ class SpatialDataCatManager(CatManager):
2171
2185
  """Add Schemas from SpatialData."""
2172
2186
  if obs_fields is None:
2173
2187
  obs_fields = {}
2174
- assert host.otype == "spatialdata" # noqa: S101
2188
+ assert host.otype == "SpatialData" # noqa: S101
2175
2189
 
2176
2190
  feature_sets = {}
2177
2191
 
@@ -2799,7 +2813,7 @@ class DoseHandler:
2799
2813
  return cls.UNIT_MAP.get(unit, unit)
2800
2814
 
2801
2815
  @classmethod
2802
- def validate_values(cls, values: pd.Series) -> list:
2816
+ def validate_values(cls, values: pd.Series) -> list[str]:
2803
2817
  """Validate pert_dose values with strict case checking."""
2804
2818
  errors = []
2805
2819
 
@@ -2843,7 +2857,7 @@ class TimeHandler:
2843
2857
  return unit[0].lower()
2844
2858
 
2845
2859
  @classmethod
2846
- def validate_values(cls, values: pd.Series) -> list:
2860
+ def validate_values(cls, values: pd.Series) -> list[str]:
2847
2861
  """Validate pert_time values."""
2848
2862
  errors = []
2849
2863
 
@@ -3197,8 +3211,8 @@ def validate_categories(
3197
3211
  exclude: str | list | None = None,
3198
3212
  hint_print: str | None = None,
3199
3213
  curator: CatManager | None = None,
3200
- ) -> tuple[bool, list]:
3201
- """Validate ontology terms in a pandas series using LaminDB registries.
3214
+ ) -> tuple[bool, list[str]]:
3215
+ """Validate ontology terms using LaminDB registries.
3202
3216
 
3203
3217
  Args:
3204
3218
  values: The values to validate.
@@ -3210,8 +3224,8 @@ def validate_categories(
3210
3224
  standardize: Whether to standardize the values.
3211
3225
  hint_print: The hint to print that suggests fixing non-validated values.
3212
3226
  """
3213
- from lamindb._from_values import _format_values
3214
3227
  from lamindb.core._settings import settings
3228
+ from lamindb.models._from_values import _format_values
3215
3229
 
3216
3230
  model_field = f"{field.field.model.__name__}.{field.field.name}"
3217
3231
 
@@ -3346,7 +3360,7 @@ def validate_categories_in_df(
3346
3360
  def save_artifact(
3347
3361
  data: pd.DataFrame | ad.AnnData | MuData,
3348
3362
  fields: dict[str, FieldAttr] | dict[str, dict[str, FieldAttr]],
3349
- columns_field: FieldAttr | dict[str, FieldAttr],
3363
+ columns_field: FieldAttr | dict[str, FieldAttr] | None = None,
3350
3364
  description: str | None = None,
3351
3365
  organism: str | None = None,
3352
3366
  key: str | None = None,
@@ -3372,8 +3386,7 @@ def save_artifact(
3372
3386
  Returns:
3373
3387
  The saved Artifact.
3374
3388
  """
3375
- from .._artifact import data_is_anndata, data_is_mudata
3376
- from ..core._data import add_labels
3389
+ from ..models.artifact import add_labels, data_is_anndata, data_is_mudata
3377
3390
 
3378
3391
  if artifact is None:
3379
3392
  if data_is_anndata(data):
@@ -3395,7 +3408,7 @@ def save_artifact(
3395
3408
  artifact.schema = schema
3396
3409
  artifact.save()
3397
3410
 
3398
- if organism is not None:
3411
+ if organism is not None and columns_field is not None:
3399
3412
  feature_kwargs = check_registry_organism(
3400
3413
  (
3401
3414
  list(columns_field.values())[0].field.model
@@ -3532,8 +3545,8 @@ def update_registry(
3532
3545
  exclude: Values to exclude from inspect.
3533
3546
  kwargs: Additional keyword arguments to pass to the registry model to create new records.
3534
3547
  """
3535
- from lamindb._save import save as ln_save
3536
3548
  from lamindb.core._settings import settings
3549
+ from lamindb.models.save import save as ln_save
3537
3550
 
3538
3551
  registry = field.field.model
3539
3552
  filter_kwargs = check_registry_organism(registry, organism)
@@ -3621,7 +3634,7 @@ def log_saved_labels(
3621
3634
  validated_only: bool = True,
3622
3635
  ) -> None:
3623
3636
  """Log the saved labels."""
3624
- from .._from_values import _format_values
3637
+ from ..models._from_values import _format_values
3625
3638
 
3626
3639
  model_field = colors.italic(model_field)
3627
3640
  for k, labels in labels_saved.items():
@@ -3667,12 +3680,14 @@ def _save_organism(name: str):
3667
3680
  return organism
3668
3681
 
3669
3682
 
3670
- def _ref_is_name(field: FieldAttr) -> bool | None:
3683
+ def _ref_is_name(field: FieldAttr | None) -> bool | None:
3671
3684
  """Check if the reference field is a name field."""
3672
- from .._can_curate import get_name_field
3685
+ from ..models.can_curate import get_name_field
3673
3686
 
3674
- name_field = get_name_field(field.field.model)
3675
- return field.field.name == name_field
3687
+ if field is not None:
3688
+ name_field = get_name_field(field.field.model)
3689
+ return field.field.name == name_field
3690
+ return None
3676
3691
 
3677
3692
 
3678
3693
  # backward compat constructors ------------------
@@ -3721,7 +3736,7 @@ def from_anndata(
3721
3736
  @classmethod # type: ignore
3722
3737
  def from_mudata(
3723
3738
  cls,
3724
- mdata: MuData,
3739
+ mdata: MuData | UPathStr,
3725
3740
  var_index: dict[str, dict[str, FieldAttr]],
3726
3741
  categoricals: dict[str, FieldAttr] | None = None,
3727
3742
  verbosity: str = "hint",
@@ -3761,7 +3776,7 @@ def from_tiledbsoma(
3761
3776
  @classmethod # type: ignore
3762
3777
  def from_spatialdata(
3763
3778
  cls,
3764
- sdata,
3779
+ sdata: SpatialData | UPathStr,
3765
3780
  var_index: dict[str, FieldAttr],
3766
3781
  categoricals: dict[str, dict[str, FieldAttr]] | None = None,
3767
3782
  organism: str | None = None,
lamindb/errors.py CHANGED
@@ -1,4 +1,4 @@
1
- """Exceptions.
1
+ """Errors.
2
2
 
3
3
  .. autosummary::
4
4
  :toctree: .
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING
7
7
  import lamindb_setup as ln_setup
8
8
  from lamin_utils import logger
9
9
 
10
- from lamindb._artifact import Artifact
11
- from lamindb._collection import Collection
12
- from lamindb._run import Run
13
- from lamindb._transform import Transform
10
+ from lamindb.models.artifact import Artifact
11
+ from lamindb.models.collection import Collection
12
+ from lamindb.models.run import Run
13
+ from lamindb.models.transform import Transform
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from vitessce import VitessceConfig
@@ -0,0 +1,159 @@
1
+ # ruff: noqa: S608
2
+ from django.db import migrations, models
3
+
4
+
5
def update_model_run_relationships(apps, schema_editor, model_name):
    """Make ``run_id`` the earliest run and leave the later runs in the link table.

    Works on ``lamindb_<model_name>`` and its link table
    ``lamindb_<model_name>__previous_runs`` in three ordered steps:

    1. copy every non-null ``run_id`` into the link table unless that
       (record, run) pair is already there;
    2. set ``run_id`` of each record that has link rows to the smallest run id
       among them;
    3. delete the link row that now duplicates ``run_id``.

    Args:
        apps: App registry handed over by ``RunPython``; not used here.
        schema_editor: Schema editor whose connection supplies the cursor and
            the vendor name used to pick the SQL dialect.
        model_name: Lower-case model name the table and column names are
            derived from (the callers in this file pass "artifact" and
            "collection").
    """
    is_sqlite = schema_editor.connection.vendor == "sqlite"

    # All identifiers follow from the model name
    table = f"lamindb_{model_name}"
    links = f"lamindb_{model_name}__previous_runs"
    fk = f"{model_name}_id"

    # Step 1: make sure the current run is present in the link table
    seed_links_sql = f"""
        INSERT INTO {links} ({fk}, run_id)
        SELECT a.id, a.run_id
        FROM {table} a
        WHERE a.run_id IS NOT NULL
        AND NOT EXISTS (
            SELECT 1
            FROM {links} apr
            WHERE apr.{fk} = a.id
            AND apr.run_id = a.run_id
        );
    """

    if is_sqlite:
        # Step 2: earliest run (lowest id) becomes run_id
        set_earliest_sql = f"""
            UPDATE {table}
            SET run_id = (
                SELECT MIN(r.id)
                FROM lamindb_run r
                JOIN {links} apr ON r.id = apr.run_id
                WHERE apr.{fk} = {table}.id
            )
            WHERE EXISTS (
                SELECT 1
                FROM {links} apr
                WHERE apr.{fk} = {table}.id
            );
        """
        # Step 3: the earliest run no longer belongs in the link table
        drop_earliest_sql = f"""
            DELETE FROM {links}
            WHERE EXISTS (
                SELECT 1 FROM {table} a
                WHERE {links}.{fk} = a.id
                AND {links}.run_id = a.run_id
            );
        """
    else:  # PostgreSQL
        # Step 2: earliest run (lowest id) becomes run_id
        set_earliest_sql = f"""
            UPDATE {table} AS a
            SET run_id = subquery.min_run_id
            FROM (
                SELECT {fk}, MIN(run_id) as min_run_id
                FROM {links}
                GROUP BY {fk}
            ) AS subquery
            WHERE a.id = subquery.{fk}
            AND EXISTS (
                SELECT 1
                FROM {links} apr
                WHERE apr.{fk} = a.id
            );
        """
        # Step 3: the earliest run no longer belongs in the link table
        drop_earliest_sql = f"""
            DELETE FROM {links} AS apr
            USING {table} AS a
            WHERE apr.{fk} = a.id
            AND apr.run_id = a.run_id;
        """

    # Order matters: each statement relies on the result of the previous one
    with schema_editor.connection.cursor() as cursor:
        for statement in (seed_links_sql, set_earliest_sql, drop_earliest_sql):
            cursor.execute(statement)
78
+
79
+
80
def update_artifact_run_relationships(apps, schema_editor):
    """Apply the run-relationship data migration to the artifact tables."""
    update_model_run_relationships(apps, schema_editor, model_name="artifact")
83
+
84
+
85
def update_collection_run_relationships(apps, schema_editor):
    """Apply the run-relationship data migration to the collection tables."""
    update_model_run_relationships(apps, schema_editor, model_name="collection")
88
+
89
+
90
class Migration(migrations.Migration):
    """Turn ``_previous_runs`` into ``_subsequent_runs`` for artifacts and collections.

    The data steps rewrite ``run_id`` and the link-table rows; the state steps
    only rename the many-to-many field in Django's model state while keeping
    the existing ``*__previous_runs`` tables. The first operation additionally
    renames the artifact otype ``'spatialdata'`` to ``'SpatialData'``.
    """

    dependencies = [
        ("lamindb", "0088_schema_components"),
    ]

    operations = [
        # unrelated to subsequent runs, but related to lamindb 1.2
        # update the otype field in the artifact table
        migrations.RunSQL(
            sql="""
            UPDATE lamindb_artifact
            SET otype = 'SpatialData'
            WHERE otype = 'spatialdata';
            """,
            # Without reverse_sql the whole migration would be irreversible,
            # although the RunPython steps below declare a (noop) reverse.
            reverse_sql="""
            UPDATE lamindb_artifact
            SET otype = 'spatialdata'
            WHERE otype = 'SpatialData';
            """,
        ),
        # Migrate artifact relationships
        migrations.RunPython(
            update_artifact_run_relationships, migrations.RunPython.noop
        ),
        # Update artifact model state
        migrations.SeparateDatabaseAndState(
            # Database operations (none, to keep tables intact)
            [],
            # State operations (to update Django model only)
            [
                # Remove the old field from model state
                migrations.RemoveField(
                    model_name="artifact",
                    name="_previous_runs",
                ),
                # Add the new field with the same underlying table
                migrations.AddField(
                    model_name="artifact",
                    name="_subsequent_runs",
                    field=models.ManyToManyField(
                        "lamindb.run",
                        related_name="_recreated_artifacts",
                        db_table="lamindb_artifact__previous_runs",  # Keep the original table name
                    ),
                ),
            ],
        ),
        # Migrate collection relationships
        migrations.RunPython(
            update_collection_run_relationships, migrations.RunPython.noop
        ),
        # Update collection model state
        migrations.SeparateDatabaseAndState(
            # Database operations (none, to keep tables intact)
            [],
            # State operations (to update Django model only)
            [
                # Remove the old field from model state
                migrations.RemoveField(
                    model_name="collection",
                    name="_previous_runs",
                ),
                # Add the new field with the same underlying table
                migrations.AddField(
                    model_name="collection",
                    name="_subsequent_runs",
                    field=models.ManyToManyField(
                        "lamindb.run",
                        related_name="_recreated_collections",
                        db_table="lamindb_collection__previous_runs",  # Keep the original table name
                    ),
                ),
            ],
        ),
    ]
@@ -0,0 +1,73 @@
1
+ # Generated by Django 5.2 on 2025-03-05 10:20
2
+
3
+ import django.db.models.deletion
4
+ import django.db.models.functions.datetime
5
+ from django.db import migrations, models
6
+
7
+ import lamindb.base.fields
8
+ import lamindb.base.users
9
+ import lamindb.models.record
10
+
11
+
12
class Migration(migrations.Migration):
    """Add the ``RunProject`` link model and the ``Project.runs`` relation."""

    # Must run after the _subsequent_runs migration
    dependencies = [
        ("lamindb", "0089_subsequent_runs"),
    ]

    operations = [
        # Link table between runs and projects
        migrations.CreateModel(
            name="RunProject",
            fields=[
                ("id", models.BigAutoField(primary_key=True, serialize=False)),
                # Creation timestamp, defaulted by the database via Now()
                (
                    "created_at",
                    lamindb.base.fields.DateTimeField(
                        blank=True,
                        db_default=django.db.models.functions.datetime.Now(),
                        db_index=True,
                        editable=False,
                    ),
                ),
                # Creating user; defaults to current_user_id, no reverse accessor ("+")
                (
                    "created_by",
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        default=lamindb.base.users.current_user_id,
                        editable=False,
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="+",
                        to="lamindb.user",
                    ),
                ),
                # Project side of the link; PROTECT blocks deleting a linked project
                (
                    "project",
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        on_delete=django.db.models.deletion.PROTECT,
                        related_name="links_run",
                        to="lamindb.project",
                    ),
                ),
                # Run side of the link; CASCADE removes the link with its run
                (
                    "run",
                    lamindb.base.fields.ForeignKey(
                        blank=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="links_project",
                        to="lamindb.run",
                    ),
                ),
            ],
            options={
                # At most one link per (run, project) pair
                "unique_together": {("run", "project")},
            },
            bases=(models.Model, lamindb.models.record.LinkORM),
        ),
        # Many-to-many Project.runs (reverse accessor: Run.projects) through RunProject
        migrations.AddField(
            model_name="project",
            name="runs",
            field=models.ManyToManyField(
                related_name="projects", through="lamindb.RunProject", to="lamindb.run"
            ),
        ),
    ]
+ ]