lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +216 -133
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/objects.py +6 -0
  20. lamindb/core/subsettings/__init__.py +2 -0
  21. lamindb/core/subsettings/_annotation_settings.py +11 -0
  22. lamindb/curators/__init__.py +7 -3559
  23. lamindb/curators/_legacy.py +2056 -0
  24. lamindb/curators/core.py +1546 -0
  25. lamindb/errors.py +11 -0
  26. lamindb/examples/__init__.py +27 -0
  27. lamindb/examples/schemas/__init__.py +12 -0
  28. lamindb/examples/schemas/_anndata.py +25 -0
  29. lamindb/examples/schemas/_simple.py +19 -0
  30. lamindb/integrations/_vitessce.py +8 -5
  31. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  32. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  33. lamindb/models/__init__.py +12 -2
  34. lamindb/models/_describe.py +21 -4
  35. lamindb/models/_feature_manager.py +384 -301
  36. lamindb/models/_from_values.py +1 -1
  37. lamindb/models/_is_versioned.py +5 -15
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +354 -177
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/can_curate.py +4 -1
  42. lamindb/models/collection.py +79 -56
  43. lamindb/models/core.py +1 -1
  44. lamindb/models/feature.py +78 -47
  45. lamindb/models/has_parents.py +24 -9
  46. lamindb/models/project.py +3 -3
  47. lamindb/models/query_manager.py +221 -22
  48. lamindb/models/query_set.py +251 -206
  49. lamindb/models/record.py +211 -344
  50. lamindb/models/run.py +59 -5
  51. lamindb/models/save.py +9 -5
  52. lamindb/models/schema.py +673 -196
  53. lamindb/models/transform.py +5 -14
  54. lamindb/models/ulabel.py +8 -5
  55. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
  56. lamindb-1.5.0.dist-info/RECORD +108 -0
  57. lamindb-1.3.2.dist-info/RECORD +0 -95
  58. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
  59. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0
lamindb/models/record.py CHANGED
@@ -5,7 +5,6 @@ import inspect
5
5
  import re
6
6
  import sys
7
7
  from collections import defaultdict
8
- from functools import reduce
9
8
  from itertools import chain
10
9
  from pathlib import PurePosixPath
11
10
  from typing import (
@@ -21,36 +20,15 @@ from typing import (
21
20
  import dj_database_url
22
21
  import lamindb_setup as ln_setup
23
22
  from django.core.exceptions import ValidationError as DjangoValidationError
24
- from django.db import IntegrityError, connections, models, transaction
25
- from django.db.models import (
26
- CASCADE,
27
- PROTECT,
28
- Field,
29
- IntegerField,
30
- Manager,
31
- Q,
32
- QuerySet,
33
- Value,
34
- )
23
+ from django.db import IntegrityError, ProgrammingError, connections, models, transaction
24
+ from django.db.models import CASCADE, PROTECT, Field, Manager, QuerySet
35
25
  from django.db.models.base import ModelBase
36
26
  from django.db.models.fields.related import (
37
27
  ManyToManyField,
38
28
  ManyToManyRel,
39
29
  ManyToOneRel,
40
30
  )
41
- from django.db.models.functions import Cast, Coalesce
42
- from django.db.models.lookups import (
43
- Contains,
44
- Exact,
45
- IContains,
46
- IExact,
47
- IRegex,
48
- IStartsWith,
49
- Regex,
50
- StartsWith,
51
- )
52
31
  from lamin_utils import colors, logger
53
- from lamin_utils._lookup import Lookup
54
32
  from lamindb_setup import settings as setup_settings
55
33
  from lamindb_setup._connect_instance import (
56
34
  get_owner_name_from_identifier,
@@ -60,25 +38,25 @@ from lamindb_setup._connect_instance import (
60
38
  from lamindb_setup.core._docs import doc_args
61
39
  from lamindb_setup.core._hub_core import connect_instance_hub
62
40
  from lamindb_setup.core._settings_store import instance_settings_file
41
+ from lamindb_setup.core.django import DBToken, db_token_manager
63
42
  from lamindb_setup.core.upath import extract_suffix_from_path
64
43
 
65
- from lamindb.base import deprecated
66
- from lamindb.base.fields import (
44
+ from ..base.fields import (
67
45
  CharField,
68
46
  DateTimeField,
69
47
  ForeignKey,
70
48
  JSONField,
71
- TextField,
72
49
  )
73
- from lamindb.base.types import FieldAttr, StrField
74
- from lamindb.errors import FieldValidationError
75
-
50
+ from ..base.types import FieldAttr, StrField
76
51
  from ..errors import (
52
+ FieldValidationError,
77
53
  InvalidArgument,
54
+ NoWriteAccess,
78
55
  RecordNameChangeIntegrityError,
79
56
  ValidationError,
80
57
  )
81
58
  from ._is_versioned import IsVersioned
59
+ from .query_manager import QueryManager, _lookup, _search
82
60
 
83
61
  if TYPE_CHECKING:
84
62
  from datetime import datetime
@@ -183,14 +161,19 @@ def init_self_from_db(self: Record, existing_record: Record):
183
161
 
184
162
  def update_attributes(record: Record, attributes: dict[str, str]):
185
163
  for key, value in attributes.items():
186
- if (
187
- getattr(record, key) != value
188
- and value is not None
189
- and key != "dtype"
190
- and key != "_aux"
191
- ):
192
- logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
193
- setattr(record, key, value)
164
+ if getattr(record, key) != value and value is not None:
165
+ if key not in {"uid", "dtype", "otype", "hash"}:
166
+ logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
167
+ setattr(record, key, value)
168
+ else:
169
+ hash_message = (
170
+ "recomputing on .save()"
171
+ if key == "hash"
172
+ else f"keeping {getattr(record, key)}"
173
+ )
174
+ logger.warning(
175
+ f"ignoring tentative value {value} for {key}, {hash_message}"
176
+ )
194
177
 
195
178
 
196
179
  def validate_literal_fields(record: Record, kwargs) -> None:
@@ -278,9 +261,12 @@ def validate_fields(record: Record, kwargs):
278
261
  "uid"
279
262
  ).max_length # triggers FieldDoesNotExist
280
263
  if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
281
- raise ValidationError(
282
- f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
283
- )
264
+ if not (
265
+ record.__class__ is Schema and len(kwargs["uid"]) == 16
266
+ ): # no error for schema
267
+ raise ValidationError(
268
+ f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
269
+ )
284
270
  # validate is_type
285
271
  if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
286
272
  if kwargs["name"].endswith("s"):
@@ -370,6 +356,8 @@ class Registry(ModelBase):
370
356
  Note: `Registry` inherits from Django's `ModelBase`.
371
357
  """
372
358
 
359
+ _available_fields: set[str] = None
360
+
373
361
  def __new__(cls, name, bases, attrs, **kwargs):
374
362
  new_class = super().__new__(cls, name, bases, attrs, **kwargs)
375
363
  return new_class
@@ -411,36 +399,13 @@ class Registry(ModelBase):
411
399
  def __repr__(cls) -> str:
412
400
  return registry_repr(cls)
413
401
 
402
+ @doc_args(_lookup.__doc__)
414
403
  def lookup(
415
404
  cls,
416
405
  field: StrField | None = None,
417
406
  return_field: StrField | None = None,
418
407
  ) -> NamedTuple:
419
- """Return an auto-complete object for a field.
420
-
421
- Args:
422
- field: The field to look up the values for. Defaults to first string field.
423
- return_field: The field to return. If `None`, returns the whole record.
424
-
425
- Returns:
426
- A `NamedTuple` of lookup information of the field values with a
427
- dictionary converter.
428
-
429
- See Also:
430
- :meth:`~lamindb.models.Record.search`
431
-
432
- Examples:
433
- >>> import bionty as bt
434
- >>> bt.settings.organism = "human"
435
- >>> bt.Gene.from_source(symbol="ADGB-DT").save()
436
- >>> lookup = bt.Gene.lookup()
437
- >>> lookup.adgb_dt
438
- >>> lookup_dict = lookup.dict()
439
- >>> lookup_dict['ADGB-DT']
440
- >>> lookup_by_ensembl_id = bt.Gene.lookup(field="ensembl_gene_id")
441
- >>> genes.ensg00000002745
442
- >>> lookup_return_symbols = bt.Gene.lookup(field="ensembl_gene_id", return_field="symbol")
443
- """
408
+ """{}""" # noqa: D415
444
409
  return _lookup(cls=cls, field=field, return_field=return_field)
445
410
 
446
411
  def filter(cls, *queries, **expressions) -> QuerySet:
@@ -487,10 +452,12 @@ class Registry(ModelBase):
487
452
  - Guide: :doc:`docs:registries`
488
453
  - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
489
454
 
490
- Examples::
455
+ Examples:
456
+
457
+ ::
491
458
 
492
- ulabel = ln.ULabel.get("FvtpPJLJ")
493
- ulabel = ln.ULabel.get(name="my-label")
459
+ ulabel = ln.ULabel.get("FvtpPJLJ")
460
+ ulabel = ln.ULabel.get(name="my-label")
494
461
  """
495
462
  from .query_set import QuerySet
496
463
 
@@ -538,6 +505,7 @@ class Registry(ModelBase):
538
505
  query_set = query_set.order_by("-updated_at")
539
506
  return query_set[:limit].df(include=include, features=features)
540
507
 
508
+ @doc_args(_search.__doc__)
541
509
  def search(
542
510
  cls,
543
511
  string: str,
@@ -546,27 +514,7 @@ class Registry(ModelBase):
546
514
  limit: int | None = 20,
547
515
  case_sensitive: bool = False,
548
516
  ) -> QuerySet:
549
- """Search.
550
-
551
- Args:
552
- string: The input string to match against the field ontology values.
553
- field: The field or fields to search. Search all string fields by default.
554
- limit: Maximum amount of top results to return.
555
- case_sensitive: Whether the match is case sensitive.
556
-
557
- Returns:
558
- A sorted `DataFrame` of search results with a score in column `score`.
559
- If `return_queryset` is `True`. `QuerySet`.
560
-
561
- See Also:
562
- :meth:`~lamindb.models.Record.filter`
563
- :meth:`~lamindb.models.Record.lookup`
564
-
565
- Examples:
566
- >>> ulabels = ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name")
567
- >>> ln.save(ulabels)
568
- >>> ln.ULabel.search("ULabel2")
569
- """
517
+ """{}""" # noqa: D415
570
518
  return _search(
571
519
  cls=cls,
572
520
  string=string,
@@ -594,17 +542,21 @@ class Registry(ModelBase):
594
542
  """
595
543
  from .query_set import QuerySet
596
544
 
597
- if instance is None:
545
+ # connection already established
546
+ if instance in connections:
547
+ return QuerySet(model=cls, using=instance)
548
+ # we're in the default instance
549
+ if instance is None or instance == "default":
598
550
  return QuerySet(model=cls, using=None)
599
-
600
551
  owner, name = get_owner_name_from_identifier(instance)
601
- if f"{owner}/{name}" == setup_settings.instance.slug:
552
+ if [owner, name] == setup_settings.instance.slug.split("/"):
602
553
  return QuerySet(model=cls, using=None)
603
554
 
604
- settings_file = instance_settings_file(name, owner)
605
- cache_filepath = (
606
- ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
555
+ # move on to different instances
556
+ cache_using_filepath = (
557
+ setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
607
558
  )
559
+ settings_file = instance_settings_file(name, owner)
608
560
  if not settings_file.exists():
609
561
  result = connect_instance_hub(owner=owner, name=name)
610
562
  if isinstance(result, str):
@@ -613,24 +565,46 @@ class Registry(ModelBase):
613
565
  )
614
566
  iresult, _ = result
615
567
  # do not use {} syntax below, it gives rise to a dict if the schema modules
616
- # are empty and then triggers a TypeError in missing_members = source_module - target_module
617
- source_module = set( # noqa
568
+ # are empty and then triggers a TypeError in missing_members = source_modules - target_modules
569
+ source_modules = set( # noqa
618
570
  [mod for mod in iresult["schema_str"].split(",") if mod != ""]
619
571
  )
620
- target_module = ln_setup.settings.instance.modules
621
- if not source_module.issubset(target_module):
622
- missing_members = source_module - target_module
623
- logger.warning(
624
- f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
625
- )
626
- cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}") # type: ignore
627
- settings_file = instance_settings_file(name, owner)
572
+ # this just retrieves the full connection string from iresult
628
573
  db = update_db_using_local(iresult, settings_file)
574
+ cache_using_filepath.write_text(
575
+ f"{iresult['lnid']}\n{iresult['schema_str']}"
576
+ )
577
+ # need to set the token if it is a fine_grained_access and the user is jwt (not public)
578
+ is_fine_grained_access = (
579
+ iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
580
+ )
581
+ # access_db can take both: the dict from connect_instance_hub and isettings
582
+ into_db_token = iresult
629
583
  else:
630
584
  isettings = load_instance_settings(settings_file)
585
+ source_modules = isettings.modules
631
586
  db = isettings.db
632
- cache_filepath.write_text(f"{isettings.uid}\n{','.join(isettings.modules)}") # type: ignore
587
+ cache_using_filepath.write_text(
588
+ f"{isettings.uid}\n{','.join(source_modules)}"
589
+ )
590
+ # need to set the token if it is a fine_grained_access and the user is jwt (not public)
591
+ is_fine_grained_access = (
592
+ isettings._fine_grained_access and isettings._db_permissions == "jwt"
593
+ )
594
+ # access_db can take both: the dict from connect_instance_hub and isettings
595
+ into_db_token = isettings
596
+
597
+ target_modules = setup_settings.instance.modules
598
+ if missing_members := source_modules - target_modules:
599
+ logger.warning(
600
+ f"source modules has additional modules: {missing_members}\n"
601
+ "consider mounting these registry modules to transfer all metadata"
602
+ )
603
+
633
604
  add_db_connection(db, instance)
605
+ if is_fine_grained_access:
606
+ db_token = DBToken(into_db_token)
607
+ db_token_manager.set(db_token, instance)
634
608
  return QuerySet(model=cls, using=instance)
635
609
 
636
610
  def __get_module_name__(cls) -> str:
@@ -640,10 +614,6 @@ class Registry(ModelBase):
640
614
  module_name = "core"
641
615
  return module_name
642
616
 
643
- @deprecated("__get_module_name__")
644
- def __get_schema_name__(cls) -> str:
645
- return cls.__get_module_name__()
646
-
647
617
  def __get_name_with_module__(cls) -> str:
648
618
  module_name = cls.__get_module_name__()
649
619
  if module_name == "core":
@@ -652,9 +622,19 @@ class Registry(ModelBase):
652
622
  module_prefix = f"{module_name}."
653
623
  return f"{module_prefix}{cls.__name__}"
654
624
 
655
- @deprecated("__get_name_with_module__")
656
- def __get_name_with_schema__(cls) -> str:
657
- return cls.__get_name_with_module__()
625
+ def __get_available_fields__(cls) -> set[str]:
626
+ if cls._available_fields is None:
627
+ cls._available_fields = {
628
+ f.name
629
+ for f in cls._meta.get_fields()
630
+ if not f.name.startswith("_")
631
+ and not f.name.startswith("links_")
632
+ and not f.name.endswith("_id")
633
+ }
634
+ if cls.__name__ == "Artifact":
635
+ cls._available_fields.add("visibility")
636
+ cls._available_fields.add("transform")
637
+ return cls._available_fields
658
638
 
659
639
 
660
640
  class BasicRecord(models.Model, metaclass=Registry):
@@ -665,80 +645,89 @@ class BasicRecord(models.Model, metaclass=Registry):
665
645
  It's mainly used for LinkORMs and similar.
666
646
  """
667
647
 
648
+ objects = QueryManager()
649
+
668
650
  class Meta:
669
651
  abstract = True
652
+ base_manager_name = "objects"
670
653
 
671
654
  def __init__(self, *args, **kwargs):
672
655
  skip_validation = kwargs.pop("_skip_validation", False)
673
- if not args and skip_validation:
674
- super().__init__(**kwargs)
675
- elif not args and not skip_validation:
676
- from ..core._settings import settings
677
- from .can_curate import CanCurate
678
- from .collection import Collection
679
- from .schema import Schema
680
- from .transform import Transform
681
-
682
- validate_fields(self, kwargs)
683
-
684
- # do not search for names if an id is passed; this is important
685
- # e.g. when synching ids from the notebook store to lamindb
686
- has_consciously_provided_uid = False
687
- if "_has_consciously_provided_uid" in kwargs:
688
- has_consciously_provided_uid = kwargs.pop(
689
- "_has_consciously_provided_uid"
690
- )
656
+ if not args:
691
657
  if (
692
- isinstance(self, (CanCurate, Collection, Transform))
693
- and settings.creation.search_names
694
- and not has_consciously_provided_uid
658
+ issubclass(self.__class__, Record)
659
+ and self.__class__.__name__
660
+ not in {"Storage", "ULabel", "Feature", "Schema", "Param"}
661
+ # do not save bionty entities in restricted spaces by default
662
+ and self.__class__.__module__ != "bionty.models"
695
663
  ):
696
- name_field = getattr(self, "_name_field", "name")
697
- exact_match = suggest_records_with_similar_names(
698
- self, name_field, kwargs
699
- )
700
- if exact_match is not None:
701
- if "version" in kwargs:
702
- if kwargs["version"] is not None:
703
- version_comment = " and version"
704
- existing_record = self.__class__.filter(
705
- **{
706
- name_field: kwargs[name_field],
707
- "version": kwargs["version"],
708
- }
709
- ).one_or_none()
664
+ from lamindb import context as run_context
665
+
666
+ if run_context.space is not None:
667
+ kwargs["space"] = run_context.space
668
+ if skip_validation:
669
+ super().__init__(**kwargs)
670
+ else:
671
+ from ..core._settings import settings
672
+ from .can_curate import CanCurate
673
+ from .collection import Collection
674
+ from .transform import Transform
675
+
676
+ validate_fields(self, kwargs)
677
+
678
+ # do not search for names if an id is passed; this is important
679
+ # e.g. when synching ids from the notebook store to lamindb
680
+ has_consciously_provided_uid = False
681
+ if "_has_consciously_provided_uid" in kwargs:
682
+ has_consciously_provided_uid = kwargs.pop(
683
+ "_has_consciously_provided_uid"
684
+ )
685
+ if (
686
+ isinstance(self, (CanCurate, Collection, Transform))
687
+ and settings.creation.search_names
688
+ and not has_consciously_provided_uid
689
+ ):
690
+ name_field = getattr(self, "_name_field", "name")
691
+ exact_match = suggest_records_with_similar_names(
692
+ self, name_field, kwargs
693
+ )
694
+ if exact_match is not None:
695
+ if "version" in kwargs:
696
+ if kwargs["version"] is not None:
697
+ version_comment = " and version"
698
+ existing_record = self.__class__.filter(
699
+ **{
700
+ name_field: kwargs[name_field],
701
+ "version": kwargs["version"],
702
+ }
703
+ ).one_or_none()
704
+ else:
705
+ # for a versioned record, an exact name match is not a criterion
706
+ # for retrieving a record in case `version` isn't passed -
707
+ # we'd always pull out many records with exactly the same name
708
+ existing_record = None
710
709
  else:
711
- # for a versioned record, an exact name match is not a criterion
712
- # for retrieving a record in case `version` isn't passed -
713
- # we'd always pull out many records with exactly the same name
714
- existing_record = None
715
- else:
716
- version_comment = ""
717
- existing_record = exact_match
718
- if existing_record is not None:
719
- logger.important(
720
- f"returning existing {self.__class__.__name__} record with same"
721
- f" {name_field}{version_comment}: '{kwargs[name_field]}'"
722
- )
723
- if isinstance(self, Schema):
724
- if existing_record.hash != kwargs["hash"]:
725
- raise ValueError(
726
- f"Schema name is already in use by schema with uid '{existing_record.uid}', please choose a different name."
727
- )
728
- init_self_from_db(self, existing_record)
729
- update_attributes(self, kwargs)
730
- return None
731
- super().__init__(**kwargs)
732
- if isinstance(self, ValidateFields):
733
- # this will trigger validation against django validators
734
- try:
735
- if hasattr(self, "clean_fields"):
736
- self.clean_fields()
737
- else:
738
- self._Model__clean_fields()
739
- except DjangoValidationError as e:
740
- message = _format_django_validation_error(self, e)
741
- raise FieldValidationError(message) from e
710
+ version_comment = ""
711
+ existing_record = exact_match
712
+ if existing_record is not None:
713
+ logger.important(
714
+ f"returning existing {self.__class__.__name__} record with same"
715
+ f" {name_field}{version_comment}: '{kwargs[name_field]}'"
716
+ )
717
+ init_self_from_db(self, existing_record)
718
+ update_attributes(self, kwargs)
719
+ return None
720
+ super().__init__(**kwargs)
721
+ if isinstance(self, ValidateFields):
722
+ # this will trigger validation against django validators
723
+ try:
724
+ if hasattr(self, "clean_fields"):
725
+ self.clean_fields()
726
+ else:
727
+ self._Model__clean_fields()
728
+ except DjangoValidationError as e:
729
+ message = _format_django_validation_error(self, e)
730
+ raise FieldValidationError(message) from e
742
731
  elif len(args) != len(self._meta.concrete_fields):
743
732
  raise FieldValidationError(
744
733
  f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
@@ -797,20 +786,33 @@ class BasicRecord(models.Model, metaclass=Registry):
797
786
  # save unversioned record
798
787
  else:
799
788
  super().save(*args, **kwargs)
800
- except IntegrityError as e:
789
+ except (IntegrityError, ProgrammingError) as e:
801
790
  error_msg = str(e)
802
791
  # two possible error messages for hash duplication
803
792
  # "duplicate key value violates unique constraint"
804
793
  # "UNIQUE constraint failed"
805
794
  if (
806
- "UNIQUE constraint failed" in error_msg
807
- or "duplicate key value violates unique constraint" in error_msg
808
- ) and "hash" in error_msg:
795
+ isinstance(e, IntegrityError)
796
+ and "hash" in error_msg
797
+ and (
798
+ "UNIQUE constraint failed" in error_msg
799
+ or "duplicate key value violates unique constraint" in error_msg
800
+ )
801
+ ):
809
802
  pre_existing_record = self.__class__.get(hash=self.hash)
810
803
  logger.warning(
811
804
  f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
812
805
  )
813
806
  init_self_from_db(self, pre_existing_record)
807
+ elif (
808
+ isinstance(e, ProgrammingError)
809
+ and hasattr(self, "space")
810
+ and "new row violates row-level security policy" in error_msg
811
+ ):
812
+ raise NoWriteAccess(
813
+ f"You’re not allowed to write to the space '{self.space.name}'.\n"
814
+ "Please contact an administrator of the space if you need write access."
815
+ ) from None
814
816
  else:
815
817
  raise
816
818
  # call the below in case a user makes more updates to the record
@@ -840,16 +842,20 @@ class BasicRecord(models.Model, metaclass=Registry):
840
842
  if k != "run":
841
843
  logger.important(f"{k} records: {', '.join(v)}")
842
844
 
843
- if self.__class__.__name__ in {
844
- "Artifact",
845
- "Transform",
846
- "Run",
847
- "ULabel",
848
- "Feature",
849
- "Schema",
850
- "Collection",
851
- "Reference",
852
- }:
845
+ if (
846
+ self.__class__.__name__
847
+ in {
848
+ "Artifact",
849
+ "Transform",
850
+ "Run",
851
+ "ULabel",
852
+ "Feature",
853
+ "Schema",
854
+ "Collection",
855
+ "Reference",
856
+ }
857
+ and self._branch_code >= 1
858
+ ):
853
859
  import lamindb as ln
854
860
 
855
861
  if ln.context.project is not None:
@@ -882,7 +888,7 @@ class BasicRecord(models.Model, metaclass=Registry):
882
888
 
883
889
 
884
890
  class Space(BasicRecord):
885
- """Spaces.
891
+ """Spaces to restrict access to records to specific users or teams.
886
892
 
887
893
  You can use spaces to restrict access to records within an instance.
888
894
 
@@ -1063,146 +1069,6 @@ def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
1063
1069
  return []
1064
1070
 
1065
1071
 
1066
- def _search(
1067
- cls,
1068
- string: str,
1069
- *,
1070
- field: StrField | list[StrField] | None = None,
1071
- limit: int | None = 20,
1072
- case_sensitive: bool = False,
1073
- truncate_string: bool = False,
1074
- ) -> QuerySet:
1075
- if string is None:
1076
- raise ValueError("Cannot search for None value! Please pass a valid string.")
1077
-
1078
- input_queryset = (
1079
- cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
1080
- )
1081
- registry = input_queryset.model
1082
- name_field = getattr(registry, "_name_field", "name")
1083
- if field is None:
1084
- fields = [
1085
- field.name
1086
- for field in registry._meta.fields
1087
- if field.get_internal_type() in {"CharField", "TextField"}
1088
- ]
1089
- else:
1090
- if not isinstance(field, list):
1091
- fields_input = [field]
1092
- else:
1093
- fields_input = field
1094
- fields = []
1095
- for field in fields_input:
1096
- if not isinstance(field, str):
1097
- try:
1098
- fields.append(field.field.name)
1099
- except AttributeError as error:
1100
- raise TypeError(
1101
- "Please pass a Record string field, e.g., `CellType.name`!"
1102
- ) from error
1103
- else:
1104
- fields.append(field)
1105
-
1106
- if truncate_string:
1107
- if (len_string := len(string)) > 5:
1108
- n_80_pct = int(len_string * 0.8)
1109
- string = string[:n_80_pct]
1110
-
1111
- string = string.strip()
1112
- string_escape = re.escape(string)
1113
-
1114
- exact_lookup = Exact if case_sensitive else IExact
1115
- regex_lookup = Regex if case_sensitive else IRegex
1116
- contains_lookup = Contains if case_sensitive else IContains
1117
-
1118
- ranks = []
1119
- contains_filters = []
1120
- for field in fields:
1121
- field_expr = Coalesce(
1122
- Cast(field, output_field=TextField()),
1123
- Value(""),
1124
- output_field=TextField(),
1125
- )
1126
- # exact rank
1127
- exact_expr = exact_lookup(field_expr, string)
1128
- exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
1129
- ranks.append(exact_rank)
1130
- # exact synonym
1131
- synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
1132
- synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
1133
- ranks.append(synonym_rank)
1134
- # match as sub-phrase
1135
- sub_expr = regex_lookup(
1136
- field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
1137
- )
1138
- sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
1139
- ranks.append(sub_rank)
1140
- # startswith and avoid matching string with " " on the right
1141
- # mostly for truncated
1142
- startswith_expr = regex_lookup(
1143
- field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
1144
- )
1145
- startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
1146
- ranks.append(startswith_rank)
1147
- # match as sub-phrase from the left, mostly for truncated
1148
- right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
1149
- right_rank = Cast(right_expr, output_field=IntegerField()) * 2
1150
- ranks.append(right_rank)
1151
- # match as sub-phrase from the right
1152
- left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
1153
- left_rank = Cast(left_expr, output_field=IntegerField()) * 2
1154
- ranks.append(left_rank)
1155
- # simple contains filter
1156
- contains_expr = contains_lookup(field_expr, string)
1157
- contains_filter = Q(contains_expr)
1158
- contains_filters.append(contains_filter)
1159
- # also rank by contains
1160
- contains_rank = Cast(contains_expr, output_field=IntegerField())
1161
- ranks.append(contains_rank)
1162
- # additional rule for truncated strings
1163
- # weight matches from the beginning of the string higher
1164
- # sometimes whole words get truncated and startswith_expr is not enough
1165
- if truncate_string and field == name_field:
1166
- startswith_lookup = StartsWith if case_sensitive else IStartsWith
1167
- name_startswith_expr = startswith_lookup(field_expr, string)
1168
- name_startswith_rank = (
1169
- Cast(name_startswith_expr, output_field=IntegerField()) * 2
1170
- )
1171
- ranks.append(name_startswith_rank)
1172
-
1173
- ranked_queryset = (
1174
- input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
1175
- .alias(rank=sum(ranks))
1176
- .order_by("-rank")
1177
- )
1178
-
1179
- return ranked_queryset[:limit]
1180
-
1181
-
1182
- def _lookup(
1183
- cls,
1184
- field: StrField | None = None,
1185
- return_field: StrField | None = None,
1186
- using_key: str | None = None,
1187
- ) -> NamedTuple:
1188
- """{}""" # noqa: D415
1189
- queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
1190
- field = get_name_field(registry=queryset.model, field=field)
1191
-
1192
- return Lookup(
1193
- records=queryset,
1194
- values=[i.get(field) for i in queryset.values()],
1195
- tuple_name=cls.__class__.__name__,
1196
- prefix="ln",
1197
- ).lookup(
1198
- return_field=(
1199
- get_name_field(registry=queryset.model, field=return_field)
1200
- if return_field is not None
1201
- else None
1202
- )
1203
- )
1204
-
1205
-
1206
1072
  def get_name_field(
1207
1073
  registry: type[Record] | QuerySet | Manager,
1208
1074
  *,
@@ -1315,10 +1181,12 @@ def get_transfer_run(record) -> Run:
1315
1181
 
1316
1182
  slug = record._state.db
1317
1183
  owner, name = get_owner_name_from_identifier(slug)
1318
- cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
1319
- if not cache_filepath.exists():
1184
+ cache_using_filepath = (
1185
+ ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
1186
+ )
1187
+ if not cache_using_filepath.exists():
1320
1188
  raise SystemExit("Need to call .using() before")
1321
- instance_uid = cache_filepath.read_text().split("\n")[0]
1189
+ instance_uid = cache_using_filepath.read_text().split("\n")[0]
1322
1190
  key = f"transfers/{instance_uid}"
1323
1191
  uid = instance_uid + "0000"
1324
1192
  transform = Transform.filter(uid=uid).one_or_none()
@@ -1401,11 +1269,13 @@ def transfer_to_default_db(
1401
1269
  def track_current_key_and_name_values(record: Record):
1402
1270
  from lamindb.models import Artifact
1403
1271
 
1272
+ # below, we're using __dict__ to avoid triggering the refresh from the database
1273
+ # which can lead to a recursion
1404
1274
  if isinstance(record, Artifact):
1405
- record._old_key = record.key
1406
- record._old_suffix = record.suffix
1275
+ record._old_key = record.__dict__.get("key")
1276
+ record._old_suffix = record.__dict__.get("suffix")
1407
1277
  elif hasattr(record, "_name_field"):
1408
- record._old_name = getattr(record, record._name_field)
1278
+ record._old_name = record.__dict__.get(record._name_field)
1409
1279
 
1410
1280
 
1411
1281
  def check_name_change(record: Record):
@@ -1439,9 +1309,6 @@ def check_name_change(record: Record):
1439
1309
  label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
1440
1310
  )
1441
1311
  .exclude(feature_id=None) # must have a feature
1442
- .exclude(
1443
- feature_ref_is_name=None
1444
- ) # must be linked via Curator and therefore part of a schema
1445
1312
  .distinct()
1446
1313
  )
1447
1314
  artifact_ids = linked_records.list("artifact__uid")