lamindb 1.5.2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. lamindb/__init__.py +25 -6
  2. lamindb/_finish.py +5 -5
  3. lamindb/_tracked.py +1 -1
  4. lamindb/_view.py +4 -4
  5. lamindb/core/_context.py +32 -6
  6. lamindb/core/_settings.py +1 -1
  7. lamindb/core/datasets/mini_immuno.py +8 -0
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +9 -9
  10. lamindb/core/storage/_valid_suffixes.py +1 -0
  11. lamindb/core/storage/_zarr.py +32 -107
  12. lamindb/curators/__init__.py +19 -2
  13. lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
  14. lamindb/curators/_legacy.py +15 -19
  15. lamindb/curators/core.py +247 -80
  16. lamindb/errors.py +2 -2
  17. lamindb/migrations/0069_squashed.py +8 -8
  18. lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
  19. lamindb/migrations/0073_merge_ourprojects.py +7 -7
  20. lamindb/migrations/0075_lamindbv1_part5.py +1 -1
  21. lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
  22. lamindb/migrations/0080_polish_lamindbv1.py +2 -2
  23. lamindb/migrations/0088_schema_components.py +1 -1
  24. lamindb/migrations/0090_runproject_project_runs.py +2 -2
  25. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
  26. lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
  27. lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
  28. lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
  29. lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
  30. lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
  31. lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
  32. lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
  33. lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
  34. lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
  35. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
  36. lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
  37. lamindb/models/__init__.py +35 -18
  38. lamindb/models/_describe.py +4 -4
  39. lamindb/models/_django.py +38 -4
  40. lamindb/models/_feature_manager.py +66 -123
  41. lamindb/models/_from_values.py +13 -13
  42. lamindb/models/_label_manager.py +8 -6
  43. lamindb/models/_relations.py +7 -7
  44. lamindb/models/artifact.py +166 -156
  45. lamindb/models/can_curate.py +25 -25
  46. lamindb/models/collection.py +48 -18
  47. lamindb/models/core.py +3 -3
  48. lamindb/models/feature.py +88 -60
  49. lamindb/models/has_parents.py +17 -17
  50. lamindb/models/project.py +52 -24
  51. lamindb/models/query_manager.py +5 -5
  52. lamindb/models/query_set.py +61 -37
  53. lamindb/models/record.py +158 -1583
  54. lamindb/models/run.py +39 -176
  55. lamindb/models/save.py +6 -6
  56. lamindb/models/schema.py +33 -44
  57. lamindb/models/sqlrecord.py +1743 -0
  58. lamindb/models/transform.py +17 -33
  59. lamindb/models/ulabel.py +21 -15
  60. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
  61. lamindb-1.6.0.dist-info/RECORD +118 -0
  62. lamindb/core/storage/_anndata_sizes.py +0 -41
  63. lamindb/models/flextable.py +0 -163
  64. lamindb-1.5.2.dist-info/RECORD +0 -109
  65. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
  66. {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1743 @@
1
+ from __future__ import annotations
2
+
3
+ import builtins
4
+ import inspect
5
+ import re
6
+ import sys
7
+ from collections import defaultdict
8
+ from itertools import chain
9
+ from pathlib import PurePosixPath
10
+ from typing import (
11
+ TYPE_CHECKING,
12
+ Any,
13
+ Literal,
14
+ NamedTuple,
15
+ TypeVar,
16
+ Union,
17
+ overload,
18
+ )
19
+
20
+ import dj_database_url
21
+ import lamindb_setup as ln_setup
22
+ from django.core.exceptions import ValidationError as DjangoValidationError
23
+ from django.db import IntegrityError, ProgrammingError, connections, models, transaction
24
+ from django.db.models import CASCADE, PROTECT, Field, Manager, QuerySet
25
+ from django.db.models.base import ModelBase
26
+ from django.db.models.fields.related import (
27
+ ManyToManyField,
28
+ ManyToManyRel,
29
+ ManyToOneRel,
30
+ )
31
+ from lamin_utils import colors, logger
32
+ from lamindb_setup import settings as setup_settings
33
+ from lamindb_setup._connect_instance import (
34
+ get_owner_name_from_identifier,
35
+ load_instance_settings,
36
+ update_db_using_local,
37
+ )
38
+ from lamindb_setup.core._docs import doc_args
39
+ from lamindb_setup.core._hub_core import connect_instance_hub
40
+ from lamindb_setup.core._settings_store import instance_settings_file
41
+ from lamindb_setup.core.django import DBToken, db_token_manager
42
+ from lamindb_setup.core.upath import extract_suffix_from_path
43
+
44
+ from lamindb.base import deprecated
45
+
46
+ from ..base.fields import (
47
+ CharField,
48
+ DateTimeField,
49
+ ForeignKey,
50
+ JSONField,
51
+ )
52
+ from ..base.types import FieldAttr, StrField
53
+ from ..errors import (
54
+ FieldValidationError,
55
+ InvalidArgument,
56
+ NoWriteAccess,
57
+ SQLRecordNameChangeIntegrityError,
58
+ ValidationError,
59
+ )
60
+ from ._is_versioned import IsVersioned
61
+ from .query_manager import QueryManager, _lookup, _search
62
+
63
+ if TYPE_CHECKING:
64
+ from datetime import datetime
65
+
66
+ import pandas as pd
67
+
68
+ from .artifact import Artifact
69
+ from .run import Run, User
70
+ from .transform import Transform
71
+
72
+
73
# Type variable bound to `SQLRecord`; `Registry.get()` uses it so that the returned
# record is typed as an instance of the class the method is called on.
T = TypeVar("T", bound="SQLRecord")
# Value of `builtins.__IPYTHON__` if that attribute exists, otherwise False.
# Checked in `suggest_records_with_similar_names` to decide whether similar records
# are shown via `IPython.display` or only logged.
IPYTHON = getattr(builtins, "__IPYTHON__", False)
75
+
76
+
77
+ # -------------------------------------------------------------------------------------
78
+ # A note on required fields at the SQLRecord level
79
+ #
80
+ # As Django does most of its validation on the Form-level, it doesn't offer functionality
81
+ # for validating the integrity of an SQLRecord object upon instantiation (similar to pydantic)
82
+ #
83
+ # For required fields, we define them as commonly done on the SQL level together
84
+ # with a validator in SQLRecord (validate_required_fields)
85
+ #
86
+ # This goes against the Django convention, but goes with the SQLModel convention
87
+ # (Optional fields can be null on the SQL level, non-optional fields cannot)
88
+ #
89
+ # Due to Django's convention where CharField has pre-configured (null=False, default=""), marking
90
+ # a required field necessitates passing `default=None`. Without the validator it would trigger
91
+ # an error at the SQL-level, with it, it triggers it at instantiation
92
+
93
+ # -------------------------------------------------------------------------------------
94
+ # A note on class and instance methods of core SQLRecord
95
+ #
96
+ # All of these are defined and tested within lamindb, in files starting with _{orm_name}.py
97
+
98
+ # -------------------------------------------------------------------------------------
99
+ # A note on maximal lengths of char fields
100
+ #
101
+ # 100 characters:
102
+ # "Raindrops pitter-pattered on the windowpane, blurring the"
103
+ # "city lights outside, curled up with a mug."
104
+ # A good maximal length for a name (title).
105
+ #
106
+ # 150 characters: We choose this for name maximal length because some users like long names.
107
+ #
108
+ # 255 characters:
109
+ # "In creating a precise 255-character paragraph, one engages in"
110
+ # "a dance of words, where clarity meets brevity. Every syllable counts,"
111
+ # "illustrating the skill in compact expression, ensuring the essence of the"
112
+ # "message shines through within the exacting limit."
113
+ # This is a good maximal length for a description field.
114
+
115
+
116
class IsLink:
    """Marker base class without attributes or behavior of its own.

    `validate_literal_fields` returns early for records that are instances of it.
    """
118
+
119
+
120
def deferred_attribute__repr__(self):
    """Return `FieldAttr(<ModelName>.<field_name>)` for a field attribute.

    Reads the model class and the field name from `self.field`.
    """
    model_name = self.field.model.__name__
    field_name = self.field.name
    return f"FieldAttr({model_name}.{field_name})"
122
+
123
+
124
# Replace the `repr` of `FieldAttr` so that field attributes print as
# `FieldAttr(<ModelName>.<field_name>)`.
FieldAttr.__repr__ = deferred_attribute__repr__  # type: ignore
125
+
126
+
127
class ValidateFields:
    """Marker base class without attributes or behavior of its own.

    `BaseSQLRecord.__init__` runs the Django field validators (`clean_fields`)
    for records that are instances of it.
    """
129
+
130
+
131
def is_approx_pascal_case(s):
    """Check that the last component of a dotted string starts with a capital letter.

    The check is approximate: only the first character of the last
    dot-separated component is inspected.

    Args:
        s (str): The string to check

    Returns:
        bool: `True` if the check passes; a failing check raises instead of returning `False`

    Raises:
        ValueError: If the last component is empty or doesn't start with a capital letter
    """
    if "[" in s:  # this is because we allow types of form 'script[test_script.py]'
        return True
    last_component = s.rsplit(".", 1)[-1]

    # slice instead of index: an empty component (e.g. "" or "bionty.") then raises
    # the documented ValueError rather than an IndexError
    if not last_component[:1].isupper():
        raise ValueError(
            f"'{last_component}' should start with a capital letter given you're defining a type"
        )

    return True
153
+
154
+
155
def init_self_from_db(self: SQLRecord, existing_record: SQLRecord):
    """Re-initialize `self` in place with the field values of `existing_record`.

    The values of all concrete fields are read from `existing_record` and passed
    positionally to the `__init__` that follows `self.__class__` in the method
    resolution order. Afterwards, the state of `self` is set to "not adding" on
    the "default" database.
    """
    field_values = []
    for concrete_field in self._meta.concrete_fields:
        field_values.append(getattr(existing_record, concrete_field.attname))
    super(self.__class__, self).__init__(*field_values)
    state = self._state
    state.adding = False  # mimic from_db
    state.db = "default"
162
+
163
+
164
def update_attributes(record: SQLRecord, attributes: dict[str, str]):
    """Overwrite attributes of `record` with differing, non-`None` values.

    A warning is logged for every differing value. The attributes `uid`, `dtype`,
    `otype` and `hash` are never overwritten; for them, the tentative value is
    ignored.

    Args:
        record: The record to update in place.
        attributes: Maps attribute names to their tentative new values.
    """
    protected_keys = {"uid", "dtype", "otype", "hash"}
    for key, value in attributes.items():
        current_value = getattr(record, key)
        # nothing to do if the value is unchanged or no value was passed
        if not (current_value != value) or value is None:
            continue
        if key in protected_keys:
            if key == "hash":
                hash_message = "recomputing on .save()"
            else:
                hash_message = f"keeping {current_value}"
            logger.warning(
                f"ignoring tentative value {value} for {key}, {hash_message}"
            )
            continue
        logger.warning(f"updated {key} from {current_value} to {value}")
        setattr(record, key, value)
179
+
180
+
181
def validate_literal_fields(record: SQLRecord, kwargs) -> None:
    """Validate all Literal type fields in a record.

    Only fields that are annotated on the class of the record and that have a
    non-`None` value in `kwargs` are checked. Records that are instances of
    `IsLink` and records of a class named `Feature` are not validated.

    Args:
        record: record being validated
        kwargs: keyword arguments holding the field values to validate

    Raises:
        FieldValidationError: If any field value is not in its Literal's allowed values
    """
    if isinstance(record, IsLink):
        return None
    # compare for equality: a substring test (`in "Feature"`) would also skip every
    # class whose name merely is a part of "Feature"
    if record.__class__.__name__ == "Feature":
        return None
    from lamindb.base.types import Dtype, TransformType

    # maps annotation strings to the types they refer to
    # NOTE(review): "ArtifactKind" resolves to `Dtype` here - confirm this is intended
    types = {
        "TransformType": TransformType,
        "ArtifactKind": Dtype,
        "Dtype": Dtype,
    }
    errors = {}
    annotations = getattr(record.__class__, "__annotations__", {})
    for field_name, annotation in annotations.items():
        value = kwargs.get(field_name)
        if value is None:
            continue
        # resolve via the string form so that the lookup key always is the same
        # key that a membership test on `types` would use
        annotation = types.get(str(annotation), annotation)
        if getattr(annotation, "__origin__", None) is not Literal:
            continue
        valid_values = set(annotation.__args__)
        if value not in valid_values:
            # convert to str so that non-string literal values can be sorted and joined
            valid_values_str = ", ".join(sorted(map(str, valid_values)))
            errors[field_name] = (
                f"{field_name}: {colors.yellow(value)} is not a valid value"
                f"\n → Valid values are: {colors.green(valid_values_str)}"
            )
    if errors:
        message = "\n " + "".join(f"{error}\n " for error in errors.values())
        raise FieldValidationError(message)
225
+
226
+
227
def validate_fields(record: SQLRecord, kwargs):
    """Validate the keyword arguments that a record is constructed with.

    Checks, in this order: that all required fields have a value, that `uid` has
    the expected length for core registries, that the name of a type starts with
    a capital letter, and that Literal-typed fields hold allowed values.

    Note that `kwargs` is modified in place: required fields that weren't passed
    are added with value `None` before the check for missing fields runs.

    Args:
        record: The record being constructed.
        kwargs: The keyword arguments passed to the constructor.

    Raises:
        FieldValidationError: If a required field is missing or `None`.
        ValidationError: If `uid` doesn't have the expected length.
    """
    from lamindb.models import (
        Artifact,
        Collection,
        Feature,
        Run,
        Schema,
        Transform,
        ULabel,
    )

    registry = record.__class__

    # validate required fields
    # a "required field" is a Django field that has `null=False, default=None`
    required_fields = {
        field.name
        for field in record._meta.fields
        if not field.null and field.default is None
    }
    for required_name in required_fields:
        if required_name not in kwargs:
            kwargs[required_name] = None
    missing_fields = [
        name
        for name, value in kwargs.items()
        if value is None and name in required_fields
    ]
    if missing_fields:
        raise FieldValidationError(f"{missing_fields} are required.")

    # ensure the exact length of the internal uid for core entities
    core_registries = {Artifact, Collection, Transform, Run, ULabel, Feature, Schema}
    if "uid" in kwargs and registry in core_registries:
        uid_max_length = registry._meta.get_field(
            "uid"
        ).max_length  # triggers FieldDoesNotExist
        uid_length = len(kwargs["uid"])  # triggers KeyError
        # a schema uid may also have 16 characters
        is_short_schema_uid = registry is Schema and uid_length == 16
        if uid_length != uid_max_length and not is_short_schema_uid:
            raise ValidationError(
                f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
            )

    # validate is_type
    if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
        type_name = kwargs["name"]
        if type_name.endswith("s"):
            logger.warning(
                f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
            )
        is_approx_pascal_case(type_name)

    # validate literals
    validate_literal_fields(record, kwargs)
279
+
280
+
281
def suggest_records_with_similar_names(
    record: SQLRecord, name_field: str, kwargs
) -> SQLRecord | None:
    """Return the record that exactly matches `kwargs[name_field]`, if there is one.

    If there is no exact match, searches for up to 3 records with a similar value,
    logs a warning about them if any are found, and returns `None`.

    Returns `None` without querying if the value in `kwargs` isn't a string.
    """
    name_value = kwargs.get(name_field)
    if not isinstance(name_value, str):
        return None
    registry = record.__class__
    # need to perform an additional request to find the exact match
    # previously, this was inferred from the truncated/fuzzy search below
    # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
    # the below needs to be .first() because there might be multiple records with the same
    # name field in case the record is versioned (e.g. for Transform key)
    exact_match = registry.filter(**{name_field: name_value}).first()
    if exact_match is not None:
        return exact_match
    queryset = _search(
        registry,
        name_value,
        field=name_field,
        truncate_string=True,
        limit=3,
    )
    if not queryset.exists():  # empty queryset
        return None
    # pick singular or plural wording for the message
    if len(queryset) == 1:
        s, it, nots = "", "it", "s"
    else:
        s, it, nots = "s", "one of them", ""
    msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
    if not IPYTHON:
        logger.warning(f"{msg}\n{queryset}")
        return None

    from IPython.display import display

    from lamindb import settings

    logger.warning(f"{msg}")
    if settings._verbosity_int >= 1:
        display(queryset.df())
    return None
320
+
321
+
322
+ RECORD_REGISTRY_EXAMPLE = """Example::
323
+
324
+ from lamindb import SQLRecord, fields
325
+
326
+ # sub-classing `SQLRecord` creates a new registry
327
+ class Experiment(SQLRecord):
328
+ name: str = fields.CharField()
329
+
330
+ # instantiating `Experiment` creates a record `experiment`
331
+ experiment = Experiment(name="my experiment")
332
+
333
+ # you can save the record to the database
334
+ experiment.save()
335
+
336
+ # `Experiment` refers to the registry, which you can query
337
+ df = Experiment.filter(name__startswith="my ").df()
338
+ """
339
+
340
+
341
# this is the metaclass for SQLRecord
@doc_args(RECORD_REGISTRY_EXAMPLE)
class Registry(ModelBase):
    """Metaclass for :class:`~lamindb.models.SQLRecord`.

    Each `Registry` *object* is a `SQLRecord` *class* and corresponds to a table in the metadata SQL database.

    You work with `Registry` objects whenever you use *class methods* of `SQLRecord`.

    You call any subclass of `SQLRecord` a "registry" and their objects "records". A `SQLRecord` object corresponds to a row in the SQL table.

    If you want to create a new registry, you sub-class `SQLRecord`.

    {}

    Note: `Registry` inherits from Django's `ModelBase`.
    """

    # cache for `__get_available_fields__()`; filled per class upon the first call
    _available_fields: set[str] | None = None

    def __new__(cls, name, bases, attrs, **kwargs):
        new_class = super().__new__(cls, name, bases, attrs, **kwargs)
        return new_class

    # below creates a sensible auto-complete behavior that differs across the
    # class and instance level in Jupyter Editors it doesn't have any effect for
    # static type analyzer like pylance used in VSCode
    def __dir__(cls):
        """List the attribute names offered for auto-completion on the class level.

        Dunder attributes are always excluded. Unless `sphinx` is imported, callables
        are only included if they're class methods, static methods or classes.
        """
        # this is needed to bring auto-complete on the class-level back
        # https://laminlabs.slack.com/archives/C04FPE8V01W/p1717535625268849
        # Filter class attributes, excluding instance methods
        exclude_instance_methods = "sphinx" not in sys.modules
        # https://laminlabs.slack.com/archives/C04FPE8V01W/p1721134595920959

        def include_attribute(attr_name, attr_value):
            if attr_name.startswith("__"):
                return False
            if exclude_instance_methods and callable(attr_value):
                return isinstance(attr_value, (classmethod, staticmethod, type))
            return True

        # check also inherited attributes
        if hasattr(cls, "mro"):
            attrs = chain(*(c.__dict__.items() for c in cls.mro()))
        else:
            attrs = cls.__dict__.items()

        result = []
        # names already in `result`; a set keeps the membership test cheap
        seen = set()
        for attr_name, attr_value in attrs:
            if attr_name not in seen and include_attribute(attr_name, attr_value):
                seen.add(attr_name)
                result.append(attr_name)

        # Add non-dunder attributes from Registry
        for attr in dir(Registry):
            if not attr.startswith("__") and attr not in seen:
                seen.add(attr)
                result.append(attr)
        return result

    def __repr__(cls) -> str:
        return registry_repr(cls)

    @doc_args(_lookup.__doc__)
    def lookup(
        cls,
        field: StrField | None = None,
        return_field: StrField | None = None,
    ) -> NamedTuple:
        """{}"""  # noqa: D415
        return _lookup(cls=cls, field=field, return_field=return_field)

    def filter(cls, *queries, **expressions) -> QuerySet:
        """Query records.

        Args:
            queries: One or multiple `Q` objects.
            expressions: Fields and values passed as Django query expressions.
                The special key `_using_key` isn't treated as a query expression
                but is passed as `using` to the query set.

        Returns:
            A :class:`~lamindb.models.QuerySet`.

        See Also:
            - Guide: :doc:`docs:registries`
            - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__

        Examples:
            >>> ln.ULabel(name="my label").save()
            >>> ln.ULabel.filter(name__startswith="my").df()
        """
        from .query_set import QuerySet

        _using_key = expressions.pop("_using_key", None)

        return QuerySet(model=cls, using=_using_key).filter(*queries, **expressions)

    def get(
        cls: type[T],
        idlike: int | str | None = None,
        **expressions,
    ) -> T:
        """Get a single record.

        Args:
            idlike: Either a uid stub, uid or an integer id.
            expressions: Fields and values passed as Django query expressions.

        Raises:
            :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.

        See Also:
            - Guide: :doc:`docs:registries`
            - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__

        Examples:

            ::

                ulabel = ln.ULabel.get("FvtpPJLJ")
                ulabel = ln.ULabel.get(name="my-label")
        """
        from .query_set import QuerySet

        return QuerySet(model=cls).get(idlike, **expressions)

    def df(
        cls,
        include: str | list[str] | None = None,
        features: bool | list[str] = False,
        limit: int = 100,
    ) -> pd.DataFrame:
        """Convert to `pd.DataFrame`.

        By default, shows all direct fields, except `updated_at`.

        Use arguments `include` or `features` to include other data.

        If the registry has an `updated_at` field, records are ordered by it,
        most recently updated first.

        Args:
            include: Related fields to include as columns. Takes strings of
                form `"ulabels__name"`, `"cell_types__name"`, etc. or a list
                of such strings.
            features: If `True`, map all features of the
                :class:`~lamindb.Feature` registry onto the resulting
                `DataFrame`. Only available for `Artifact`.
            limit: Maximum number of rows to display from a Pandas DataFrame.
                Defaults to 100 to reduce database load.

        Examples:

            Include the name of the creator in the `DataFrame`:

            >>> ln.ULabel.df(include="created_by__name")

            Include display of features for `Artifact`:

            >>> df = ln.Artifact.df(features=True)
            >>> ln.view(df)  # visualize with type annotations

            Only include select features:

            >>> df = ln.Artifact.df(features=["cell_type_by_expert", "cell_type_by_model"])
        """
        query_set = cls.filter()
        if hasattr(cls, "updated_at"):
            query_set = query_set.order_by("-updated_at")
        return query_set[:limit].df(include=include, features=features)

    @doc_args(_search.__doc__)
    def search(
        cls,
        string: str,
        *,
        field: StrField | None = None,
        limit: int | None = 20,
        case_sensitive: bool = False,
    ) -> QuerySet:
        """{}"""  # noqa: D415
        return _search(
            cls=cls,
            string=string,
            field=field,
            limit=limit,
            case_sensitive=case_sensitive,
        )

    def using(
        cls,
        instance: str | None,
    ) -> QuerySet:
        """Use a non-default LaminDB instance.

        Args:
            instance: An instance identifier of form "account_handle/instance_name".
                `None`, `"default"` and the identifier of the current instance
                all refer to the default instance.

        Returns:
            A query set for this registry that is bound to the given instance.

        Raises:
            RuntimeError: If no local settings file exists for the instance and
                connecting to it via the hub fails.

        Examples:
            >>> ln.ULabel.using("account_handle/instance_name").search("ULabel7", field="name")
                        uid    score
            name
            ULabel7  g7Hk9b2v  100.0
            ULabel5  t4Jm6s0q   75.0
            ULabel6  r2Xw8p1z   75.0
        """
        from .query_set import QuerySet

        # connection already established
        if instance in connections:
            return QuerySet(model=cls, using=instance)
        # we're in the default instance
        if instance is None or instance == "default":
            return QuerySet(model=cls, using=None)
        owner, name = get_owner_name_from_identifier(instance)
        if [owner, name] == setup_settings.instance.slug.split("/"):
            return QuerySet(model=cls, using=None)

        # move on to different instances
        cache_using_filepath = (
            setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
        )
        settings_file = instance_settings_file(name, owner)
        if not settings_file.exists():
            result = connect_instance_hub(owner=owner, name=name)
            if isinstance(result, str):
                raise RuntimeError(
                    f"Failed to load instance {instance}, please check your permissions!"
                )
            iresult, _ = result
            # do not use {} syntax below, it gives rise to a dict if the schema modules
            # are empty and then triggers a TypeError in missing_members = source_modules - target_modules
            source_modules = set(  # noqa
                [mod for mod in iresult["schema_str"].split(",") if mod != ""]
            )
            # this just retrieves the full connection string from iresult
            db = update_db_using_local(iresult, settings_file)
            cache_using_filepath.write_text(
                f"{iresult['lnid']}\n{iresult['schema_str']}"
            )
            # need to set the token if it is a fine_grained_access and the user is jwt (not public)
            is_fine_grained_access = (
                iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
            )
            # access_db can take both: the dict from connect_instance_hub and isettings
            into_db_token = iresult
        else:
            isettings = load_instance_settings(settings_file)
            source_modules = isettings.modules
            db = isettings.db
            cache_using_filepath.write_text(
                f"{isettings.uid}\n{','.join(source_modules)}"
            )
            # need to set the token if it is a fine_grained_access and the user is jwt (not public)
            is_fine_grained_access = (
                isettings._fine_grained_access and isettings._db_permissions == "jwt"
            )
            # access_db can take both: the dict from connect_instance_hub and isettings
            into_db_token = isettings

        target_modules = setup_settings.instance.modules
        if missing_members := source_modules - target_modules:
            logger.info(
                f"in transfer, source lamindb instance has additional modules: {', '.join(missing_members)}"
            )

        add_db_connection(db, instance)
        if is_fine_grained_access:
            db_token = DBToken(into_db_token)
            db_token_manager.set(db_token, instance)
        return QuerySet(model=cls, using=instance)

    def __get_module_name__(cls) -> str:
        """Return the short name of the module that defines the registry.

        This is the first component of `cls.__module__` without `lnschema_`;
        `lamindb` is reported as `core`.
        """
        schema_module_name = cls.__module__.split(".")[0]
        module_name = schema_module_name.replace("lnschema_", "")
        if module_name == "lamindb":
            module_name = "core"
        return module_name

    def __get_name_with_module__(cls) -> str:
        """Return the class name, prefixed with `<module_name>.` unless the module is `core`."""
        module_name = cls.__get_module_name__()
        if module_name == "core":
            module_prefix = ""
        else:
            module_prefix = f"{module_name}."
        return f"{module_prefix}{cls.__name__}"

    def __get_available_fields__(cls) -> set[str]:
        """Return the names of the fields of the registry, computed once per class.

        Fields whose name starts with `_` or `links_` or ends with `_id` are
        left out. For `Artifact`, a few additional names are added.
        """
        # read the cache from `cls.__dict__` rather than via attribute access:
        # attribute access also finds a cache that was stored on a parent class,
        # in which case a subclass would report the fields of its parent
        available_fields = cls.__dict__.get("_available_fields")
        if available_fields is None:
            available_fields = {
                f.name
                for f in cls._meta.get_fields()
                if not f.name.startswith("_")
                and not f.name.startswith("links_")
                and not f.name.endswith("_id")
            }
            if cls.__name__ == "Artifact":
                available_fields.add("visibility")  # backward compat
                available_fields.add("_branch_code")  # backward compat
                available_fields.add("transform")
            cls._available_fields = available_fields
        return available_fields
638
+
639
+
640
+ class BaseSQLRecord(models.Model, metaclass=Registry):
641
+ """Basic metadata record.
642
+
643
+ It has the same methods as SQLRecord, but doesn't have the additional fields.
644
+
645
+ It's mainly used for IsLinks and similar.
646
+ """
647
+
648
+ objects = QueryManager()
649
+
650
+ class Meta:
651
+ abstract = True
652
+ base_manager_name = "objects"
653
+
654
+ def __init__(self, *args, **kwargs):
655
+ skip_validation = kwargs.pop("_skip_validation", False)
656
+ if not args:
657
+ if (
658
+ issubclass(self.__class__, SQLRecord)
659
+ and self.__class__.__name__
660
+ not in {"Storage", "ULabel", "Feature", "Schema"}
661
+ # do not save bionty entities in restricted spaces by default
662
+ and self.__class__.__module__ != "bionty.models"
663
+ ):
664
+ from lamindb import context as run_context
665
+
666
+ if run_context.space is not None:
667
+ kwargs["space"] = run_context.space
668
+ if skip_validation:
669
+ super().__init__(**kwargs)
670
+ else:
671
+ from ..core._settings import settings
672
+ from .can_curate import CanCurate
673
+ from .collection import Collection
674
+ from .transform import Transform
675
+
676
+ validate_fields(self, kwargs)
677
+
678
+ # do not search for names if an id is passed; this is important
679
+ # e.g. when synching ids from the notebook store to lamindb
680
+ has_consciously_provided_uid = False
681
+ if "_has_consciously_provided_uid" in kwargs:
682
+ has_consciously_provided_uid = kwargs.pop(
683
+ "_has_consciously_provided_uid"
684
+ )
685
+ if (
686
+ isinstance(self, (CanCurate, Collection, Transform))
687
+ and settings.creation.search_names
688
+ and not has_consciously_provided_uid
689
+ ):
690
+ name_field = getattr(self, "_name_field", "name")
691
+ exact_match = suggest_records_with_similar_names(
692
+ self, name_field, kwargs
693
+ )
694
+ if exact_match is not None:
695
+ if "version" in kwargs:
696
+ if kwargs["version"] is not None:
697
+ version_comment = " and version"
698
+ existing_record = self.__class__.filter(
699
+ **{
700
+ name_field: kwargs[name_field],
701
+ "version": kwargs["version"],
702
+ }
703
+ ).one_or_none()
704
+ else:
705
+ # for a versioned record, an exact name match is not a criterion
706
+ # for retrieving a record in case `version` isn't passed -
707
+ # we'd always pull out many records with exactly the same name
708
+ existing_record = None
709
+ else:
710
+ version_comment = ""
711
+ existing_record = exact_match
712
+ if existing_record is not None:
713
+ logger.important(
714
+ f"returning existing {self.__class__.__name__} record with same"
715
+ f" {name_field}{version_comment}: '{kwargs[name_field]}'"
716
+ )
717
+ init_self_from_db(self, existing_record)
718
+ update_attributes(self, kwargs)
719
+ return None
720
+ super().__init__(**kwargs)
721
+ if isinstance(self, ValidateFields):
722
+ # this will trigger validation against django validators
723
+ try:
724
+ if hasattr(self, "clean_fields"):
725
+ self.clean_fields()
726
+ else:
727
+ self._Model__clean_fields()
728
+ except DjangoValidationError as e:
729
+ message = _format_django_validation_error(self, e)
730
+ raise FieldValidationError(message) from e
731
+ elif len(args) != len(self._meta.concrete_fields):
732
+ raise FieldValidationError(
733
+ f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
734
+ )
735
+ else:
736
+ super().__init__(*args)
737
+ track_current_key_and_name_values(self)
738
+
739
+ def save(self, *args, **kwargs) -> SQLRecord:
740
+ """Save.
741
+
742
+ Always saves to the default database.
743
+ """
744
+ using_key = None
745
+ if "using" in kwargs:
746
+ using_key = kwargs["using"]
747
+ db = self._state.db
748
+ pk_on_db = self.pk
749
+ artifacts: list = []
750
+ if self.__class__.__name__ == "Collection" and self.id is not None:
751
+ # when creating a new collection without being able to access artifacts
752
+ artifacts = self.ordered_artifacts.list()
753
+ pre_existing_record = None
754
+ # consider records that are being transferred from other databases
755
+ transfer_logs: dict[str, list[str]] = {
756
+ "mapped": [],
757
+ "transferred": [],
758
+ "run": None,
759
+ }
760
+ if db is not None and db != "default" and using_key is None:
761
+ if isinstance(self, IsVersioned):
762
+ if not self.is_latest:
763
+ raise NotImplementedError(
764
+ "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
765
+ )
766
+ pre_existing_record = transfer_to_default_db(
767
+ self, using_key, transfer_logs=transfer_logs
768
+ )
769
+ self._revises: IsVersioned
770
+ if pre_existing_record is not None:
771
+ init_self_from_db(self, pre_existing_record)
772
+ else:
773
+ check_key_change(self)
774
+ check_name_change(self)
775
+ try:
776
+ # save versioned record in presence of self._revises
777
+ if isinstance(self, IsVersioned) and self._revises is not None:
778
+ assert self._revises.is_latest # noqa: S101
779
+ revises = self._revises
780
+ revises.is_latest = False
781
+ with transaction.atomic():
782
+ revises._revises = None # ensure we don't start a recursion
783
+ revises.save()
784
+ super().save(*args, **kwargs) # type: ignore
785
+ self._revises = None
786
+ # save unversioned record
787
+ else:
788
+ super().save(*args, **kwargs)
789
+ except (IntegrityError, ProgrammingError) as e:
790
+ error_msg = str(e)
791
+ # two possible error messages for hash duplication
792
+ # "duplicate key value violates unique constraint"
793
+ # "UNIQUE constraint failed"
794
+ if (
795
+ self.__class__.__name__ in {"Transform", "Artifact"}
796
+ and isinstance(e, IntegrityError)
797
+ and "hash" in error_msg
798
+ and (
799
+ "UNIQUE constraint failed" in error_msg
800
+ or "duplicate key value violates unique constraint" in error_msg
801
+ )
802
+ ):
803
+ pre_existing_record = self.__class__.get(hash=self.hash)
804
+ logger.warning(
805
+ f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
806
+ )
807
+ init_self_from_db(self, pre_existing_record)
808
+ elif (
809
+ isinstance(e, ProgrammingError)
810
+ and hasattr(self, "space")
811
+ and "new row violates row-level security policy" in error_msg
812
+ ):
813
+ raise NoWriteAccess(
814
+ f"You’re not allowed to write to the space '{self.space.name}'.\n"
815
+ "Please contact an administrator of the space if you need write access."
816
+ ) from None
817
+ else:
818
+ raise
819
+ # call the below in case a user makes more updates to the record
820
+ track_current_key_and_name_values(self)
821
+ # perform transfer of many-to-many fields
822
+ # only supported for Artifact and Collection records
823
+ if db is not None and db != "default" and using_key is None:
824
+ if self.__class__.__name__ == "Collection":
825
+ if len(artifacts) > 0:
826
+ logger.info("transfer artifacts")
827
+ for artifact in artifacts:
828
+ artifact.save()
829
+ self.artifacts.add(*artifacts)
830
+ if hasattr(self, "labels"):
831
+ from copy import copy
832
+
833
+ from lamindb.models._feature_manager import FeatureManager
834
+
835
+ # here we go back to original record on the source database
836
+ self_on_db = copy(self)
837
+ self_on_db._state.db = db
838
+ self_on_db.pk = pk_on_db # manually set the primary key
839
+ self_on_db.features = FeatureManager(self_on_db) # type: ignore
840
+ self.features._add_from(self_on_db, transfer_logs=transfer_logs)
841
+ self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
842
+ for k, v in transfer_logs.items():
843
+ if k != "run" and len(v) > 0:
844
+ logger.important(f"{k} records: {', '.join(v)}")
845
+
846
+ if self.__class__.__name__ in {
847
+ "Artifact",
848
+ "Transform",
849
+ "Run",
850
+ "ULabel",
851
+ "Feature",
852
+ "Schema",
853
+ "Collection",
854
+ "Reference",
855
+ } and not (
856
+ self.__class__.__name__ == "Artifact" and self.kind == "__lamindb_run__"
857
+ ):
858
+ import lamindb as ln
859
+
860
+ if ln.context.project is not None:
861
+ self.projects.add(ln.context.project)
862
+ return self
863
+
864
+ def delete(self) -> None:
865
+ """Delete."""
866
+ # note that the logic below does not fire if a record is moved to the trash
867
+ # the idea is that moving a record to the trash should move its entire version family
868
+ # to the trash, whereas permanently deleting should default to only deleting a single record
869
+ # of a version family
870
+ # we can consider making it easy to permanently delete entire version families as well,
871
+ # but that's for another time
872
+ if isinstance(self, IsVersioned) and self.is_latest:
873
+ new_latest = (
874
+ self.__class__.objects.using(self._state.db)
875
+ .filter(is_latest=False, uid__startswith=self.stem_uid)
876
+ .order_by("-created_at")
877
+ .first()
878
+ )
879
+ if new_latest is not None:
880
+ new_latest.is_latest = True
881
+ with transaction.atomic():
882
+ new_latest.save()
883
+ super().delete() # type: ignore
884
+ logger.warning(f"new latest version is {new_latest}")
885
+ return None
886
+ super().delete()
887
+
888
+
889
class Space(BaseSQLRecord):
    """Spaces to restrict access to records to specific users or teams.

    You can use spaces to restrict access to records within an instance.

    All data in this registry is synced from `lamin.ai` to enable re-using spaces across instances.
    There is no need to manually create records.
    """

    # small auto-incrementing integer primary key
    id: int = models.SmallAutoField(primary_key=True)
    """Internal id, valid only in one DB instance."""
    # indexed, but not declared unique
    name: str = models.CharField(max_length=100, db_index=True)
    """Name of space."""
    # unique, non-editable, at most 12 characters; "A" is both the Python-side
    # and the database-side default
    uid: str = CharField(
        editable=False,
        unique=True,
        max_length=12,
        default="A",
        db_default="A",
        db_index=True,
    )
    """Universal id."""
    description: str | None = CharField(null=True)
    """Description of space."""
    # filled in by the database via `Now()` rather than by Python
    created_at: datetime = DateTimeField(
        editable=False, db_default=models.functions.Now(), db_index=True
    )
    """Time of creation of record."""
    # nullable; related_name="+" disables the reverse accessor on `User`
    created_by: User = ForeignKey(
        "User", CASCADE, default=None, related_name="+", null=True
    )
    """Creator of space."""

    # user-facing constructor signature
    @overload
    def __init__(
        self,
        name: str,
        description: str | None = None,
    ): ...

    # positional signature, presumably used when instantiating from database rows
    @overload
    def __init__(
        self,
        *db_args,
    ): ...

    def __init__(
        self,
        *args,
        **kwargs,
    ):
        """Forward all arguments unchanged to the parent constructor."""
        super().__init__(*args, **kwargs)
941
+
942
+
943
class Branch(BaseSQLRecord):
    """Branches group changes, similar to how git branches group changes."""

    # auto-incrementing integer primary key
    id: int = models.AutoField(primary_key=True)
    """An integer id that's synchronized for a family of coupled database instances.

    Among all LaminDB instances, this id is arbitrary and non-unique.
    """
    # indexed, but not declared unique
    name: str = models.CharField(max_length=100, db_index=True)
    """Name of branch."""
    # unique, non-editable, at most 12 characters; "M" is both the Python-side
    # and the database-side default
    uid: str = CharField(
        editable=False,
        unique=True,
        max_length=12,
        default="M",
        db_default="M",
        db_index=True,
    )
    """Universal id.

    This id is useful if one wants to apply the same patch to many database instances.
    """
    description: str | None = CharField(null=True)
    """Description of branch."""
    # filled in by the database via `Now()` rather than by Python
    created_at: datetime = DateTimeField(
        editable=False, db_default=models.functions.Now(), db_index=True
    )
    """Time of creation of record."""
    # nullable; related_name="+" disables the reverse accessor on `User`
    created_by: User = ForeignKey(
        "User", CASCADE, default=None, related_name="+", null=True
    )
    """Creator of branch."""

    # user-facing constructor signature
    @overload
    def __init__(
        self,
        name: str,
        description: str | None = None,
    ): ...

    # positional signature, presumably used when instantiating from database rows
    @overload
    def __init__(
        self,
        *db_args,
    ): ...

    def __init__(
        self,
        *args,
        **kwargs,
    ):
        """Forward all arguments unchanged to the parent constructor."""
        super().__init__(*args, **kwargs)
995
+
996
+
997
@doc_args(RECORD_REGISTRY_EXAMPLE)
class SQLRecord(BaseSQLRecord, metaclass=Registry):
    """Metadata record.

    Every `SQLRecord` is a data model that comes with a registry in form of a SQL
    table in your database.

    Sub-classing `SQLRecord` creates a new registry while instantiating a `SQLRecord`
    creates a new record.

    {}

    `SQLRecord`'s metaclass is :class:`~lamindb.models.Registry`.

    `SQLRecord` inherits from Django's `Model` class. Why does LaminDB call it `SQLRecord`
    and not `Model`? The term `SQLRecord` can't lead to confusion with statistical,
    machine learning or biological models.
    """

    # annotated with the related model (like `space` below); the raw integer
    # lives in `branch_id`, stored in the legacy `_branch_code` column
    branch: Branch = ForeignKey(
        Branch, PROTECT, default=1, db_default=1, db_column="_branch_code"
    )
    """Whether record is on a branch or in another "special state".

    This dictates where a record appears in exploration, queries & searches,
    whether a record can be edited, and whether a record acts as a template.

    Branch name coding is handled through LaminHub. "Special state" coding is as defined below.

    One should note that there is no "main" branch as in git, but that all five special codes
    (-1, 0, 1, 2, 3) act as sub-specifications for what git would call the main branch. This also
    means that for records that live on a branch only the "default state" exists. E.g., one can only
    turn a record into a template, lock it, archive it, or trash it once it's merged onto the main
    branch.

    - 3: template (hidden in queries & searches)
    - 2: locked (same as default, but locked for edits except for space admins)
    - 1: default (visible in queries & searches)
    - 0: archive (hidden, meant to be kept, locked for edits for everyone)
    - -1: trash (hidden, scheduled for deletion)

    An integer greater than 3 codes a branch that can be used for collaborators to create drafts
    that can be merged onto the main branch in an experience akin to a Pull Request. The mapping
    onto a semantic branch name is handled through LaminHub.
    """
    space: Space = ForeignKey(Space, PROTECT, default=1, db_default=1)
    """The space in which the record lives."""
    _aux: dict[str, Any] | None = JSONField(default=None, db_default=None, null=True)
    """Auxiliary field for dictionary-like metadata."""

    class Meta:
        abstract = True

    @property
    @deprecated("branch_id")
    def _branch_code(self) -> int:
        """Deprecated alias for `branch_id`."""
        return self.branch_id

    @_branch_code.setter
    def _branch_code(self, value: int):
        # keep the legacy attribute writable; it stores into `branch_id`
        self.branch_id = value
1059
+
1060
+
1061
def _format_django_validation_error(record: SQLRecord, e: DjangoValidationError):
    """Pretty print Django validation errors.

    Args:
        record: The record that failed validation; used to show the offending
            value of each invalid field.
        e: The validation error, carrying either an `error_dict` (per-field
            errors) or an `error_list` (non-field errors).

    Returns:
        The formatted multi-line message, or `None` if `e` carries no errors.
    """
    if hasattr(e, "error_dict"):
        error_dict = e.error_dict
    else:
        error_dict = {"__all__": e.error_list}

    # collect every error; a field can fail several validators at once and
    # each failure should be reported, not only the last one
    formatted_errors: list[str] = []
    for field_name, error_list in error_dict.items():
        for error in error_list:
            msg = error.message if hasattr(error, "message") else str(error)
            if field_name == "__all__":
                formatted_errors.append(f"{colors.yellow(msg)}")
            else:
                current_value = getattr(record, field_name, None)
                formatted_errors.append(
                    f"{field_name}: {colors.yellow(current_value)} is not valid\n → {msg}"
                )

    if not formatted_errors:
        return None
    return "\n " + "".join(error + "\n " for error in formatted_errors)
1090
+
1091
+
1092
def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
    """Get the parameters of a SQLRecord from its overloaded `__init__` signature.

    The class source is scanned for `@overload def __init__(...): ...` stubs;
    the first stub that is not the `*db_args` constructor is parsed line by line.

    Args:
        record_class: The class whose source is inspected.

    Returns:
        A list of `(name, type)` tuples with the type kept as a string, or an
        empty list if no suitable overload is found.

    Example:
        >>> _get_record_kwargs(bt.Organism)
        [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
    """
    source = inspect.getsource(record_class)

    # Find first overload that's not *db_args
    overload_pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"

    # The regex pattern finds parameter definitions like:
    # Simple: name: str
    # With default: age: int = 0
    # With complex types: items: List[str] = []
    # compiled once here instead of being rebuilt for every parameter line
    param_regex = re.compile(
        r"(\w+)"  # Parameter name
        r"\s*:\s*"  # Colon with optional whitespace
        r"((?:[^=,]|"  # Type hint: either non-equals/comma chars
        r"(?<=\[)[^[\]]*"  # or contents within square brackets
        r"(?=\]))+)"  # looking ahead for closing bracket
        r"(?:\s*=\s*"  # Optional default value part
        r"([^,]+))?"  # Default value: anything but comma
    )

    for single_overload in re.finditer(overload_pattern, source):
        params_block = single_overload.group(1)
        # This is an additional safety measure if the overloaded signature that we're
        # looking for is not at the top but a "db_args" constructor
        if "*db_args" in params_block:
            continue

        params = []
        for line in params_block.split("\n"):
            match = param_regex.match(line.strip())
            # blank lines and a bare `self,` carry no annotation and don't match
            if not match:
                continue
            name, type_str = match.group(1), match.group(2).strip()
            # skip only the receiver itself, not parameters that merely
            # contain the substring "self"
            if name == "self":
                continue
            # Keep type as string instead of evaluating
            params.append((name, type_str))

        return params

    return []
1144
+
1145
+
1146
def get_name_field(
    registry: type[SQLRecord] | QuerySet | Manager,
    *,
    field: StrField | None = None,
) -> str:
    """Get the 1st char or text field from the registry.

    Args:
        registry: A registry class, or a queryset/manager whose `.model` is used.
        field: An explicit field; if `None`, a default name field is looked up.

    Returns:
        The name of the resolved field.

    Raises:
        ValueError: If no default name field can be found.
        TypeError: If `field` is neither a string nor exposes `.field.name`.
    """
    model = registry.model if isinstance(registry, (QuerySet, Manager)) else registry
    model_fields = model._meta.fields
    known_names = [f.name for f in model_fields]

    if field is None:
        # resolution order: explicit `_name_field`, then a field called "name",
        # then the first char or text field that doesn't contain "id"
        if hasattr(model, "_name_field"):
            default_field = model._meta.get_field(model._name_field)
        elif "name" in known_names:
            default_field = model._meta.get_field("name")
        else:
            default_field = next(
                (
                    f
                    for f in model_fields
                    if "id" not in f.name
                    and f.get_internal_type() in {"CharField", "TextField"}
                ),
                None,
            )
        # no default name field can be found
        if default_field is None:
            raise ValueError(
                "please pass a SQLRecord string field, e.g., `CellType.name`!"
            )
        field = default_field.name  # type:ignore

    if isinstance(field, str):
        return field
    try:
        return field.field.name
    except AttributeError:
        raise TypeError(
            "please pass a SQLRecord string field, e.g., `CellType.name`!"
        ) from None
1187
+
1188
+
1189
def add_db_connection(db: str, using: str):
    """Register a database configuration under the connection alias `using`.

    Args:
        db: Passed to `dj_database_url.config` as its `default`.
        using: The key under which the configuration is stored in
            `connections.settings`.
    """
    db_config = dj_database_url.config(
        default=db, conn_max_age=600, conn_health_checks=True
    )
    db_config.update({"TIME_ZONE": "UTC", "OPTIONS": {}, "AUTOCOMMIT": True})
    connections.settings[using] = db_config
1197
+
1198
+
1199
# field used to match a foreign-key record on the default database, keyed by
# foreign-key name; every other foreign key is matched on "uid"
REGISTRY_UNIQUE_FIELD = {"storage": "root", "feature": "name", "ulabel": "name"}


def update_fk_to_default_db(
    records: SQLRecord | list[SQLRecord] | QuerySet,
    fk: str,
    using_key: str | None,
    transfer_logs: dict,
):
    """Re-point the foreign key `fk` of `records` to a record on the default database.

    Args:
        records: A single record, or a list/queryset of records that share the
            same value for `fk`.
        fk: Name of the foreign-key field (without the `_id` suffix).
        using_key: Forwarded to `transfer_to_default_db` when the related record
            has to be transferred.
        transfer_logs: Transfer bookkeeping dict, forwarded to
            `transfer_to_default_db`.

    An empty list or queryset is a no-op.
    """
    # here in case it is an iterable, we are checking only a single record
    # and set the same fks for all other records because we do this only
    # for certain fks where they have to be the same for the whole bulk
    # see transfer_fk_to_default_db_bulk
    # todo: but this has to be changed i think, it is not safe as it is now - Sergei
    is_bulk = isinstance(records, (list, QuerySet))
    if is_bulk:
        try:
            record = records[0]
        except IndexError:
            # nothing to update in an empty bulk
            return
    else:
        record = records
    if getattr(record, f"{fk}_id", None) is None:
        return

    # set the space of the transferred record to the current space
    if fk == "space":
        # for space we set the record's space to the current space
        from lamindb import context

        # the default space has id=1
        fk_record_default = Space.get(1) if context.space is None else context.space
    # process non-space fks
    else:
        fk_record = getattr(record, fk)
        field = REGISTRY_UNIQUE_FIELD.get(fk, "uid")
        fk_record_default = fk_record.__class__.filter(
            **{field: getattr(fk_record, field)}
        ).one_or_none()
        if fk_record_default is None:
            from copy import copy

            # transfer a copy so the record on the source database stays untouched
            fk_record_default = copy(fk_record)
            transfer_to_default_db(
                fk_record_default, using_key, save=True, transfer_logs=transfer_logs
            )

    # re-set the fks to the newly saved ones in the default db
    for r in records if is_bulk else [records]:
        setattr(r, f"{fk}", None)
        setattr(r, f"{fk}_id", fk_record_default.id)
1244
+
1245
+
1246
# foreign keys that are updated once for a whole bulk of records
FKBULK = ["organism", "source", "report"]  # "report" is for Run


def transfer_fk_to_default_db_bulk(
    records: list | QuerySet, using_key: str | None, transfer_logs: dict
):
    """Update every foreign key listed in `FKBULK` for a bulk of records."""
    for bulk_fk in FKBULK:
        update_fk_to_default_db(
            records, bulk_fk, using_key, transfer_logs=transfer_logs
        )
1258
+
1259
+
1260
def get_transfer_run(record) -> Run:
    """Get or create the run that tracks transfers from the record's source database.

    The transform is identified by the source instance uid (read from a cache
    file) with "0000" appended; it is created if missing. A run for that
    transform and the current `initiated_by_run` is then reused or created.

    Args:
        record: A record whose `_state.db` holds the source instance slug.

    Returns:
        The transfer run.

    Raises:
        SystemExit: If the uid cache file for the source instance doesn't exist.
    """
    from lamindb import settings
    from lamindb.core._context import context
    from lamindb.models import Run, Transform
    from lamindb.models.artifact import WARNING_RUN_TRANSFORM

    slug = record._state.db
    owner, name = get_owner_name_from_identifier(slug)
    cache_using_filepath = (
        ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
    )
    if not cache_using_filepath.exists():
        raise SystemExit("Need to call .using() before")
    instance_uid = cache_using_filepath.read_text().split("\n")[0]
    key = f"transfers/{instance_uid}"
    uid = instance_uid + "0000"
    transform = Transform.filter(uid=uid).one_or_none()
    if transform is None:
        search_names = settings.creation.search_names
        settings.creation.search_names = False
        try:
            transform = Transform(  # type: ignore
                uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
            ).save()
        finally:
            # restore the global setting even if creating the transform fails
            settings.creation.search_names = search_names
    # use the global run context to get the initiated_by_run run id
    if context.run is not None:
        initiated_by_run = context.run
    else:
        if not settings.creation.artifact_silence_missing_run_warning:
            logger.warning(WARNING_RUN_TRANSFORM)
        initiated_by_run = None
    # it doesn't seem to make sense to create new runs for every transfer
    run = Run.filter(
        transform=transform, initiated_by_run=initiated_by_run
    ).one_or_none()
    if run is None:
        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()  # type: ignore
        run.initiated_by_run = initiated_by_run  # so that it's available in memory
    return run
1299
+
1300
+
1301
def transfer_to_default_db(
    record: SQLRecord,
    using_key: str | None,
    *,
    transfer_logs: dict,
    save: bool = False,
    transfer_fk: bool = True,
) -> SQLRecord | None:
    """Prepare `record` from another database for saving on the default database.

    Args:
        record: The record to transfer; mutated in place.
        using_key: Forwarded to `update_fk_to_default_db`.
        transfer_logs: Dict with "mapped", "transferred" and "run" entries that
            is updated while transferring.
        save: Whether to save the record after re-targeting it.
        transfer_fk: If `False`, foreign keys listed in `FKBULK` are left alone.

    Returns:
        The record with the same uid that already exists on the default
        database, or `None` if there is none or the record isn't from another
        database.
    """
    source_db = record._state.db
    if source_db is None or source_db == "default":
        return None

    existing = record.__class__.objects.filter(uid=record.uid).one_or_none()
    label = f"{record.__class__.__name__}(uid='{record.uid}')"
    if transfer_logs["run"] is None:
        transfer_logs["run"] = get_transfer_run(record)
    if existing is not None:
        transfer_logs["mapped"].append(label)
        return existing
    transfer_logs["transferred"].append(label)

    # the current user becomes the creator on the default database
    if hasattr(record, "created_by_id"):
        record.created_by = None
        record.created_by_id = ln_setup.settings.user.id
    # run & transform
    transfer_run = transfer_logs["run"]
    if hasattr(record, "run_id"):
        record.run = None
        record.run_id = transfer_run.id
    # deal with denormalized transform FK on artifact and collection
    if hasattr(record, "transform_id"):
        record.transform = None
        record.transform_id = transfer_run.transform_id

    # transfer other foreign key fields
    handled_above = {"created_by", "run", "transform", "branch"}
    remaining_fks = [
        f.name
        for f in record._meta.fields
        if f.get_internal_type() == "ForeignKey" and f.name not in handled_above
    ]
    if not transfer_fk:
        # don't transfer fk fields that are already bulk transferred
        remaining_fks = [name for name in remaining_fks if name not in FKBULK]
    for name in remaining_fks:
        update_fk_to_default_db(record, name, using_key, transfer_logs=transfer_logs)

    # drop the source primary key and re-target the default database
    record.id = None
    record._state.db = "default"
    if save:
        record.save()
    return None
1351
+
1352
+
1353
def track_current_key_and_name_values(record: SQLRecord):
    """Remember the current key/suffix or name value on the record.

    Artifacts get `_old_key` and `_old_suffix`; other records that define
    `_name_field` get `_old_name`.
    """
    from lamindb.models import Artifact

    # below, we're using __dict__ to avoid triggering the refresh from the database
    # which can lead to a recursion
    raw_values = record.__dict__
    if isinstance(record, Artifact):
        record._old_key = raw_values.get("key")
        record._old_suffix = raw_values.get("suffix")
        return
    if hasattr(record, "_name_field"):
        record._old_name = raw_values.get(record._name_field)
1363
+
1364
+
1365
def check_name_change(record: SQLRecord):
    """Errors if renaming a record would invalidate linked artifacts.

    Raises:
        SQLRecordNameChangeIntegrityError: If a renamed label or feature is
            still linked to artifacts.
    """
    from lamindb.models import Artifact, Collection, Feature, Schema, Transform

    untracked = (
        not record.pk
        or not hasattr(record, "_old_name")
        or not hasattr(record, "_name_field")
    )
    if untracked:
        return

    # artifacts, collections & transforms are checked in check_key_change or
    # not checked at all; renaming feature sets is not checked
    if isinstance(record, (Artifact, Collection, Transform, Schema)):
        return

    old_name = record._old_name
    new_name = getattr(record, record._name_field)
    registry = record.__class__.__name__
    if old_name == new_name:
        return

    # when a label is renamed, only raise a warning if it has a feature
    if hasattr(record, "artifacts"):
        links_with_feature = (
            record.artifacts.through.filter(
                label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
            )
            .exclude(feature_id=None)  # must have a feature
            .distinct()
        )
        artifact_ids = links_with_feature.list("artifact__uid")
        n = len(artifact_ids)
        if n == 0:
            return
        s = "s" if n > 1 else ""
        logger.error(
            f"You are trying to {colors.red('rename label')} from '{old_name}' to '{new_name}'!\n"
            f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {artifact_ids}\n\n"
            f"{colors.bold('To rename this label')}, make it external:\n"
            f" → run `artifact.labels.make_external(label)`\n\n"
            f"After renaming, consider re-curating the above artifact{s}:\n"
            f' → in each dataset, manually modify label "{old_name}" to "{new_name}"\n'
            f" → run `ln.Curator`\n"
        )
        raise SQLRecordNameChangeIntegrityError

    # when a feature is renamed
    if isinstance(record, Feature):
        # only internal features are associated with schemas
        linked_artifacts = Artifact.filter(feature_sets__features=record).list("uid")
        n = len(linked_artifacts)
        if n == 0:
            return
        s = "s" if n > 1 else ""
        logger.error(
            f"You are trying to {colors.red('rename feature')} from '{old_name}' to '{new_name}'!\n"
            f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {linked_artifacts}\n\n"
            f"{colors.bold('To rename this feature')}, make it external:\n"
            " → run `artifact.features.make_external(feature)`\n\n"
            f"After renaming, consider re-curating the above artifact{s}:\n"
            f" → in each dataset, manually modify feature '{old_name}' to '{new_name}'\n"
            f" → run `ln.Curator`\n"
        )
        raise SQLRecordNameChangeIntegrityError
1432
+
1433
+
1434
def check_key_change(record: Union[Artifact, Transform]):
    """Errors if a record's key has falsely changed.

    Only artifacts that carry a tracked `_old_key` are checked.

    Raises:
        InvalidArgument: If the suffix changed, if a non-virtual key changed,
            or if the suffix of the new key doesn't match the old one.
    """
    from .artifact import Artifact

    if not (isinstance(record, Artifact) and hasattr(record, "_old_key")):
        return
    if record._old_suffix != record.suffix:
        raise InvalidArgument(
            f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'."
        )

    old_key, new_key = record._old_key, record.key
    if old_key == new_key:
        return
    if not record._key_is_virtual:
        raise InvalidArgument(
            f"Changing a non-virtual key of an artifact is not allowed! You tried to change it from '{old_key}' to '{new_key}'."
        )

    if old_key is None:
        # no previous key: the artifact's own suffix is the reference
        old_key_suffix = record.suffix
    else:
        old_key_suffix = extract_suffix_from_path(
            PurePosixPath(old_key), arg_name="key"
        )
        assert old_key_suffix == record.suffix, (  # noqa: S101
            old_key_suffix,
            record.suffix,
        )
    new_key_suffix = extract_suffix_from_path(PurePosixPath(new_key), arg_name="key")
    if old_key_suffix != new_key_suffix:
        raise InvalidArgument(
            f"The suffix '{new_key_suffix}' of the provided key is incorrect, it should be '{old_key_suffix}'."
        )
1470
+
1471
+
1472
def format_field_value(value: datetime | str | Any) -> Any:
    """Format a field value for display in a record repr.

    Datetimes are rendered as `%Y-%m-%d %H:%M:%S %Z`. Strings are wrapped in
    single quotes, after being re-rendered in the same format if they parse as
    ISO datetimes. Any other value is returned unchanged.
    """
    from datetime import datetime

    timestamp_format = "%Y-%m-%d %H:%M:%S %Z"

    if isinstance(value, datetime):
        return value.strftime(timestamp_format)
    if not isinstance(value, str):
        return value

    rendered = value
    try:
        rendered = datetime.fromisoformat(value)
        rendered = rendered.strftime(timestamp_format)
    except ValueError:
        # not an ISO datetime string: quote it as is
        pass
    return f"'{rendered}'"
1487
+
1488
+
1489
class SQLRecordInfo:
    """Collects the simple and relational fields of a registry for display."""

    def __init__(self, registry: Registry):
        self.registry = registry

    def _get_type_for_field(self, field_name: str) -> str:
        """Return the related model's name, or else the field's internal type."""
        field = self.registry._meta.get_field(field_name)
        related_model_name = (
            field.related_model.__name__
            if hasattr(field, "related_model") and field.related_model
            else None
        )
        return related_model_name if related_model_name else field.get_internal_type()

    def _get_base_class_fields(self) -> list[str]:
        """Return the field names of all direct base classes that have `_meta`."""
        return [
            field.name
            for base in self.registry.__bases__
            if hasattr(base, "_meta")
            for field in base._meta.get_fields()
        ]

    def _reorder_fields_by_class(self, fields_to_order: list[Field]) -> list[Field]:
        """Reorders the fields so that base class fields come last."""
        # computed once as a set instead of once per field
        base_class_field_names = set(self._get_base_class_fields())
        non_base_class_fields = [
            field
            for field in fields_to_order
            if field.name not in base_class_field_names
        ]
        found_base_class_fields = [
            field for field in fields_to_order if field.name in base_class_field_names
        ]
        return non_base_class_fields + found_base_class_fields

    @staticmethod
    def _module_of_formatted_field(field_str: str) -> str:
        """Extract the module name from a formatted `.name: module.Type` line."""
        field_type = field_str.split(":")[1].split()[0]
        return field_type.split(".")[0]

    def get_simple_fields(self, return_str: bool = False) -> Any:
        """Return the non-relational, public fields, or their formatted listing.

        Relational fields, fields starting with an underscore and `id` are
        excluded. With `return_str=True` a string is returned instead of a list.
        """
        relational_types = (ManyToOneRel, ManyToManyRel, ManyToManyField, ForeignKey)
        simple_fields = [
            field
            for field in self.registry._meta.get_fields()
            if not (
                isinstance(field, relational_types)
                or field.name.startswith("_")
                or field.name == "id"
            )
        ]
        simple_fields = self._reorder_fields_by_class(simple_fields)
        if not return_str:
            return simple_fields

        repr_str = f" {colors.italic('Simple fields')}\n"
        if simple_fields:
            repr_str += "".join(
                [
                    f" .{field_name.name}: {self._get_type_for_field(field_name.name)}\n"
                    for field_name in simple_fields
                ]
            )
        return repr_str

    def get_relational_fields(self, return_str: bool = False):
        """Return the relational fields, or their formatted listing.

        Without `return_str`, returns a tuple of the core-module fields and a
        dict mapping external module names to their fields. With
        `return_str=True`, returns one string with a section per module.
        """
        # we ignore ManyToOneRel because it leads to so much clutter in the API
        # also note that our general guideline is to have related_name="+"
        # for ForeignKey fields
        relational_fields = (ManyToOneRel, ManyToManyRel, ManyToManyField, ForeignKey)

        class_specific_relational_fields = [
            field
            for field in self.registry._meta.fields + self.registry._meta.many_to_many
            if isinstance(field, relational_fields)
            and not field.name.startswith(("links_", "_"))
        ]

        non_class_specific_relational_fields = [
            field
            for field in self.registry._meta.get_fields()
            if isinstance(field, relational_fields)
            and not field.name.startswith(("links_", "_"))
        ]
        non_class_specific_relational_fields = self._reorder_fields_by_class(
            non_class_specific_relational_fields
        )

        # Ensure that class specific fields (e.g. Artifact) come before non-class specific fields (e.g. collection)
        filtered_non_class_specific = [
            field
            for field in non_class_specific_relational_fields
            if field not in class_specific_relational_fields
        ]
        ordered_relational_fields = (
            class_specific_relational_fields + filtered_non_class_specific
        )

        core_module_fields = []
        external_modules_fields = []
        for field in ordered_relational_fields:
            # the text after ": " in the field's repr, minus the closing char
            field_name = repr(field).split(": ")[1][:-1]
            if field_name.count(".") == 1 and "lamindb" not in field_name:
                external_modules_fields.append(field)
            else:
                core_module_fields.append(field)

        def _get_related_field_type(field) -> str:
            field_type = (
                field.related_model.__get_name_with_module__()
                .replace(
                    "Artifact", ""
                )  # some fields have an unnecessary 'Artifact' in their name
                .replace(
                    "Collection", ""
                )  # some fields have an unnecessary 'Collection' in their name
            )
            return (
                self._get_type_for_field(field.name)
                if not field_type.strip()
                else field_type
            )

        core_module_fields_formatted = [
            f" .{field.name}: {_get_related_field_type(field)}\n"
            for field in core_module_fields
        ]
        external_modules_fields_formatted = [
            f" .{field.name}: {_get_related_field_type(field)}\n"
            for field in external_modules_fields
        ]

        if not return_str:
            external_modules_fields_by_modules = defaultdict(list)
            for field_str, field in zip(
                external_modules_fields_formatted, external_modules_fields
            ):
                module_name = self._module_of_formatted_field(field_str)
                external_modules_fields_by_modules[module_name].append(field)
            return core_module_fields, external_modules_fields_by_modules

        repr_str = ""

        # Non-external relational fields
        if core_module_fields:
            repr_str += f" {colors.italic('Relational fields')}\n"
            repr_str += "".join(core_module_fields_formatted)

        # External relational fields, grouped by the parsed module name
        # (the same key the non-string branch uses) rather than by substring
        formatted_by_module = defaultdict(list)
        for field_str in external_modules_fields_formatted:
            formatted_by_module[self._module_of_formatted_field(field_str)].append(
                field_str
            )

        if formatted_by_module:
            external_modules = set(formatted_by_module)
            # We want Bionty to show up before other modules
            ordered_modules = (
                ["bionty"] + sorted(external_modules - {"bionty"})
                if "bionty" in external_modules
                else sorted(external_modules)
            )
            for ext_module in ordered_modules:
                repr_str += f" {colors.italic(f'{ext_module.capitalize()} fields')}\n"
                repr_str += "".join(formatted_by_module[ext_module])

        return repr_str
1662
+
1663
+
1664
def registry_repr(cls):
    """Shows fields.

    Returns the registry name followed by its simple and relational field
    listings, with trailing newlines removed.
    """
    header = f"{colors.green(cls.__name__)}\n"
    info = SQLRecordInfo(cls)
    sections = (
        header,
        info.get_simple_fields(return_str=True),
        info.get_relational_fields(return_str=True),
    )
    return "".join(sections).rstrip("\n")
1672
+
1673
+
1674
def record_repr(
    self: SQLRecord, include_foreign_keys: bool = True, exclude_field_names=None
) -> str:
    """Build a `ClassName(field=value, ...)` representation of a record.

    Args:
        include_foreign_keys: Whether to append the `<name>_id` attribute of
            each foreign key.
        exclude_field_names: Non-foreign-key field names to leave out; defaults
            to `["id", "updated_at", "source_code"]`.
    """
    excluded = (
        ["id", "updated_at", "source_code"]
        if exclude_field_names is None
        else exclude_field_names
    )
    model_fields = self._meta.fields
    field_names = [
        f.name
        for f in model_fields
        if not isinstance(f, ForeignKey) and f.name not in excluded
    ]
    if include_foreign_keys:
        field_names.extend(
            f"{f.name}_id" for f in model_fields if isinstance(f, ForeignKey)
        )
    # `created_at` goes last, `uid` goes first
    if "created_at" in field_names:
        field_names.remove("created_at")
        field_names.append("created_at")
    if field_names[0] != "uid" and "uid" in field_names:
        field_names.remove("uid")
        field_names.insert(0, "uid")

    rendered = {}
    for name in field_names:
        if name.startswith("_") or not hasattr(self, name):
            continue
        value = getattr(self, name)
        if name == "version" and value:
            # Force strip the time component of the version
            rendered[name] = f"'{str(value).split()[0]}'"
        else:
            rendered[name] = format_field_value(value)

    # fields whose formatted value is None are omitted
    fields_joined_str = ", ".join(
        f"{name}={text}" for name, text in rendered.items() if text is not None
    )
    return f"{self.__class__.__name__}({fields_joined_str})"
1709
+
1710
+
1711
+ # below is code to further format the repr of a record
1712
+ #
1713
+ # def format_repr(
1714
+ # record: SQLRecord, exclude_field_names: str | list[str] | None = None
1715
+ # ) -> str:
1716
+ # if isinstance(exclude_field_names, str):
1717
+ # exclude_field_names = [exclude_field_names]
1718
+ # exclude_field_names_init = ["id", "created_at", "updated_at"]
1719
+ # if exclude_field_names is not None:
1720
+ # exclude_field_names_init += exclude_field_names
1721
+ # return record.__repr__(
1722
+ # include_foreign_keys=False, exclude_field_names=exclude_field_names_init
1723
+ # )
1724
+
1725
+
1726
# both `repr()` and `str()` of any SQLRecord use the same field-based
# representation built by `record_repr`
SQLRecord.__repr__ = record_repr  # type: ignore
SQLRecord.__str__ = record_repr  # type: ignore
1728
+
1729
+
1730
class Migration(BaseSQLRecord):
    """Model mapped onto the `django_migrations` table.

    The model is unmanaged (`managed = False`).
    """

    # presumably the app label the migration belongs to; verify against the table
    app = CharField(max_length=255)
    # presumably the migration's name; verify against the table
    name = CharField(max_length=255)
    # presumably the time the migration was applied; verify against the table
    applied: datetime = DateTimeField()

    class Meta:
        db_table = "django_migrations"
        managed = False
1738
+
1739
+
1740
# aliases that keep the previous public names importable; each one is the
# very same object as the name it points to
LinkORM = IsLink  # backward compat
Record = SQLRecord  # backward compat
BasicRecord = BaseSQLRecord  # backward compat
RecordInfo = SQLRecordInfo  # backward compat