lamindb 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. lamindb/__init__.py +33 -26
  2. lamindb/_finish.py +9 -1
  3. lamindb/_tracked.py +26 -3
  4. lamindb/_view.py +2 -3
  5. lamindb/base/__init__.py +1 -1
  6. lamindb/base/ids.py +1 -10
  7. lamindb/base/users.py +1 -4
  8. lamindb/core/__init__.py +7 -65
  9. lamindb/core/_compat.py +60 -0
  10. lamindb/core/_context.py +50 -22
  11. lamindb/core/_mapped_collection.py +4 -2
  12. lamindb/core/_settings.py +6 -6
  13. lamindb/core/_sync_git.py +1 -1
  14. lamindb/core/_track_environment.py +2 -1
  15. lamindb/core/datasets/_small.py +3 -3
  16. lamindb/core/loaders.py +43 -20
  17. lamindb/core/storage/_anndata_accessor.py +8 -3
  18. lamindb/core/storage/_backed_access.py +14 -7
  19. lamindb/core/storage/_pyarrow_dataset.py +24 -9
  20. lamindb/core/storage/_tiledbsoma.py +8 -6
  21. lamindb/core/storage/_zarr.py +104 -25
  22. lamindb/core/storage/objects.py +63 -28
  23. lamindb/core/storage/paths.py +16 -13
  24. lamindb/core/types.py +10 -0
  25. lamindb/curators/__init__.py +176 -149
  26. lamindb/errors.py +1 -1
  27. lamindb/integrations/_vitessce.py +4 -4
  28. lamindb/migrations/0089_subsequent_runs.py +159 -0
  29. lamindb/migrations/0090_runproject_project_runs.py +73 -0
  30. lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
  31. lamindb/models/__init__.py +79 -0
  32. lamindb/{core → models}/_describe.py +3 -3
  33. lamindb/{core → models}/_django.py +8 -5
  34. lamindb/{core → models}/_feature_manager.py +103 -87
  35. lamindb/{_from_values.py → models/_from_values.py} +5 -2
  36. lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
  37. lamindb/{core → models}/_label_manager.py +10 -17
  38. lamindb/{core/relations.py → models/_relations.py} +8 -1
  39. lamindb/models/artifact.py +2602 -0
  40. lamindb/{_can_curate.py → models/can_curate.py} +349 -180
  41. lamindb/models/collection.py +683 -0
  42. lamindb/models/core.py +135 -0
  43. lamindb/models/feature.py +643 -0
  44. lamindb/models/flextable.py +163 -0
  45. lamindb/{_parents.py → models/has_parents.py} +55 -49
  46. lamindb/models/project.py +384 -0
  47. lamindb/{_query_manager.py → models/query_manager.py} +10 -8
  48. lamindb/{_query_set.py → models/query_set.py} +64 -32
  49. lamindb/models/record.py +1762 -0
  50. lamindb/models/run.py +563 -0
  51. lamindb/{_save.py → models/save.py} +18 -8
  52. lamindb/models/schema.py +732 -0
  53. lamindb/models/transform.py +360 -0
  54. lamindb/models/ulabel.py +249 -0
  55. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
  56. lamindb-1.2.0.dist-info/RECORD +95 -0
  57. lamindb/_artifact.py +0 -1361
  58. lamindb/_collection.py +0 -440
  59. lamindb/_feature.py +0 -316
  60. lamindb/_is_versioned.py +0 -40
  61. lamindb/_record.py +0 -1065
  62. lamindb/_run.py +0 -60
  63. lamindb/_schema.py +0 -347
  64. lamindb/_storage.py +0 -15
  65. lamindb/_transform.py +0 -170
  66. lamindb/_ulabel.py +0 -56
  67. lamindb/_utils.py +0 -9
  68. lamindb/base/validation.py +0 -63
  69. lamindb/core/_data.py +0 -491
  70. lamindb/core/fields.py +0 -12
  71. lamindb/models.py +0 -4435
  72. lamindb-1.1.0.dist-info/RECORD +0 -95
  73. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
  74. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1762 @@
1
+ from __future__ import annotations
2
+
3
+ import builtins
4
+ import inspect
5
+ import re
6
+ import sys
7
+ from collections import defaultdict
8
+ from functools import reduce
9
+ from itertools import chain
10
+ from pathlib import PurePosixPath
11
+ from typing import (
12
+ TYPE_CHECKING,
13
+ Any,
14
+ Literal,
15
+ NamedTuple,
16
+ Union,
17
+ )
18
+
19
+ import dj_database_url
20
+ import lamindb_setup as ln_setup
21
+ from django.core.exceptions import ValidationError as DjangoValidationError
22
+ from django.db import IntegrityError, connections, models, transaction
23
+ from django.db.models import (
24
+ CASCADE,
25
+ PROTECT,
26
+ Field,
27
+ IntegerField,
28
+ Manager,
29
+ Q,
30
+ QuerySet,
31
+ Value,
32
+ )
33
+ from django.db.models.base import ModelBase
34
+ from django.db.models.fields.related import (
35
+ ManyToManyField,
36
+ ManyToManyRel,
37
+ ManyToOneRel,
38
+ )
39
+ from django.db.models.functions import Cast, Coalesce
40
+ from django.db.models.lookups import (
41
+ Contains,
42
+ Exact,
43
+ IContains,
44
+ IExact,
45
+ IRegex,
46
+ IStartsWith,
47
+ Regex,
48
+ StartsWith,
49
+ )
50
+ from lamin_utils import colors, logger
51
+ from lamin_utils._lookup import Lookup
52
+ from lamindb_setup import settings as setup_settings
53
+ from lamindb_setup._connect_instance import (
54
+ get_owner_name_from_identifier,
55
+ load_instance_settings,
56
+ update_db_using_local,
57
+ )
58
+ from lamindb_setup.core._docs import doc_args
59
+ from lamindb_setup.core._hub_core import connect_instance_hub
60
+ from lamindb_setup.core._settings_store import instance_settings_file
61
+ from lamindb_setup.core.upath import extract_suffix_from_path
62
+
63
+ from lamindb.base import deprecated
64
+ from lamindb.base.fields import (
65
+ CharField,
66
+ DateTimeField,
67
+ ForeignKey,
68
+ JSONField,
69
+ TextField,
70
+ )
71
+ from lamindb.base.types import FieldAttr, StrField
72
+ from lamindb.errors import FieldValidationError
73
+
74
+ from ..errors import (
75
+ InvalidArgument,
76
+ RecordNameChangeIntegrityError,
77
+ ValidationError,
78
+ )
79
+ from ._is_versioned import IsVersioned
80
+
81
+ if TYPE_CHECKING:
82
+ from datetime import datetime
83
+
84
+ import pandas as pd
85
+
86
+ from .artifact import Artifact
87
+ from .run import Run, User
88
+ from .transform import Transform
89
+
90
+
91
# Truthy when `builtins.__IPYTHON__` is defined, otherwise `False`.
# Presumably IPython/Jupyter sets that attribute; within this module the flag
# selects `IPython.display` for showing search suggestions.
IPYTHON = getattr(builtins, "__IPYTHON__", False)
92
+
93
+
94
+ # -------------------------------------------------------------------------------------
95
+ # A note on required fields at the Record level
96
+ #
97
+ # As Django does most of its validation on the Form-level, it doesn't offer functionality
98
+ # for validating the integrity of a Record object upon instantiation (similar to pydantic)
99
+ #
100
+ # For required fields, we define them as commonly done on the SQL level together
101
+ # with a validator in Record (validate_required_fields)
102
+ #
103
+ # This goes against the Django convention, but goes with the SQLModel convention
104
+ # (Optional fields can be null on the SQL level, non-optional fields cannot)
105
+ #
106
+ # Due to Django's convention where CharField has pre-configured (null=False, default=""), marking
107
+ # a required field necessitates passing `default=None`. Without the validator it would trigger
108
+ # an error at the SQL-level, with it, it triggers it at instantiation
109
+
110
+ # -------------------------------------------------------------------------------------
111
+ # A note on class and instance methods of core Record
112
+ #
113
+ # All of these are defined and tested within lamindb, in files starting with _{orm_name}.py
114
+
115
+ # -------------------------------------------------------------------------------------
116
+ # A note on maximal lengths of char fields
117
+ #
118
+ # 100 characters:
119
+ # "Raindrops pitter-pattered on the windowpane, blurring the"
120
+ # "city lights outside, curled up with a mug."
121
+ # A good maximal length for a name (title).
122
+ #
123
+ # 150 characters: We choose this for name maximal length because some users like long names.
124
+ #
125
+ # 255 characters:
126
+ # "In creating a precise 255-character paragraph, one engages in"
127
+ # "a dance of words, where clarity meets brevity. Every syllable counts,"
128
+ # "illustrating the skill in compact expression, ensuring the essence of the"
129
+ # "message shines through within the exacting limit."
130
+ # This is a good maximal length for a description field.
131
+
132
+
133
class LinkORM:
    """Marker class without any behavior of its own.

    Within this module, `validate_literal_fields` returns early for records
    that are instances of this class.
    """
135
+
136
+
137
def deferred_attribute__repr__(self):
    """Render a field attribute as `FieldAttr(<ModelName>.<field_name>)`."""
    field = self.field
    model_name = field.model.__name__
    return f"FieldAttr({model_name}.{field.name})"
139
+
140
+
141
# Patch the repr of `FieldAttr` (imported from `lamindb.base.types`) so that
# field attributes print as `FieldAttr(Model.field)`.
FieldAttr.__repr__ = deferred_attribute__repr__  # type: ignore
142
+
143
+
144
class ValidateFields:
    """Marker class without any behavior of its own.

    `BasicRecord.__init__` runs Django's field validation on construction
    for records that are instances of this class.
    """
146
+
147
+
148
def is_approx_pascal_case(s):
    """Check that the last component of a dotted string starts with a capital letter.

    This is only an approximation of a PascalCase check: nothing beyond the
    first character of the last component is inspected. Strings containing
    `[` are accepted as-is.

    Args:
        s (str): The string to check

    Returns:
        bool: `True` whenever no exception is raised

    Raises:
        ValueError: If the last component is empty or doesn't start with a capital letter
    """
    if "[" in s:  # this is because we allow types of form 'script[test_script.py]'
        return True
    last_component = s.split(".")[-1]

    # slice instead of index so that an empty component (e.g. "" or "a.") raises
    # the documented ValueError rather than an IndexError
    if not last_component[:1].isupper():
        raise ValueError(
            f"'{last_component}' should start with a capital letter given you're defining a type"
        )

    return True
170
+
171
+
172
def init_self_from_db(self: Record, existing_record: Record):
    """Re-initialize `self` in place with the concrete field values of `existing_record`.

    The parent initializer of `self`'s class is called with the values as
    positional arguments, after which the record state is marked as already
    stored on the "default" database.
    """
    field_values = tuple(
        getattr(existing_record, field.attname)
        for field in self._meta.concrete_fields
    )
    super(self.__class__, self).__init__(*field_values)
    state = self._state
    state.adding = False  # mimic from_db
    state.db = "default"
179
+
180
+
181
def update_attributes(record: Record, attributes: dict[str, str]):
    """Overwrite attributes of `record` whose values differ from `attributes`.

    Entries with a `None` value and the keys `dtype` and `_aux` are never
    written. Every overwrite is logged as a warning.
    """
    skipped_keys = {"dtype", "_aux"}
    for key, value in attributes.items():
        current = getattr(record, key)
        differs = current != value
        if not differs or value is None or key in skipped_keys:
            continue
        logger.warning(f"updated {key} from {current} to {value}")
        setattr(record, key, value)
191
+
192
+
193
def validate_literal_fields(record: Record, kwargs) -> None:
    """Validate the values passed for `Literal`-annotated fields of a record.

    Records that are instances of `LinkORM` and records of a class named
    `Feature` are not validated.

    Args:
        record: record being validated
        kwargs: keyword arguments passed to the record constructor; only keys
            that match an annotated field and hold a non-`None` value are checked

    Raises:
        FieldValidationError: If any checked value is not in its Literal's allowed values
    """
    if isinstance(record, LinkORM):
        return None
    # compare for equality: a substring test (`in "Feature"`) would also skip
    # any class whose name happens to be a substring of "Feature"
    if record.__class__.__name__ == "Feature":
        return None
    from lamindb.base.types import FeatureDtype, TransformType

    # annotations may be plain strings; resolve the ones naming a known alias
    # NOTE(review): "ArtifactKind" resolves to FeatureDtype here — looks
    # unintended, confirm against lamindb.base.types
    types = {
        "TransformType": TransformType,
        "ArtifactKind": FeatureDtype,
        "FeatureDtype": FeatureDtype,
    }
    errors = {}
    annotations = getattr(record.__class__, "__annotations__", {})
    for field_name, annotation in annotations.items():
        if field_name not in kwargs or kwargs[field_name] is None:
            continue
        value = kwargs[field_name]
        annotation_key = str(annotation)
        if annotation_key in types:
            # look up with the same key that was tested for membership
            annotation = types[annotation_key]
        if not hasattr(annotation, "__origin__"):
            continue
        if annotation.__origin__ is not Literal:
            continue
        valid_values = set(annotation.__args__)
        if value not in valid_values:
            errors[field_name] = (
                f"{field_name}: {colors.yellow(value)} is not a valid value"
                f"\n → Valid values are: {colors.green(', '.join(sorted(valid_values)))}"
            )
    if errors:
        message = "\n " + "".join(error + "\n " for error in errors.values())
        raise FieldValidationError(message)
237
+
238
+
239
def validate_fields(record: Record, kwargs):
    """Validate constructor keyword arguments of a record.

    Checks, in this order: presence of required fields, the exact length of a
    passed `uid` for core registries, the naming of types (`is_type`), and
    finally the values of `Literal`-annotated fields.

    Note that `kwargs` is modified in place: required fields that weren't
    passed are added with value `None` before the check for missing fields.

    Raises:
        FieldValidationError: If a required field is missing or `None`.
        ValidationError: If `uid` doesn't have the exact expected length.
    """
    from lamindb.models import (
        Artifact,
        Collection,
        Feature,
        Param,
        Run,
        Schema,
        Transform,
        ULabel,
    )

    # validate required fields
    # a "required field" is a Django field that has `null=False, default=None`
    required_fields = {
        field.name
        for field in record._meta.fields
        if not field.null and field.default is None
    }
    for field_name in required_fields:
        kwargs.setdefault(field_name, None)
    missing_fields = [
        key for key, value in kwargs.items() if value is None and key in required_fields
    ]
    if missing_fields:
        raise FieldValidationError(f"{missing_fields} are required.")
    # ensure the exact length of the internal uid for core entities
    registry = record.__class__
    if "uid" in kwargs and registry in {
        Artifact,
        Collection,
        Transform,
        Run,
        ULabel,
        Feature,
        Schema,
        Param,
    }:
        uid_field = registry._meta.get_field("uid")  # triggers FieldDoesNotExist
        uid_max_length = uid_field.max_length
        uid_length = len(kwargs["uid"])  # triggers KeyError
        if uid_length != uid_max_length:
            raise ValidationError(
                f"`uid` must be exactly {uid_max_length} characters long, got {uid_length}."
            )
    # validate is_type
    if kwargs.get("is_type") and "name" in kwargs:
        type_name = kwargs["name"]
        if type_name.endswith("s"):
            logger.warning(
                f"name '{type_name}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
            )
        is_approx_pascal_case(type_name)
    # validate literals
    validate_literal_fields(record, kwargs)
290
+
291
+
292
def suggest_records_with_similar_names(
    record: Record, name_field: str, kwargs
) -> Record | None:
    """Return a record that exactly matches the passed name, otherwise `None`.

    If there is no exact match, up to three records with similar values in
    `name_field` are searched for and, if any are found, logged as a warning.
    """
    name = kwargs.get(name_field)
    if not isinstance(name, str):
        return None
    registry = record.__class__
    # need to perform an additional request to find the exact match
    # previously, this was inferred from the truncated/fuzzy search below
    # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
    # the below needs to be .first() because there might be multiple records with the same
    # name field in case the record is versioned (e.g. for Transform key)
    exact_match = registry.filter(**{name_field: name}).first()
    if exact_match is not None:
        return exact_match
    queryset = _search(
        registry,
        name,
        field=name_field,
        truncate_string=True,
        limit=3,
    )
    if not queryset.exists():  # empty queryset
        return None
    if len(queryset) == 1:
        s, it, nots = "", "it", "s"
    else:
        s, it, nots = "s", "one of them", ""
    msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
    if not IPYTHON:
        logger.warning(f"{msg}\n{queryset}")
        return None
    from IPython.display import display

    from lamindb import settings

    logger.warning(f"{msg}")
    if settings._verbosity_int >= 1:
        display(queryset.df())
    return None
331
+
332
+
333
+ RECORD_REGISTRY_EXAMPLE = """Example::
334
+
335
+ from lamindb import Record, fields
336
+
337
+ # sub-classing `Record` creates a new registry
338
+ class Experiment(Record):
339
+ name: str = fields.CharField()
340
+
341
+ # instantiating `Experiment` creates a record `experiment`
342
+ experiment = Experiment(name="my experiment")
343
+
344
+ # you can save the record to the database
345
+ experiment.save()
346
+
347
+ # `Experiment` refers to the registry, which you can query
348
+ df = Experiment.filter(name__startswith="my ").df()
349
+ """
350
+
351
+
352
+ # this is the metaclass for Record
353
+ @doc_args(RECORD_REGISTRY_EXAMPLE)
354
+ class Registry(ModelBase):
355
+ """Metaclass for :class:`~lamindb.models.Record`.
356
+
357
+ Each `Registry` *object* is a `Record` *class* and corresponds to a table in the metadata SQL database.
358
+
359
+ You work with `Registry` objects whenever you use *class methods* of `Record`.
360
+
361
+ You call any subclass of `Record` a "registry" and their objects "records". A `Record` object corresponds to a row in the SQL table.
362
+
363
+ If you want to create a new registry, you sub-class `Record`.
364
+
365
+ {}
366
+
367
+ Note: `Registry` inherits from Django's `ModelBase`.
368
+ """
369
+
370
def __new__(cls, name, bases, attrs, **kwargs):
    """Create the new class by delegating to the parent metaclass unchanged."""
    return super().__new__(cls, name, bases, attrs, **kwargs)
373
+
374
+ # below creates a sensible auto-complete behavior that differs across the
375
+ # class and instance level in Jupyter Editors it doesn't have any effect for
376
+ # static type analyzer like pylance used in VSCode
377
def __dir__(cls):
    """List attribute names for auto-completion on the class level.

    Collects the attributes of the class and of all classes in its MRO,
    skipping names that start with a double underscore. Unless `sphinx` has
    been imported, callables are only kept if they are `classmethod`,
    `staticmethod` or `type` objects. Non-dunder attributes of `Registry` are
    appended at the end. Names keep their order of first appearance.
    """
    # this is needed to bring auto-complete on the class-level back
    # https://laminlabs.slack.com/archives/C04FPE8V01W/p1717535625268849
    # Filter class attributes, excluding instance methods
    exclude_instance_methods = "sphinx" not in sys.modules
    # https://laminlabs.slack.com/archives/C04FPE8V01W/p1721134595920959

    def include_attribute(attr_name, attr_value):
        if attr_name.startswith("__"):
            return False
        if exclude_instance_methods and callable(attr_value):
            return isinstance(attr_value, (classmethod, staticmethod, type))
        return True

    # check also inherited attributes
    if hasattr(cls, "mro"):
        attrs = chain.from_iterable(c.__dict__.items() for c in cls.mro())
    else:
        attrs = cls.__dict__.items()

    result = []
    # mirrors `result` to make the membership tests O(1) instead of a list scan
    seen = set()
    for attr_name, attr_value in attrs:
        if attr_name not in seen and include_attribute(attr_name, attr_value):
            seen.add(attr_name)
            result.append(attr_name)

    # Add non-dunder attributes from Registry
    for attr in dir(Registry):
        if not attr.startswith("__") and attr not in seen:
            seen.add(attr)
            result.append(attr)
    return result
407
+
408
def __repr__(cls) -> str:
    """Return the representation built by the module-level `registry_repr` helper."""
    representation = registry_repr(cls)
    return representation
410
+
411
def lookup(
    cls,
    field: StrField | None = None,
    return_field: StrField | None = None,
) -> NamedTuple:
    """Return an auto-complete object for a field.

    Args:
        field: The field to look up the values for. Defaults to first string field.
        return_field: The field to return. If `None`, returns the whole record.

    Returns:
        A `NamedTuple` of lookup information of the field values with a
        dictionary converter.

    See Also:
        :meth:`~lamindb.models.Record.search`

    Examples:
        >>> import bionty as bt
        >>> bt.settings.organism = "human"
        >>> bt.Gene.from_source(symbol="ADGB-DT").save()
        >>> lookup = bt.Gene.lookup()
        >>> lookup.adgb_dt
        >>> lookup_dict = lookup.dict()
        >>> lookup_dict['ADGB-DT']
        >>> lookup_by_ensembl_id = bt.Gene.lookup(field="ensembl_gene_id")
        >>> lookup_by_ensembl_id.ensg00000002745
        >>> lookup_return_symbols = bt.Gene.lookup(field="ensembl_gene_id", return_field="symbol")
    """
    # the work is done by the module-level `_lookup` helper
    return _lookup(cls=cls, field=field, return_field=return_field)
442
+
443
def filter(cls, *queries, **expressions) -> QuerySet:
    """Query records.

    Args:
        queries: One or multiple `Q` objects.
        expressions: Fields and values passed as Django query expressions.
            The special key `_using_key` is removed from the expressions and
            passed to the `QuerySet` as `using`.

    Returns:
        A :class:`~lamindb.models.QuerySet`.

    See Also:
        - Guide: :doc:`docs:registries`
        - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__

    Examples:
        >>> ln.ULabel(name="my label").save()
        >>> ln.ULabel.filter(name__startswith="my").df()
    """
    from .query_set import QuerySet

    using_key = expressions.pop("_using_key", None)
    query_set = QuerySet(model=cls, using=using_key)
    return query_set.filter(*queries, **expressions)
468
+
469
def get(
    cls,
    idlike: int | str | None = None,
    **expressions,
) -> Record:
    """Get a single record.

    Args:
        idlike: Either a uid stub, uid or an integer id.
        expressions: Fields and values passed as Django query expressions.

    Returns:
        A record.

    Raises:
        :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.

    See Also:
        - Guide: :doc:`docs:registries`
        - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__

    Examples:
        >>> ulabel = ln.ULabel.get("FvtpPJLJ")
        >>> ulabel = ln.ULabel.get(name="my-label")
    """
    from .query_set import QuerySet

    query_set = QuerySet(model=cls)
    return query_set.get(idlike, **expressions)
497
+
498
def df(
    cls,
    include: str | list[str] | None = None,
    features: bool | list[str] = False,
    limit: int = 100,
) -> pd.DataFrame:
    """Convert to `pd.DataFrame`.

    By default, shows all direct fields, except `updated_at`.

    Use arguments `include` or `features` to include other data.

    Args:
        include: Related fields to include as columns. Takes strings of
            form `"ulabels__name"`, `"cell_types__name"`, etc. or a list
            of such strings.
        features: If `True`, map all features of the
            :class:`~lamindb.Feature` registry onto the resulting
            `DataFrame`. Only available for `Artifact`.
        limit: Maximum number of rows to display from a Pandas DataFrame.
            Defaults to 100 to reduce database load.

    Examples:

        Include the name of the creator in the `DataFrame`:

        >>> ln.ULabel.df(include="created_by__name")

        Include display of features for `Artifact`:

        >>> df = ln.Artifact.df(features=True)
        >>> ln.view(df)  # visualize with type annotations

        Only include select features:

        >>> df = ln.Artifact.df(features=["cell_type_by_expert", "cell_type_by_model"])
    """
    query_set = cls.filter()
    # most recently updated records first, if the registry has that field
    if hasattr(cls, "updated_at"):
        query_set = query_set.order_by("-updated_at")
    return query_set[:limit].df(include=include, features=features)
539
+
540
def search(
    cls,
    string: str,
    *,
    field: StrField | None = None,
    limit: int | None = 20,
    case_sensitive: bool = False,
) -> QuerySet:
    """Search.

    Args:
        string: The input string to match against the field ontology values.
        field: The field or fields to search. Search all string fields by default.
        limit: Maximum amount of top results to return.
        case_sensitive: Whether the match is case sensitive.

    Returns:
        The result of the module-level `_search` helper, annotated as a `QuerySet`.

    See Also:
        :meth:`~lamindb.models.Record.filter`
        :meth:`~lamindb.models.Record.lookup`

    Examples:
        >>> ulabels = ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name")
        >>> ln.save(ulabels)
        >>> ln.ULabel.search("ULabel2")
    """
    return _search(
        cls=cls,
        string=string,
        field=field,
        limit=limit,
        case_sensitive=case_sensitive,
    )
576
+
577
def using(
    cls,
    instance: str | None,
) -> QuerySet:
    """Use a non-default LaminDB instance.

    Args:
        instance: An instance identifier of form "account_handle/instance_name".
            If `None` or if it identifies the current instance, a query set on
            the default database is returned.

    Returns:
        A `QuerySet` for this registry that uses the database connection
        registered under the key `instance`.

    Raises:
        RuntimeError: If no local settings file exists for the instance and
            connecting through the hub returns a string instead of a result tuple.

    Examples:
        >>> ln.ULabel.using("account_handle/instance_name").search("ULabel7", field="name")
                    uid    score
        name
        ULabel7  g7Hk9b2v  100.0
        ULabel5  t4Jm6s0q   75.0
        ULabel6  r2Xw8p1z   75.0
    """
    from .query_set import QuerySet

    if instance is None:
        return QuerySet(model=cls, using=None)

    owner, name = get_owner_name_from_identifier(instance)
    # the identifier points to the instance that's currently connected
    if f"{owner}/{name}" == setup_settings.instance.slug:
        return QuerySet(model=cls, using=None)

    settings_file = instance_settings_file(name, owner)
    # text file that stores the instance uid and its comma-separated modules
    cache_filepath = (
        ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
    )
    if not settings_file.exists():
        # no local settings for this instance: ask the hub
        result = connect_instance_hub(owner=owner, name=name)
        if isinstance(result, str):
            raise RuntimeError(
                f"Failed to load instance {instance}, please check your permissions!"
            )
        iresult, _ = result
        # modules of the other instance, parsed from a comma-separated string
        source_module = {
            modules for modules in iresult["schema_str"].split(",") if modules != ""
        }  # type: ignore
        # modules of the current instance
        target_module = ln_setup.settings.instance.modules
        if not source_module.issubset(target_module):
            missing_members = source_module - target_module
            logger.warning(
                f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
            )
        cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}")  # type: ignore
        # NOTE(review): recomputes the same value as above; looks redundant — confirm
        settings_file = instance_settings_file(name, owner)
        db = update_db_using_local(iresult, settings_file)
    else:
        isettings = load_instance_settings(settings_file)
        db = isettings.db
        cache_filepath.write_text(f"{isettings.uid}\n{','.join(isettings.modules)}")  # type: ignore
    # `add_db_connection` is defined elsewhere in this module
    add_db_connection(db, instance)
    return QuerySet(model=cls, using=instance)
632
+
633
def __get_module_name__(cls) -> str:
    """Return the name of the registry module that defines this class.

    This is the top-level package of the class with any `lnschema_`
    occurrence removed; `lamindb` itself is reported as `core`.
    """
    top_level_package = cls.__module__.split(".")[0]
    stripped_name = top_level_package.replace("lnschema_", "")
    return "core" if stripped_name == "lamindb" else stripped_name
639
+
640
@deprecated("__get_module_name__")
def __get_schema_name__(cls) -> str:
    """Deprecated alias of `__get_module_name__`."""
    module_name = cls.__get_module_name__()
    return module_name
643
+
644
def __get_name_with_module__(cls) -> str:
    """Return the class name, prefixed with `<module_name>.` unless the module is `core`."""
    module_name = cls.__get_module_name__()
    module_prefix = "" if module_name == "core" else f"{module_name}."
    return module_prefix + cls.__name__
651
+
652
@deprecated("__get_name_with_module__")
def __get_name_with_schema__(cls) -> str:
    """Deprecated alias of `__get_name_with_module__`."""
    name_with_module = cls.__get_name_with_module__()
    return name_with_module
655
+
656
+
657
+ class BasicRecord(models.Model, metaclass=Registry):
658
+ """Basic metadata record.
659
+
660
+ It has the same methods as Record, but doesn't have the additional fields.
661
+
662
+ It's mainly used for LinkORMs and similar.
663
+ """
664
+
665
+ class Meta:
666
+ abstract = True
667
+
668
def __init__(self, *args, **kwargs):
    """Initialize a record from keyword arguments or from positional database values.

    Three construction paths exist:

    - Keyword arguments with `_skip_validation=True`: passed to the parent
      initializer without any checks.
    - Keyword arguments (default): fields are validated and, for some
      registries, an existing record with the same name is loaded instead
      of creating a new one.
    - Positional arguments: exactly one value per concrete field is required.

    Raises:
        FieldValidationError: If positional arguments don't match the number of
            concrete fields, or if Django's field validation fails for a
            `ValidateFields` record.
        ValueError: If an existing `Schema` with the same name has a different hash.
    """
    skip_validation = kwargs.pop("_skip_validation", False)
    if not args and skip_validation:
        super().__init__(**kwargs)
    elif not args and not skip_validation:
        from ..core._settings import settings
        from .can_curate import CanCurate
        from .collection import Collection
        from .schema import Schema
        from .transform import Transform

        validate_fields(self, kwargs)

        # do not search for names if an id is passed; this is important
        # e.g. when synching ids from the notebook store to lamindb
        has_consciously_provided_uid = False
        if "_has_consciously_provided_uid" in kwargs:
            has_consciously_provided_uid = kwargs.pop(
                "_has_consciously_provided_uid"
            )
        if (
            isinstance(self, (CanCurate, Collection, Transform))
            and settings.creation.search_names
            and not has_consciously_provided_uid
        ):
            name_field = getattr(self, "_name_field", "name")
            exact_match = suggest_records_with_similar_names(
                self, name_field, kwargs
            )
            if exact_match is not None:
                if "version" in kwargs:
                    if kwargs["version"] is not None:
                        version_comment = " and version"
                        existing_record = self.__class__.filter(
                            **{
                                name_field: kwargs[name_field],
                                "version": kwargs["version"],
                            }
                        ).one_or_none()
                    else:
                        # for a versioned record, an exact name match is not a criterion
                        # for retrieving a record in case `version` isn't passed -
                        # we'd always pull out many records with exactly the same name
                        existing_record = None
                else:
                    version_comment = ""
                    existing_record = exact_match
                if existing_record is not None:
                    logger.important(
                        f"returning existing {self.__class__.__name__} record with same"
                        f" {name_field}{version_comment}: '{kwargs[name_field]}'"
                    )
                    if isinstance(self, Schema):
                        if existing_record.hash != kwargs["hash"]:
                            raise ValueError(
                                f"Schema name is already in use by schema with uid '{existing_record.uid}', please choose a different name."
                            )
                    # turn `self` into the existing record, then apply the
                    # passed values that differ from the stored ones
                    init_self_from_db(self, existing_record)
                    update_attributes(self, kwargs)
                    # note: this early return skips storing the old name & key below
                    return None
        super().__init__(**kwargs)
        if isinstance(self, ValidateFields):
            # this will trigger validation against django validators
            try:
                if hasattr(self, "clean_fields"):
                    self.clean_fields()
                else:
                    self._Model__clean_fields()
            except DjangoValidationError as e:
                message = _format_django_validation_error(self, e)
                raise FieldValidationError(message) from e
    elif len(args) != len(self._meta.concrete_fields):
        raise FieldValidationError(
            f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
        )
    else:
        # one positional value per concrete field
        super().__init__(*args)
    # both helpers are defined elsewhere in this module; presumably they record the
    # current name/key for `check_name_change`/`check_key_change` in `save()` — confirm
    _store_record_old_name(self)
    _store_record_old_key(self)
747
+
748
def save(self, *args, **kwargs) -> Record:
    """Save.

    Always saves to the default database.

    If the record was loaded from a non-default database and no `using`
    keyword is passed, the record is first transferred to the default database.
    A versioned record with `_revises` set is saved together with the revised
    record in one transaction. If saving violates a unique constraint on the
    hash, the pre-existing record with the same hash is loaded into `self`.

    Returns:
        The record itself.

    Raises:
        NotImplementedError: When transferring a versioned record that isn't
            the latest in its version history.
        IntegrityError: Re-raised unless it's a uniqueness violation that mentions the hash.
    """
    using_key = None
    if "using" in kwargs:
        using_key = kwargs["using"]
    # remember the source database and primary key before anything is changed
    db = self._state.db
    pk_on_db = self.pk
    artifacts: list = []
    if self.__class__.__name__ == "Collection" and self.id is not None:
        # when creating a new collection without being able to access artifacts
        artifacts = self.ordered_artifacts.list()
    pre_existing_record = None
    # consider records that are being transferred from other databases
    transfer_logs: dict[str, list[str] | None] = {
        "mapped": [],
        "transferred": [],
        "run": None,
    }
    if db is not None and db != "default" and using_key is None:
        if isinstance(self, IsVersioned):
            if not self.is_latest:
                raise NotImplementedError(
                    "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
                )
        # `using_key` is always `None` in this branch
        pre_existing_record = transfer_to_default_db(
            self, using_key, transfer_logs=transfer_logs
        )
    # annotation only, this statement doesn't assign anything
    self._revises: IsVersioned
    if pre_existing_record is not None:
        init_self_from_db(self, pre_existing_record)
    else:
        check_key_change(self)
        check_name_change(self)
        try:
            # save versioned record in presence of self._revises
            if isinstance(self, IsVersioned) and self._revises is not None:
                assert self._revises.is_latest  # noqa: S101
                revises = self._revises
                revises.is_latest = False
                # the revised record and the new version are written atomically
                with transaction.atomic():
                    revises._revises = None  # ensure we don't start a recursion
                    revises.save()
                    super().save(*args, **kwargs)  # type: ignore
                self._revises = None
            # save unversioned record
            else:
                super().save(*args, **kwargs)
        except IntegrityError as e:
            error_msg = str(e)
            # two possible error messages for hash duplication
            # "duplicate key value violates unique constraint"
            # "UNIQUE constraint failed"
            if (
                "UNIQUE constraint failed" in error_msg
                or "duplicate key value violates unique constraint" in error_msg
            ) and "hash" in error_msg:
                pre_existing_record = self.__class__.get(hash=self.hash)
                logger.warning(
                    f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
                )
                init_self_from_db(self, pre_existing_record)
            else:
                raise
        _store_record_old_name(self)
        _store_record_old_key(self)
    # perform transfer of many-to-many fields
    # only supported for Artifact and Collection records
    if db is not None and db != "default" and using_key is None:
        if self.__class__.__name__ == "Collection":
            if len(artifacts) > 0:
                logger.info("transfer artifacts")
                for artifact in artifacts:
                    artifact.save()
                self.artifacts.add(*artifacts)
        if hasattr(self, "labels"):
            from copy import copy

            from lamindb.models._feature_manager import FeatureManager

            # here we go back to original record on the source database
            self_on_db = copy(self)
            self_on_db._state.db = db
            self_on_db.pk = pk_on_db  # manually set the primary key
            self_on_db.features = FeatureManager(self_on_db)  # type: ignore
            self.features._add_from(self_on_db, transfer_logs=transfer_logs)
            self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
        # report everything except the "run" entry, which isn't a list of names
        for k, v in transfer_logs.items():
            if k != "run":
                logger.important(f"{k} records: {', '.join(v)}")

    # link the record to the project of the current context, if there is one
    if self.__class__.__name__ in {
        "Artifact",
        "Transform",
        "Run",
        "ULabel",
        "Feature",
        "Schema",
        "Collection",
        "Reference",
    }:
        import lamindb as ln

        if ln.context.project is not None:
            self.projects.add(ln.context.project)
    return self
856
+
857
def delete(self) -> None:
    """Delete this record, promoting an older version to latest if needed.

    When the record is versioned and currently flagged as the latest version,
    the most recently created non-latest record sharing the same ``stem_uid``
    prefix is flagged as latest, and both changes are written in a single
    transaction. In every other case the record is simply deleted.
    """
    # This logic does not fire when a record is moved to the trash: trashing
    # is meant to move the entire version family, whereas a permanent delete
    # only removes a single record of the family. Permanently deleting whole
    # version families could be made easy as well, but that is for another
    # time.
    successor = None
    if isinstance(self, IsVersioned) and self.is_latest:
        older_versions = self.__class__.objects.using(self._state.db).filter(
            is_latest=False, uid__startswith=self.stem_uid
        )
        successor = older_versions.order_by("-created_at").first()

    if successor is None:
        super().delete()
        return None

    successor.is_latest = True
    # promote the successor and remove this record atomically
    with transaction.atomic():
        successor.save()
        super().delete()  # type: ignore
    logger.warning(f"new latest version is {successor}")
    return None
880
+
881
+
882
class Space(BasicRecord):
    """Spaces.

    A space has a name, a universal id, an optional description, and records
    when and by whom it was created.
    """

    id: int = models.SmallAutoField(primary_key=True)
    """Internal id, valid only in one DB instance."""
    name: str = models.CharField(max_length=100, db_index=True)
    """Name of space."""
    uid: str = CharField(
        editable=False,
        unique=True,
        max_length=12,
        default="00000000",
        db_default="00000000",
        db_index=True,
    )
    """Universal id."""
    description: str | None = CharField(null=True)
    """Description of space."""
    created_at: datetime = DateTimeField(
        editable=False, db_default=models.functions.Now(), db_index=True
    )
    """Time of creation of record."""
    created_by: User = ForeignKey(
        "User", CASCADE, default=None, related_name="+", null=True
    )
    """Creator of space."""
908
+
909
+
910
# NOTE(review): the bare `{}` in the class docstring is presumably filled in by
# `doc_args` with RECORD_REGISTRY_EXAMPLE -- keep exactly one placeholder and no
# other curly braces in that docstring.
@doc_args(RECORD_REGISTRY_EXAMPLE)
class Record(BasicRecord, metaclass=Registry):
    """Metadata record.

    Every `Record` is a data model that comes with a registry in form of a SQL
    table in your database.

    Sub-classing `Record` creates a new registry while instantiating a `Record`
    creates a new record.

    {}

    `Record`'s metaclass is :class:`~lamindb.models.Registry`.

    `Record` inherits from Django's `Model` class. Why does LaminDB call it `Record`
    and not `Model`? The term `Record` can't lead to confusion with statistical,
    machine learning or biological models.
    """

    # Indexed small integer; both the Python-side and the database-side default
    # are 1, which the docstring below lists as "default (visible)".
    _branch_code: int = models.SmallIntegerField(db_index=True, default=1, db_default=1)
    """Whether record is on a branch, in archive or in trash.

    This dictates whether a record appears in queries & searches.

    Coding is as follows:

    - 3: template (hidden in queries & searches)
    - 2: draft (hidden in queries & searches)
    - 1: default (visible in queries & searches)
    - 0: archive (hidden, meant to be kept)
    - -1: trash (hidden, scheduled for deletion)

    Any integer higher than >3 codes a branch that's involved in a pull request.
    """
    # PROTECT is passed as the on-delete behavior; new records default to the
    # space with id 1.
    space: Space = ForeignKey(Space, PROTECT, default=1, db_default=1)
    """The space in which the record lives."""
    # Nullable JSON column, unset (None) by default.
    _aux: dict[str, Any] | None = JSONField(default=None, db_default=None, null=True)
    """Auxiliary field for dictionary-like metadata."""

    class Meta:
        # Record itself is declared abstract; concrete registries subclass it.
        abstract = True
951
+
952
+
953
def _format_django_validation_error(record: Record, e: DjangoValidationError):
    """Render a Django validation error as an indented, colorized message.

    Args:
        record: The record that failed validation; its current field values
            are quoted in the message.
        e: The validation error, carrying either an ``error_dict`` (errors per
            field) or an ``error_list`` (treated as non-field errors).

    Returns:
        The formatted message, or ``None`` if there is nothing to report.
    """
    if hasattr(e, "error_dict"):
        errors_by_field = e.error_dict
    else:
        errors_by_field = {"__all__": e.error_list}

    # one entry per field: a later error on the same field replaces the earlier
    rendered = {}
    for field_name, error_list in errors_by_field.items():
        for error in error_list:
            msg = error.message if hasattr(error, "message") else str(error)
            if field_name == "__all__":
                rendered[field_name] = f"{colors.yellow(msg)}"
                continue
            current_value = getattr(record, field_name, None)
            rendered[field_name] = (
                f"{field_name}: {colors.yellow(current_value)} is not valid\n → {msg}"
            )

    if not rendered:
        return None
    return "\n " + "".join(text + "\n " for text in rendered.values())
982
+
983
+
984
def _annotation_before_default(text: str) -> str:
    """Return the type annotation at the start of *text*.

    The annotation ends at the first ``=`` or ``,`` that is not nested inside
    brackets, so ``dict[str, int] | None = None,`` yields
    ``dict[str, int] | None``.
    """
    depth = 0
    for position, char in enumerate(text):
        if char in "[({":
            depth += 1
        elif char in "])}":
            depth -= 1
        elif depth <= 0 and char in "=,":
            return text[:position].strip()
    return text.strip()


def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
    """Gets the parameters of a Record from the overloaded signature.

    The source of ``record_class`` is scanned for ``@overload``-decorated
    ``__init__`` stubs (``...`` bodies). The first stub that is not the
    ``*db_args`` constructor is parsed one line per parameter.

    Args:
        record_class: The class whose source is inspected.

    Returns:
        ``(name, annotation)`` pairs with the annotation kept as source text,
        or an empty list if no suitable overload is found.

    Example:
        >>> _get_record_kwargs(bt.Organism)
        >>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
    """
    source = inspect.getsource(record_class)

    # Find first overload that's not *db_args
    pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"

    for overload in re.finditer(pattern, source):
        params_block = overload.group(1)
        # This is an additional safety measure if the overloaded signature that
        # we're looking for is not at the top but a "db_args" constructor
        if "*db_args" in params_block:
            continue

        params = []
        for line in params_block.split("\n"):
            # "name: annotation [= default]," -- lines without an annotation
            # (e.g. "self," or "*,") do not match and are skipped
            match = re.match(r"(\w+)\s*:\s*(.*)", line.strip())
            if not match:
                continue
            name = match.group(1)
            # skip only the parameter literally named self, not every line that
            # happens to contain the substring "self"
            if name == "self":
                continue
            # Keep type as string instead of evaluating
            type_str = _annotation_before_default(match.group(2))
            if not type_str:
                continue
            params.append((name, type_str))

        return params

    return []
1036
+
1037
+
1038
def _search(
    cls,
    string: str,
    *,
    field: StrField | list[StrField] | None = None,
    limit: int | None = 20,
    case_sensitive: bool = False,
    using_key: str | None = None,
    truncate_string: bool = False,
) -> QuerySet:
    """Return records whose text fields contain `string`, best matches first.

    Args:
        cls: Registry, queryset or manager to search; resolved via `_queryset`.
        string: The search term.
        field: Field(s) to search, given as names or field objects; if `None`,
            all `CharField` and `TextField` columns of the model are searched.
        limit: Upper index of the slice applied to the ranked queryset.
        case_sensitive: Use case-sensitive lookups instead of case-insensitive ones.
        using_key: Forwarded to `_queryset`.
        truncate_string: If the term is longer than 5 characters, search only
            for its first 80%.

    Returns:
        The queryset filtered to rows containing the term in at least one of the
        searched fields, ordered by descending rank.

    Raises:
        ValueError: If `string` is `None`.
        TypeError: If an entry of `field` is neither a string nor an object with
            a `.field.name` attribute.
    """
    if string is None:
        raise ValueError("Cannot search for None value! Please pass a valid string.")

    input_queryset = _queryset(cls, using_key=using_key)
    registry = input_queryset.model
    # the name field gets an extra rank rule when truncate_string is set (see below)
    name_field = getattr(registry, "_name_field", "name")
    if field is None:
        fields = [
            field.name
            for field in registry._meta.fields
            if field.get_internal_type() in {"CharField", "TextField"}
        ]
    else:
        if not isinstance(field, list):
            fields_input = [field]
        else:
            fields_input = field
        # normalize every entry to a plain field name
        fields = []
        for field in fields_input:
            if not isinstance(field, str):
                try:
                    fields.append(field.field.name)
                except AttributeError as error:
                    raise TypeError(
                        "Please pass a Record string field, e.g., `CellType.name`!"
                    ) from error
            else:
                fields.append(field)

    if truncate_string:
        if (len_string := len(string)) > 5:
            n_80_pct = int(len_string * 0.8)
            string = string[:n_80_pct]

    string = string.strip()
    # escaped variant for embedding the term in the regex patterns below
    string_escape = re.escape(string)

    exact_lookup = Exact if case_sensitive else IExact
    regex_lookup = Regex if case_sensitive else IRegex
    contains_lookup = Contains if case_sensitive else IContains

    # every rule contributes a boolean cast to an integer times a weight;
    # the per-row rank is the sum over all rules and all fields
    ranks = []
    contains_filters = []
    for field in fields:
        # cast to text and replace NULL by "" so all lookups compare strings
        field_expr = Coalesce(
            Cast(field, output_field=TextField()),
            Value(""),
            output_field=TextField(),
        )
        # exact rank
        exact_expr = exact_lookup(field_expr, string)
        exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
        ranks.append(exact_rank)
        # exact synonym
        synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
        synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
        ranks.append(synonym_rank)
        # match as sub-phrase
        sub_expr = regex_lookup(
            field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
        )
        sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
        ranks.append(sub_rank)
        # startswith and avoid matching string with " " on the right
        # mostly for truncated
        startswith_expr = regex_lookup(
            field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
        )
        startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
        ranks.append(startswith_rank)
        # match as sub-phrase from the left, mostly for truncated
        right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
        right_rank = Cast(right_expr, output_field=IntegerField()) * 2
        ranks.append(right_rank)
        # match as sub-phrase from the right
        left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
        left_rank = Cast(left_expr, output_field=IntegerField()) * 2
        ranks.append(left_rank)
        # simple contains filter
        contains_expr = contains_lookup(field_expr, string)
        contains_filter = Q(contains_expr)
        contains_filters.append(contains_filter)
        # also rank by contains
        contains_rank = Cast(contains_expr, output_field=IntegerField())
        ranks.append(contains_rank)
        # additional rule for truncated strings
        # weight matches from the beginning of the string higher
        # sometimes whole words get truncated and startswith_expr is not enough
        if truncate_string and field == name_field:
            startswith_lookup = StartsWith if case_sensitive else IStartsWith
            name_startswith_expr = startswith_lookup(field_expr, string)
            name_startswith_rank = (
                Cast(name_startswith_expr, output_field=IntegerField()) * 2
            )
            ranks.append(name_startswith_rank)

    # keep rows where any searched field contains the term, then order by rank
    ranked_queryset = (
        input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
        .alias(rank=sum(ranks))
        .order_by("-rank")
    )

    return ranked_queryset[:limit]
1151
+
1152
+
1153
def _lookup(
    cls,
    field: StrField | None = None,
    return_field: StrField | None = None,
    using_key: str | None = None,
) -> NamedTuple:
    """{}"""  # noqa: D415
    # resolve the registry/queryset/manager to a queryset, then the field to
    # the name of the column whose values become the lookup keys
    records = _queryset(cls, using_key=using_key)
    field = get_name_field(registry=records.model, field=field)

    lookup_builder = Lookup(
        records=records,
        values=[row.get(field) for row in records.values()],
        tuple_name=cls.__class__.__name__,
        prefix="ln",
    )
    if return_field is None:
        return lookup_builder.lookup(return_field=None)
    # only resolve the return field when one was requested
    return lookup_builder.lookup(
        return_field=get_name_field(registry=records.model, field=return_field)
    )
1175
+
1176
+
1177
def get_name_field(
    registry: type[Record] | QuerySet | Manager,
    *,
    field: str | StrField | None = None,
) -> str:
    """Resolve the name of the field that identifies records of a registry.

    Args:
        registry: A registry class, or a queryset/manager whose model is used.
        field: An explicit field, given by name or as an object exposing
            ``.field.name``. If ``None``, the default is looked up: the
            registry's ``_name_field``, else a field called ``name``, else the
            first char or text field whose name does not contain ``"id"``.

    Returns:
        The field name as a string.

    Raises:
        ValueError: If no field was passed and no default can be found.
        TypeError: If ``field`` is neither a string nor a field object.
    """
    if isinstance(registry, (QuerySet, Manager)):
        registry = registry.model
    model_field_names = [model_field.name for model_field in registry._meta.fields]

    if field is None:
        # fall back to the registry's default name field
        if hasattr(registry, "_name_field"):
            default_field = registry._meta.get_field(registry._name_field)
        elif "name" in model_field_names:
            default_field = registry._meta.get_field("name")
        else:
            # first char or text field that doesn't contain "id"
            default_field = next(
                (
                    candidate
                    for candidate in registry._meta.fields
                    if "id" not in candidate.name
                    and candidate.get_internal_type() in {"CharField", "TextField"}
                ),
                None,
            )
        if default_field is None:
            # no default name field can be found
            raise ValueError(
                "please pass a Record string field, e.g., `CellType.name`!"
            )
        field = default_field.name  # type:ignore

    if isinstance(field, str):
        return field
    try:
        return field.field.name
    except AttributeError:
        raise TypeError(
            "please pass a Record string field, e.g., `CellType.name`!"
        ) from None
1218
+
1219
+
1220
def _queryset(cls: Record | QuerySet | Manager, using_key: str) -> QuerySet:
    """Return a queryset over all records of `cls`.

    Querysets and managers are returned via their own ``.all()``. A registry
    is queried through ``.objects`` unless `using_key` names a non-default
    database, in which case ``cls.using(using_key)`` is queried.
    """
    if isinstance(cls, (QuerySet, Manager)):
        return cls.all()
    if using_key is not None and using_key != "default":
        # using must be called on cls, otherwise the connection isn't found
        return cls.using(using_key).all()
    return cls.objects.all()
1228
+
1229
+
1230
def add_db_connection(db: str, using: str):
    """Register the database URL `db` in the connection settings under `using`."""
    db_config = dj_database_url.config(
        default=db, conn_max_age=600, conn_health_checks=True
    )
    db_config.update({"TIME_ZONE": "UTC", "OPTIONS": {}, "AUTOCOMMIT": True})
    connections.settings[using] = db_config
1238
+
1239
+
1240
# Maps a foreign-key name to the field used to look up the matching record when
# `update_fk_to_default_db` re-points that foreign key; foreign keys not listed
# here are matched on "uid".
REGISTRY_UNIQUE_FIELD = {
    "storage": "root",
    "feature": "name",
    "ulabel": "name",
    "space": "name",  # TODO: this should be updated with the currently used space instead during transfer
}
1246
+
1247
+
1248
def update_fk_to_default_db(
    records: Record | list[Record] | QuerySet,
    fk: str,
    using_key: str | None,
    transfer_logs: dict,
):
    """Re-point the foreign key `fk` of `records` to the matching looked-up record.

    For a list or queryset only the first record is inspected, and the
    resulting foreign-key id is assigned to every record. Nothing happens if
    the inspected record has no ``{fk}_id`` or it is ``None``.

    The related record is looked up by the field configured in
    ``REGISTRY_UNIQUE_FIELD`` (``uid`` by default); if it cannot be found, a
    copy is handed to ``transfer_to_default_db`` with ``save=True``.
    """
    is_bulk = isinstance(records, (list, QuerySet))
    representative = records[0] if is_bulk else records
    fk_id_attr = f"{fk}_id"
    if getattr(representative, fk_id_attr, None) is None:
        return

    fk_record = getattr(representative, fk)
    lookup_field = REGISTRY_UNIQUE_FIELD.get(fk, "uid")
    lookup = {lookup_field: getattr(fk_record, lookup_field)}
    fk_record_default = fk_record.__class__.filter(**lookup).one_or_none()
    if fk_record_default is None:
        from copy import copy

        fk_record_default = copy(fk_record)
        transfer_to_default_db(
            fk_record_default, using_key, save=True, transfer_logs=transfer_logs
        )

    targets = records if is_bulk else [records]
    for target in targets:
        # clear the related object first, then set the raw id
        setattr(target, f"{fk}", None)
        setattr(target, fk_id_attr, fk_record_default.id)
1275
+
1276
+
1277
# Foreign keys that `transfer_fk_to_default_db_bulk` updates once for a whole
# batch of records; `transfer_to_default_db` skips them when called with
# `transfer_fk=False`.
FKBULK = [
    "organism",
    "source",
    "report",  # Run
]
1282
+
1283
+
1284
def transfer_fk_to_default_db_bulk(
    records: list | QuerySet, using_key: str | None, transfer_logs: dict
):
    """Run `update_fk_to_default_db` on `records` for each key in `FKBULK`."""
    for bulk_fk in FKBULK:
        update_fk_to_default_db(
            records, bulk_fk, using_key, transfer_logs=transfer_logs
        )
1289
+
1290
+
1291
def get_transfer_run(record) -> Run:
    """Get or create the run that tracks transfers from the record's source instance.

    The source instance is identified by ``record._state.db``; its uid is read
    from a cache file in the lamindb-setup cache directory. A transform with
    uid ``<instance_uid>0000`` and key ``transfers/<instance_uid>`` is looked
    up or created, and one run per combination of that transform and the
    current ``context.run`` is reused.

    Args:
        record: A record loaded from the source instance.

    Returns:
        The run to attach to transferred records.

    Raises:
        SystemExit: If the instance uid cache file does not exist.
    """
    from lamindb import settings
    from lamindb.core._context import context
    from lamindb.models import Run, Transform
    from lamindb.models.artifact import WARNING_RUN_TRANSFORM

    slug = record._state.db
    owner, name = get_owner_name_from_identifier(slug)
    cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
    if not cache_filepath.exists():
        raise SystemExit("Need to call .using() before")
    instance_uid = cache_filepath.read_text().split("\n")[0]
    key = f"transfers/{instance_uid}"
    uid = instance_uid + "0000"
    transform = Transform.filter(uid=uid).one_or_none()
    if transform is None:
        # search_names is switched off while the transform is created; restore
        # it even if the save fails so the global setting never stays changed
        search_names = settings.creation.search_names
        settings.creation.search_names = False
        try:
            transform = Transform(  # type: ignore
                uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
            ).save()
        finally:
            settings.creation.search_names = search_names
    # use the global run context to get the initiated_by_run run id
    if context.run is not None:
        initiated_by_run = context.run
    else:
        if not settings.creation.artifact_silence_missing_run_warning:
            logger.warning(WARNING_RUN_TRANSFORM)
        initiated_by_run = None
    # it doesn't seem to make sense to create new runs for every transfer
    run = Run.filter(
        transform=transform, initiated_by_run=initiated_by_run
    ).one_or_none()
    if run is None:
        run = Run(transform=transform, initiated_by_run=initiated_by_run).save()  # type: ignore
        run.initiated_by_run = initiated_by_run  # so that it's available in memory
    return run
1328
+
1329
+
1330
def transfer_to_default_db(
    record: Record,
    using_key: str | None,
    *,
    transfer_logs: dict,
    save: bool = False,
    transfer_fk: bool = True,
) -> Record | None:
    """Prepare `record` from another database for saving on the default database.

    Args:
        record: The record to transfer; it is modified in place.
        using_key: Passed through to `update_fk_to_default_db`.
        transfer_logs: Dict with the keys ``"run"``, ``"mapped"`` and
            ``"transferred"``; the transfer run is created on first use and
            the record is logged under one of the two lists.
        save: Whether to save the record once it has been re-pointed.
        transfer_fk: If ``False``, foreign keys listed in ``FKBULK`` are left
            untouched because they are transferred in bulk elsewhere.

    Returns:
        The record with the same uid found via ``registry.objects`` if there is
        one, otherwise ``None`` (also when the record is already on the default
        database).
    """
    source_db = record._state.db
    if source_db is None or source_db == "default":
        return None

    existing = record.__class__.objects.filter(uid=record.uid).one_or_none()
    record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
    if transfer_logs["run"] is None:
        transfer_logs["run"] = get_transfer_run(record)
    if existing is not None:
        transfer_logs["mapped"].append(record_str)
        return existing
    transfer_logs["transferred"].append(record_str)

    if hasattr(record, "created_by_id"):
        record.created_by = None
        record.created_by_id = ln_setup.settings.user.id
    # run & transform
    transfer_run = transfer_logs["run"]
    if hasattr(record, "run_id"):
        record.run = None
        record.run_id = transfer_run.id
    # deal with denormalized transform FK on artifact and collection
    if hasattr(record, "transform_id"):
        record.transform = None
        record.transform_id = transfer_run.transform_id

    # transfer other foreign key fields
    skipped = {"created_by", "run", "transform"}
    if not transfer_fk:
        # don't transfer fk fields that are already bulk transferred
        skipped = skipped | set(FKBULK)
    remaining_fks = [
        model_field.name
        for model_field in record._meta.fields
        if model_field.get_internal_type() == "ForeignKey"
        and model_field.name not in skipped
    ]
    for fk in remaining_fks:
        update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)

    # drop the old primary key and mark the record as living on the default database
    record.id = None
    record._state.db = "default"
    if save:
        record.save()
    return None
1380
+
1381
+
1382
def _store_record_old_name(record: Record):
    """Remember the record's current name in ``_old_name``.

    The value is read from the field named by ``record._name_field`` so that
    renaming can be detected upon save; records without ``_name_field`` are
    left untouched.
    """
    if not hasattr(record, "_name_field"):
        return
    record._old_name = getattr(record, record._name_field)
1386
+
1387
+
1388
def _store_record_old_key(record: Record):
    """Remember the current key of an artifact or transform in ``_old_key``.

    This allows key changes to be detected upon save; other record types are
    left untouched.
    """
    from lamindb.models import Artifact, Transform

    if not isinstance(record, (Artifact, Transform)):
        return
    record._old_key = record.key
1394
+
1395
+
1396
def check_name_change(record: Record):
    """Refuse renaming a label or feature that validated artifacts depend on.

    Nothing is checked for unsaved records, for records without a stored old
    name or name field, or for artifacts, collections, transforms and schemas.

    Raises:
        RecordNameChangeIntegrityError: If the name changed and at least one
            linked artifact is found; the details are logged as an error first.
    """
    from lamindb.models import Artifact, Collection, Feature, Schema, Transform

    is_tracked = (
        record.pk and hasattr(record, "_old_name") and hasattr(record, "_name_field")
    )
    if not is_tracked:
        return

    # artifacts, collections and transforms are checked in check_key_change or
    # not checked at all; renaming feature sets (schemas) is not checked
    if isinstance(record, (Artifact, Collection, Transform, Schema)):
        return

    old_name = record._old_name
    new_name = getattr(record, record._name_field)
    registry = record.__class__.__name__

    name_changed = old_name != new_name
    if not name_changed:
        return

    if hasattr(record, "artifacts"):
        # when a label is renamed, only raise if it is linked through a feature
        link_filter = {f"{registry.lower()}_id": record.pk}
        linked_records = (
            record.artifacts.through.filter(label_ref_is_name=True, **link_filter)
            .exclude(feature_id=None)  # must have a feature
            .exclude(
                feature_ref_is_name=None
            )  # must be linked via Curator and therefore part of a schema
            .distinct()
        )
        artifact_ids = linked_records.list("artifact__uid")
        n = len(artifact_ids)
        if n == 0:
            return
        s = "s" if n > 1 else ""
        logger.error(
            f"You are trying to {colors.red('rename label')} from '{old_name}' to '{new_name}'!\n"
            f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {artifact_ids}\n\n"
            f"{colors.bold('To rename this label')}, make it external:\n"
            f" → run `artifact.labels.make_external(label)`\n\n"
            f"After renaming, consider re-curating the above artifact{s}:\n"
            f' → in each dataset, manually modify label "{old_name}" to "{new_name}"\n'
            f" → run `ln.Curator`\n"
        )
        raise RecordNameChangeIntegrityError

    if isinstance(record, Feature):
        # when a feature is renamed: only internal features are associated
        # with schemas
        linked_artifacts = Artifact.filter(feature_sets__features=record).list(
            "uid"
        )
        n = len(linked_artifacts)
        if n == 0:
            return
        s = "s" if n > 1 else ""
        logger.error(
            f"You are trying to {colors.red('rename feature')} from '{old_name}' to '{new_name}'!\n"
            f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {linked_artifacts}\n\n"
            f"{colors.bold('To rename this feature')}, make it external:\n"
            " → run `artifact.features.make_external(feature)`\n\n"
            f"After renaming, consider re-curating the above artifact{s}:\n"
            f" → in each dataset, manually modify feature '{old_name}' to '{new_name}'\n"
            f" → run `ln.Curator`\n"
        )
        raise RecordNameChangeIntegrityError
1466
+
1467
+
1468
def check_key_change(record: Union[Artifact, Transform]):
    """Errors if a record's key has falsely changed.

    Only artifacts carrying a stored ``_old_key`` are checked. A changed key
    is accepted only if the key is virtual and the suffix stays the same.

    Raises:
        InvalidArgument: If a non-virtual key was changed, or if the suffix of
            the new key differs from the expected suffix.
    """
    from .artifact import Artifact

    if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
        return

    old_key = record._old_key or ""
    new_key = record.key or ""
    key_changed = old_key != new_key
    if not key_changed:
        return

    if not record._key_is_virtual:
        raise InvalidArgument(
            f"Changing a non-virtual key of an artifact is not allowed! Tried to change key from '{old_key}' to '{new_key}'."
        )

    # the record's own suffix wins; otherwise derive it from the old key
    if record.suffix:
        old_key_suffix = record.suffix
    else:
        old_key_suffix = extract_suffix_from_path(
            PurePosixPath(old_key), arg_name="key"
        )
    new_key_suffix = extract_suffix_from_path(PurePosixPath(new_key), arg_name="key")
    if old_key_suffix != new_key_suffix:
        raise InvalidArgument(
            f"The suffix '{new_key_suffix}' of the provided key is incorrect, it should be '{old_key_suffix}'."
        )
1495
+
1496
+
1497
def format_field_value(value: datetime | str | Any) -> Any:
    """Format a field value for display in a record's repr.

    Datetimes are rendered as ``%Y-%m-%d %H:%M:%S %Z``. Strings are wrapped in
    single quotes, after being rendered the same way if they parse as ISO
    timestamps. Any other value is returned unchanged.
    """
    from datetime import datetime

    timestamp_format = "%Y-%m-%d %H:%M:%S %Z"

    if isinstance(value, datetime):
        return value.strftime(timestamp_format)
    if not isinstance(value, str):
        return value

    try:
        value = datetime.fromisoformat(value)
        value = value.strftime(timestamp_format)
    except ValueError:
        # not an ISO timestamp: quote whatever value holds at this point
        pass
    return f"'{value}'"
1512
+
1513
+
1514
class RecordInfo:
    """Builds the field overview shown when a registry is displayed.

    Fields are split into simple (non-relational) and relational fields, and
    relational fields are further grouped into core and external-module
    fields.
    """

    def __init__(self, registry: Registry):
        self.registry = registry

    def _get_type_for_field(self, field_name: str) -> str:
        """Return the related model's name, or else the field's internal type."""
        field = self.registry._meta.get_field(field_name)
        related_model_name = (
            field.related_model.__name__
            if hasattr(field, "related_model") and field.related_model
            else None
        )
        return related_model_name if related_model_name else field.get_internal_type()

    def _get_base_class_fields(self) -> list[str]:
        """Return the names of all fields of the registry's direct base classes."""
        return [
            field.name
            for base in self.registry.__bases__
            if hasattr(base, "_meta")
            for field in base._meta.get_fields()
        ]

    def _reorder_fields_by_class(self, fields_to_order: list[Field]) -> list[Field]:
        """Reorders the fields so that base class fields come last.

        The relative order within each of the two groups is preserved.
        """
        # computed once instead of once per field and per group
        base_class_field_names = set(self._get_base_class_fields())
        non_base_class_fields = []
        found_base_class_fields = []
        for field in fields_to_order:
            if field.name in base_class_field_names:
                found_base_class_fields.append(field)
            else:
                non_base_class_fields.append(field)
        return non_base_class_fields + found_base_class_fields

    def get_simple_fields(self, return_str: bool = False) -> Any:
        """Return the non-relational, public fields of the registry.

        Relational fields, fields starting with ``_`` and the ``id`` field are
        left out. With ``return_str=True`` a formatted, multi-line string is
        returned instead of the list of field objects.
        """
        simple_fields = [
            field
            for field in self.registry._meta.get_fields()
            if not (
                isinstance(field, ManyToOneRel)
                or isinstance(field, ManyToManyRel)
                or isinstance(field, ManyToManyField)
                or isinstance(field, ForeignKey)
                or field.name.startswith("_")
                or field.name == "id"
            )
        ]
        simple_fields = self._reorder_fields_by_class(simple_fields)
        if not return_str:
            return simple_fields
        else:
            repr_str = f" {colors.italic('Simple fields')}\n"
            if simple_fields:
                repr_str += "".join(
                    [
                        f" .{field_name.name}: {self._get_type_for_field(field_name.name)}\n"
                        for field_name in simple_fields
                    ]
                )
            return repr_str

    def get_relational_fields(self, return_str: bool = False):
        """Return the relational fields of the registry.

        With ``return_str=False`` a tuple is returned: the list of core fields
        and a dict mapping an external module name to its fields. With
        ``return_str=True`` a formatted, multi-line string is returned.
        """
        # we ignore ManyToOneRel because it leads to so much clutter in the API
        # also note that our general guideline is to have related_name="+"
        # for ForeignKey fields
        relational_fields = (ManyToOneRel, ManyToManyRel, ManyToManyField, ForeignKey)

        class_specific_relational_fields = [
            field
            for field in self.registry._meta.fields + self.registry._meta.many_to_many
            if isinstance(field, relational_fields)
            and not field.name.startswith(("links_", "_"))
        ]

        non_class_specific_relational_fields = [
            field
            for field in self.registry._meta.get_fields()
            if isinstance(field, relational_fields)
            and not field.name.startswith(("links_", "_"))
        ]
        non_class_specific_relational_fields = self._reorder_fields_by_class(
            non_class_specific_relational_fields
        )

        # Ensure that class specific fields (e.g. Artifact) come before non-class specific fields (e.g. collection)
        filtered_non_class_specific = [
            field
            for field in non_class_specific_relational_fields
            if field not in class_specific_relational_fields
        ]
        ordered_relational_fields = (
            class_specific_relational_fields + filtered_non_class_specific
        )

        core_module_fields = []
        external_modules_fields = []
        for field in ordered_relational_fields:
            # the part of the field's repr after ": ", without the last character
            field_name = repr(field).split(": ")[1][:-1]
            # exactly one dot and no "lamindb" in it counts as an external module
            if field_name.count(".") == 1 and "lamindb" not in field_name:
                external_modules_fields.append(field)
            else:
                core_module_fields.append(field)

        def _get_related_field_type(field) -> str:
            field_type = (
                field.related_model.__get_name_with_module__()
                .replace(
                    "Artifact", ""
                )  # some fields have an unnecessary 'Artifact' in their name
                .replace(
                    "Collection", ""
                )  # some fields have an unnecessary 'Collection' in their name
            )
            # if nothing is left after stripping, fall back to the plain type name
            return (
                self._get_type_for_field(field.name)
                if not field_type.strip()
                else field_type
            )

        core_module_fields_formatted = [
            f" .{field.name}: {_get_related_field_type(field)}\n"
            for field in core_module_fields
        ]
        external_modules_fields_formatted = [
            f" .{field.name}: {_get_related_field_type(field)}\n"
            for field in external_modules_fields
        ]

        if not return_str:
            external_modules_fields_by_modules = defaultdict(list)
            for field_str, field in zip(
                external_modules_fields_formatted, external_modules_fields
            ):
                # the module name is the part of the formatted type before the first dot
                field_type = field_str.split(":")[1].split()[0]
                module_name = field_type.split(".")[0]
                external_modules_fields_by_modules[module_name].append(field)
            return core_module_fields, external_modules_fields_by_modules
        else:
            repr_str = ""

            # Non-external relational fields
            if core_module_fields:
                repr_str += f" {colors.italic('Relational fields')}\n"
                repr_str += "".join(core_module_fields_formatted)

            # External relational fields
            external_modules = set()
            for field in external_modules_fields_formatted:
                field_type = field.split(":")[1].split()[0]
                external_modules.add(field_type.split(".")[0])

            if external_modules:
                # We want Bionty to show up before other modules
                external_modules = (
                    ["bionty"] + sorted(external_modules - {"bionty"})  # type: ignore
                    if "bionty" in external_modules
                    else sorted(external_modules)
                )
                for ext_module in external_modules:
                    ext_module_fields = [
                        field
                        for field in external_modules_fields_formatted
                        if ext_module in field
                    ]

                    if ext_module_fields:
                        repr_str += (
                            f" {colors.italic(f'{ext_module.capitalize()} fields')}\n"
                        )
                        repr_str += "".join(ext_module_fields)

            return repr_str
1687
+
1688
+
1689
def registry_repr(cls):
    """Shows fields.

    Returns the registry name followed by its simple and relational fields,
    without trailing newlines.
    """
    header = f"{colors.green(cls.__name__)}\n"
    info = RecordInfo(cls)
    sections = [
        header,
        info.get_simple_fields(return_str=True),
        info.get_relational_fields(return_str=True),
    ]
    return "".join(sections).rstrip("\n")
1697
+
1698
+
1699
def record_repr(
    self: Record, include_foreign_keys: bool = True, exclude_field_names=None
) -> str:
    """Return a one-line representation ``ClassName(field=value, ...)``.

    Args:
        self: The record to represent.
        include_foreign_keys: Whether to append ``<name>_id`` for every
            foreign-key field.
        exclude_field_names: Names of non-foreign-key fields to leave out;
            defaults to ``["id", "updated_at", "source_code"]``.

    Returns:
        The representation, with ``uid`` first and ``created_at`` last when
        present. Fields starting with ``_``, attributes missing on the record
        and values that format to ``None`` are omitted.
    """
    if exclude_field_names is None:
        exclude_field_names = ["id", "updated_at", "source_code"]
    field_names = [
        field.name
        for field in self._meta.fields
        if (not isinstance(field, ForeignKey) and field.name not in exclude_field_names)
    ]
    if include_foreign_keys:
        field_names += [
            f"{field.name}_id"
            for field in self._meta.fields
            if isinstance(field, ForeignKey)
        ]
    if "created_at" in field_names:
        field_names.remove("created_at")
        field_names.append("created_at")
    # membership is checked before anything is indexed so that an empty field
    # list cannot raise an IndexError; moving a leading "uid" is a no-op
    if "uid" in field_names:
        field_names.remove("uid")
        field_names.insert(0, "uid")
    fields_str = {}
    for k in field_names:
        if not k.startswith("_") and hasattr(self, k):
            value = getattr(self, k)
            # Force strip the time component of the version
            if k == "version" and value:
                fields_str[k] = f"'{str(value).split()[0]}'"
            else:
                fields_str[k] = format_field_value(value)
    fields_joined_str = ", ".join(
        f"{k}={v}" for k, v in fields_str.items() if v is not None
    )
    return f"{self.__class__.__name__}({fields_joined_str})"
1734
+
1735
+
1736
+ # below is code to further format the repr of a record
1737
+ #
1738
+ # def format_repr(
1739
+ # record: Record, exclude_field_names: str | list[str] | None = None
1740
+ # ) -> str:
1741
+ # if isinstance(exclude_field_names, str):
1742
+ # exclude_field_names = [exclude_field_names]
1743
+ # exclude_field_names_init = ["id", "created_at", "updated_at"]
1744
+ # if exclude_field_names is not None:
1745
+ # exclude_field_names_init += exclude_field_names
1746
+ # return record.__repr__(
1747
+ # include_foreign_keys=False, exclude_field_names=exclude_field_names_init
1748
+ # )
1749
+
1750
+
1751
# Use `record_repr` for both the repr() and the str() of every Record.
Record.__repr__ = record_repr  # type: ignore
Record.__str__ = record_repr  # type: ignore
1753
+
1754
+
1755
class Migration(BasicRecord):
    # Model over the "django_migrations" table (see Meta below).
    app = CharField(max_length=255)
    name = CharField(max_length=255)
    applied: datetime = DateTimeField()

    class Meta:
        db_table = "django_migrations"
        # NOTE(review): per Django's Meta options, managed = False means Django
        # does not create or drop this table for the model -- confirm that the
        # table is expected to be maintained by Django's migration machinery.
        managed = False