lamindb 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. lamindb/__init__.py +25 -6
  2. lamindb/_finish.py +5 -5
  3. lamindb/_tracked.py +1 -1
  4. lamindb/_view.py +4 -4
  5. lamindb/core/_context.py +32 -6
  6. lamindb/core/_settings.py +1 -1
  7. lamindb/core/datasets/mini_immuno.py +8 -0
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +9 -9
  10. lamindb/core/storage/_valid_suffixes.py +1 -0
  11. lamindb/core/storage/_zarr.py +32 -107
  12. lamindb/curators/__init__.py +19 -2
  13. lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
  14. lamindb/curators/_legacy.py +15 -19
  15. lamindb/curators/core.py +247 -80
  16. lamindb/errors.py +2 -2
  17. lamindb/migrations/0069_squashed.py +8 -8
  18. lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
  19. lamindb/migrations/0073_merge_ourprojects.py +7 -7
  20. lamindb/migrations/0075_lamindbv1_part5.py +1 -1
  21. lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
  22. lamindb/migrations/0080_polish_lamindbv1.py +2 -2
  23. lamindb/migrations/0088_schema_components.py +1 -1
  24. lamindb/migrations/0090_runproject_project_runs.py +2 -2
  25. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
  26. lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
  27. lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
  28. lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
  29. lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
  30. lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
  31. lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
  32. lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
  33. lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
  34. lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
  35. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
  36. lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
  37. lamindb/models/__init__.py +35 -18
  38. lamindb/models/_describe.py +4 -4
  39. lamindb/models/_django.py +38 -4
  40. lamindb/models/_feature_manager.py +66 -123
  41. lamindb/models/_from_values.py +13 -13
  42. lamindb/models/_label_manager.py +8 -6
  43. lamindb/models/_relations.py +7 -7
  44. lamindb/models/artifact.py +166 -156
  45. lamindb/models/can_curate.py +25 -25
  46. lamindb/models/collection.py +48 -18
  47. lamindb/models/core.py +3 -3
  48. lamindb/models/feature.py +88 -60
  49. lamindb/models/has_parents.py +17 -17
  50. lamindb/models/project.py +52 -24
  51. lamindb/models/query_manager.py +5 -5
  52. lamindb/models/query_set.py +61 -37
  53. lamindb/models/record.py +158 -1583
  54. lamindb/models/run.py +39 -176
  55. lamindb/models/save.py +6 -6
  56. lamindb/models/schema.py +32 -43
  57. lamindb/models/sqlrecord.py +1743 -0
  58. lamindb/models/transform.py +17 -33
  59. lamindb/models/ulabel.py +21 -15
  60. {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
  61. lamindb-1.6.0.dist-info/RECORD +118 -0
  62. lamindb/core/storage/_anndata_sizes.py +0 -41
  63. lamindb/models/flextable.py +0 -163
  64. lamindb-1.5.3.dist-info/RECORD +0 -109
  65. {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
  66. {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0
lamindb/models/record.py CHANGED
@@ -1,928 +1,108 @@
1
1
  from __future__ import annotations
2
2
 
3
- import builtins
4
- import inspect
5
- import re
6
- import sys
7
- from collections import defaultdict
8
- from itertools import chain
9
- from pathlib import PurePosixPath
10
- from typing import (
11
- TYPE_CHECKING,
12
- Any,
13
- Literal,
14
- NamedTuple,
15
- TypeVar,
16
- Union,
17
- overload,
18
- )
3
+ from typing import TYPE_CHECKING, Any, overload
19
4
 
20
- import dj_database_url
21
- import lamindb_setup as ln_setup
22
- from django.core.exceptions import ValidationError as DjangoValidationError
23
- from django.db import IntegrityError, ProgrammingError, connections, models, transaction
24
- from django.db.models import CASCADE, PROTECT, Field, Manager, QuerySet
25
- from django.db.models.base import ModelBase
26
- from django.db.models.fields.related import (
27
- ManyToManyField,
28
- ManyToManyRel,
29
- ManyToOneRel,
30
- )
31
- from lamin_utils import colors, logger
32
- from lamindb_setup import settings as setup_settings
33
- from lamindb_setup._connect_instance import (
34
- get_owner_name_from_identifier,
35
- load_instance_settings,
36
- update_db_using_local,
37
- )
38
- from lamindb_setup.core._docs import doc_args
39
- from lamindb_setup.core._hub_core import connect_instance_hub
40
- from lamindb_setup.core._settings_store import instance_settings_file
41
- from lamindb_setup.core.django import DBToken, db_token_manager
42
- from lamindb_setup.core.upath import extract_suffix_from_path
5
+ from django.db import models
6
+ from django.db.models import CASCADE, PROTECT
43
7
 
44
- from ..base.fields import (
8
+ from lamindb.base.fields import (
9
+ BooleanField,
45
10
  CharField,
46
- DateTimeField,
47
11
  ForeignKey,
48
12
  JSONField,
49
13
  )
50
- from ..base.types import FieldAttr, StrField
51
- from ..errors import (
52
- FieldValidationError,
53
- InvalidArgument,
54
- NoWriteAccess,
55
- RecordNameChangeIntegrityError,
56
- ValidationError,
57
- )
58
- from ._is_versioned import IsVersioned
59
- from .query_manager import QueryManager, _lookup, _search
60
-
61
- if TYPE_CHECKING:
62
- from datetime import datetime
63
-
64
- import pandas as pd
65
-
66
- from .artifact import Artifact
67
- from .run import Run, User
68
- from .transform import Transform
69
-
70
-
71
- T = TypeVar("T", bound="Record")
72
- IPYTHON = getattr(builtins, "__IPYTHON__", False)
73
-
74
-
75
- # -------------------------------------------------------------------------------------
76
- # A note on required fields at the Record level
77
- #
78
- # As Django does most of its validation on the Form-level, it doesn't offer functionality
79
- # for validating the integrity of an Record object upon instantation (similar to pydantic)
80
- #
81
- # For required fields, we define them as commonly done on the SQL level together
82
- # with a validator in Record (validate_required_fields)
83
- #
84
- # This goes against the Django convention, but goes with the SQLModel convention
85
- # (Optional fields can be null on the SQL level, non-optional fields cannot)
86
- #
87
- # Due to Django's convention where CharFieldAttr has pre-configured (null=False, default=""), marking
88
- # a required field necessitates passing `default=None`. Without the validator it would trigger
89
- # an error at the SQL-level, with it, it triggers it at instantiation
90
-
91
- # -------------------------------------------------------------------------------------
92
- # A note on class and instance methods of core Record
93
- #
94
- # All of these are defined and tested within lamindb, in files starting with _{orm_name}.py
95
-
96
- # -------------------------------------------------------------------------------------
97
- # A note on maximal lengths of char fields
98
- #
99
- # 100 characters:
100
- # "Raindrops pitter-pattered on the windowpane, blurring the"
101
- # "city lights outside, curled up with a mug."
102
- # A good maximal length for a name (title).
103
- #
104
- # 150 characters: We choose this for name maximal length because some users like long names.
105
- #
106
- # 255 characters:
107
- # "In creating a precise 255-character paragraph, one engages in"
108
- # "a dance of words, where clarity meets brevity. Every syllable counts,"
109
- # "illustrating the skill in compact expression, ensuring the essence of the"
110
- # "message shines through within the exacting limit."
111
- # This is a good maximal length for a description field.
112
-
113
-
114
- class LinkORM:
115
- pass
116
-
117
-
118
- def deferred_attribute__repr__(self):
119
- return f"FieldAttr({self.field.model.__name__}.{self.field.name})"
120
-
121
-
122
- FieldAttr.__repr__ = deferred_attribute__repr__ # type: ignore
123
-
124
-
125
- class ValidateFields:
126
- pass
127
-
128
-
129
- def is_approx_pascal_case(s):
130
- """Check if the last component of a dotted string is in PascalCase.
131
-
132
- Args:
133
- s (str): The string to check
134
-
135
- Returns:
136
- bool: True if the last component is in PascalCase
137
-
138
- Raises:
139
- ValueError: If the last component doesn't start with a capital letter
140
- """
141
- if "[" in s: # this is because we allow types of form 'script[test_script.py]'
142
- return True
143
- last_component = s.split(".")[-1]
144
-
145
- if not last_component[0].isupper():
146
- raise ValueError(
147
- f"'{last_component}' should start with a capital letter given you're defining a type"
148
- )
149
-
150
- return True
151
-
14
+ from lamindb.errors import FieldValidationError
152
15
 
153
- def init_self_from_db(self: Record, existing_record: Record):
154
- new_args = [
155
- getattr(existing_record, field.attname) for field in self._meta.concrete_fields
156
- ]
157
- super(self.__class__, self).__init__(*new_args)
158
- self._state.adding = False # mimic from_db
159
- self._state.db = "default"
16
+ from ..base.ids import base62_12, base62_16
17
+ from .artifact import Artifact
18
+ from .can_curate import CanCurate
19
+ from .feature import Feature
20
+ from .run import Run, TracksRun, TracksUpdates
21
+ from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, _get_record_kwargs
22
+ from .ulabel import ULabel
160
23
 
24
+ if TYPE_CHECKING:
25
+ from .project import Project
26
+ from .schema import Schema
161
27
 
162
- def update_attributes(record: Record, attributes: dict[str, str]):
163
- for key, value in attributes.items():
164
- if getattr(record, key) != value and value is not None:
165
- if key not in {"uid", "dtype", "otype", "hash"}:
166
- logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
167
- setattr(record, key, value)
168
- else:
169
- hash_message = (
170
- "recomputing on .save()"
171
- if key == "hash"
172
- else f"keeping {getattr(record, key)}"
173
- )
174
- logger.warning(
175
- f"ignoring tentative value {value} for {key}, {hash_message}"
176
- )
177
28
 
29
+ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
30
+ """Flexible records to register, e.g., samples, donors, cells, compounds, sequences.
178
31
 
179
- def validate_literal_fields(record: Record, kwargs) -> None:
180
- """Validate all Literal type fields in a record.
32
+ This is currently more convenient to use through the UI.
181
33
 
182
34
  Args:
183
- record: record being validated
184
-
185
- Raises:
186
- ValidationError: If any field value is not in its Literal's allowed values
35
+ name: `str` A name.
36
+ description: `str` A description.
37
+
38
+ See Also:
39
+ :meth:`~lamindb.Sheet`
40
+ Sheets to group records.
41
+ :meth:`~lamindb.Feature`
42
+ Dimensions of measurement.
43
+ :attr:`~lamindb.Artifact.features`
44
+ Feature manager for an artifact.
187
45
  """
188
- if isinstance(record, LinkORM):
189
- return None
190
- if record.__class__.__name__ in "Feature":
191
- return None
192
- from lamindb.base.types import Dtype, TransformType
193
46
 
194
- types = {
195
- "TransformType": TransformType,
196
- "ArtifactKind": Dtype,
197
- "Dtype": Dtype,
198
- }
199
- errors = {}
200
- annotations = getattr(record.__class__, "__annotations__", {})
201
- for field_name, annotation in annotations.items():
202
- if field_name not in kwargs or kwargs[field_name] is None:
203
- continue
204
- value = kwargs[field_name]
205
- if str(annotation) in types:
206
- annotation = types[annotation]
207
- if not hasattr(annotation, "__origin__"):
208
- continue
209
- literal_type = annotation if annotation.__origin__ is Literal else None
210
- if literal_type is None:
211
- continue
212
- valid_values = set(literal_type.__args__)
213
- if value not in valid_values:
214
- errors[field_name] = (
215
- f"{field_name}: {colors.yellow(value)} is not a valid value"
216
- f"\n → Valid values are: {colors.green(', '.join(sorted(valid_values)))}"
217
- )
218
- if errors:
219
- message = "\n "
220
- for _, error in errors.items():
221
- message += error + "\n "
222
- raise FieldValidationError(message)
47
+ class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
48
+ abstract = False
223
49
 
50
+ _name_field: str = "name"
224
51
 
225
- def validate_fields(record: Record, kwargs):
226
- from lamindb.models import (
227
- Artifact,
228
- Collection,
229
- Feature,
230
- Param,
231
- Run,
232
- Schema,
233
- Transform,
234
- ULabel,
52
+ id: int = models.AutoField(primary_key=True)
53
+ """Internal id, valid only in one DB instance."""
54
+ uid: str = CharField(
55
+ editable=False, unique=True, db_index=True, max_length=16, default=base62_16
235
56
  )
57
+ """A universal random id, valid across DB instances."""
58
+ name: str = CharField(max_length=150, db_index=True, null=True)
59
+ """Name or title of record (optional)."""
60
+ type: Record | None = ForeignKey("self", PROTECT, null=True, related_name="records")
61
+ """Type of record, e.g., `Sample`, `Donor`, `Cell`, `Compound`, `Sequence`.
236
62
 
237
- # validate required fields
238
- # a "required field" is a Django field that has `null=False, default=None`
239
- required_fields = {
240
- k.name for k in record._meta.fields if not k.null and k.default is None
241
- }
242
- required_fields_not_passed = {k: None for k in required_fields if k not in kwargs}
243
- kwargs.update(required_fields_not_passed)
244
- missing_fields = [
245
- k for k, v in kwargs.items() if v is None and k in required_fields
246
- ]
247
- if missing_fields:
248
- raise FieldValidationError(f"{missing_fields} are required.")
249
- # ensure the exact length of the internal uid for core entities
250
- if "uid" in kwargs and record.__class__ in {
251
- Artifact,
252
- Collection,
253
- Transform,
254
- Run,
255
- ULabel,
256
- Feature,
257
- Schema,
258
- Param,
259
- }:
260
- uid_max_length = record.__class__._meta.get_field(
261
- "uid"
262
- ).max_length # triggers FieldDoesNotExist
263
- if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
264
- if not (
265
- record.__class__ is Schema and len(kwargs["uid"]) == 16
266
- ): # no error for schema
267
- raise ValidationError(
268
- f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
269
- )
270
- # validate is_type
271
- if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
272
- if kwargs["name"].endswith("s"):
273
- logger.warning(
274
- f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
275
- )
276
- is_approx_pascal_case(kwargs["name"])
277
- # validate literals
278
- validate_literal_fields(record, kwargs)
279
-
280
-
281
- def suggest_records_with_similar_names(
282
- record: Record, name_field: str, kwargs
283
- ) -> Record | None:
284
- """Returns True if found exact match, otherwise False.
285
-
286
- Logs similar matches if found.
63
+ Allows to group records by type, e.g., all samples, all donors, all cells, all compounds, all sequences.
287
64
  """
288
- if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
289
- return None
290
- # need to perform an additional request to find the exact match
291
- # previously, this was inferred from the truncated/fuzzy search below
292
- # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
293
- # the below needs to be .first() because there might be multiple records with the same
294
- # name field in case the record is versioned (e.g. for Transform key)
295
- exact_match = record.__class__.filter(**{name_field: kwargs[name_field]}).first()
296
- if exact_match is not None:
297
- return exact_match
298
- queryset = _search(
299
- record.__class__,
300
- kwargs[name_field],
301
- field=name_field,
302
- truncate_string=True,
303
- limit=3,
304
- )
305
- if not queryset.exists(): # empty queryset
306
- return None
307
- s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
308
- msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
309
- if IPYTHON:
310
- from IPython.display import display
311
-
312
- from lamindb import settings
313
-
314
- logger.warning(f"{msg}")
315
- if settings._verbosity_int >= 1:
316
- display(queryset.df())
317
- else:
318
- logger.warning(f"{msg}\n{queryset}")
319
- return None
320
-
321
-
322
- RECORD_REGISTRY_EXAMPLE = """Example::
323
-
324
- from lamindb import Record, fields
65
+ records: Record
66
+ """Records of this type (can only be non-empty if `is_type` is `True`)."""
67
+ is_type: bool = BooleanField(default=False, db_index=True, null=True)
68
+ """Distinguish types from instances of the type.
325
69
 
326
- # sub-classing `Record` creates a new registry
327
- class Experiment(Record):
328
- name: str = fields.CharField()
329
-
330
- # instantiating `Experiment` creates a record `experiment`
331
- experiment = Experiment(name="my experiment")
332
-
333
- # you can save the record to the database
334
- experiment.save()
335
-
336
- # `Experiment` refers to the registry, which you can query
337
- df = Experiment.filter(name__startswith="my ").df()
338
- """
339
-
340
-
341
- # this is the metaclass for Record
342
- @doc_args(RECORD_REGISTRY_EXAMPLE)
343
- class Registry(ModelBase):
344
- """Metaclass for :class:`~lamindb.models.Record`.
345
-
346
- Each `Registry` *object* is a `Record` *class* and corresponds to a table in the metadata SQL database.
347
-
348
- You work with `Registry` objects whenever you use *class methods* of `Record`.
349
-
350
- You call any subclass of `Record` a "registry" and their objects "records". A `Record` object corresponds to a row in the SQL table.
351
-
352
- If you want to create a new registry, you sub-class `Record`.
353
-
354
- {}
355
-
356
- Note: `Registry` inherits from Django's `ModelBase`.
70
+ For example, if a record "Compound" is a `type`, the actual compounds "darerinib", "tramerinib", would be instances of that `type`.
357
71
  """
358
-
359
- _available_fields: set[str] = None
360
-
361
- def __new__(cls, name, bases, attrs, **kwargs):
362
- new_class = super().__new__(cls, name, bases, attrs, **kwargs)
363
- return new_class
364
-
365
- # below creates a sensible auto-complete behavior that differs across the
366
- # class and instance level in Jupyter Editors it doesn't have any effect for
367
- # static type analyzer like pylance used in VSCode
368
- def __dir__(cls):
369
- # this is needed to bring auto-complete on the class-level back
370
- # https://laminlabs.slack.com/archives/C04FPE8V01W/p1717535625268849
371
- # Filter class attributes, excluding instance methods
372
- exclude_instance_methods = "sphinx" not in sys.modules
373
- # https://laminlabs.slack.com/archives/C04FPE8V01W/p1721134595920959
374
-
375
- def include_attribute(attr_name, attr_value):
376
- if attr_name.startswith("__"):
377
- return False
378
- if exclude_instance_methods and callable(attr_value):
379
- return isinstance(attr_value, (classmethod, staticmethod, type))
380
- return True
381
-
382
- # check also inherited attributes
383
- if hasattr(cls, "mro"):
384
- attrs = chain(*(c.__dict__.items() for c in cls.mro()))
385
- else:
386
- attrs = cls.__dict__.items()
387
-
388
- result = []
389
- for attr_name, attr_value in attrs:
390
- if attr_name not in result and include_attribute(attr_name, attr_value):
391
- result.append(attr_name)
392
-
393
- # Add non-dunder attributes from Registry
394
- for attr in dir(Registry):
395
- if not attr.startswith("__") and attr not in result:
396
- result.append(attr)
397
- return result
398
-
399
- def __repr__(cls) -> str:
400
- return registry_repr(cls)
401
-
402
- @doc_args(_lookup.__doc__)
403
- def lookup(
404
- cls,
405
- field: StrField | None = None,
406
- return_field: StrField | None = None,
407
- ) -> NamedTuple:
408
- """{}""" # noqa: D415
409
- return _lookup(cls=cls, field=field, return_field=return_field)
410
-
411
- def filter(cls, *queries, **expressions) -> QuerySet:
412
- """Query records.
413
-
414
- Args:
415
- queries: One or multiple `Q` objects.
416
- expressions: Fields and values passed as Django query expressions.
417
-
418
- Returns:
419
- A :class:`~lamindb.models.QuerySet`.
420
-
421
- See Also:
422
- - Guide: :doc:`docs:registries`
423
- - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
424
-
425
- Examples:
426
- >>> ln.ULabel(name="my label").save()
427
- >>> ln.ULabel.filter(name__startswith="my").df()
428
- """
429
- from .query_set import QuerySet
430
-
431
- _using_key = None
432
- if "_using_key" in expressions:
433
- _using_key = expressions.pop("_using_key")
434
-
435
- return QuerySet(model=cls, using=_using_key).filter(*queries, **expressions)
436
-
437
- def get(
438
- cls: type[T],
439
- idlike: int | str | None = None,
440
- **expressions,
441
- ) -> T:
442
- """Get a single record.
443
-
444
- Args:
445
- idlike: Either a uid stub, uid or an integer id.
446
- expressions: Fields and values passed as Django query expressions.
447
-
448
- Raises:
449
- :exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
450
-
451
- See Also:
452
- - Guide: :doc:`docs:registries`
453
- - Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
454
-
455
- Examples:
456
-
457
- ::
458
-
459
- ulabel = ln.ULabel.get("FvtpPJLJ")
460
- ulabel = ln.ULabel.get(name="my-label")
461
- """
462
- from .query_set import QuerySet
463
-
464
- return QuerySet(model=cls).get(idlike, **expressions)
465
-
466
- def df(
467
- cls,
468
- include: str | list[str] | None = None,
469
- features: bool | list[str] = False,
470
- limit: int = 100,
471
- ) -> pd.DataFrame:
472
- """Convert to `pd.DataFrame`.
473
-
474
- By default, shows all direct fields, except `updated_at`.
475
-
476
- Use arguments `include` or `feature` to include other data.
477
-
478
- Args:
479
- include: Related fields to include as columns. Takes strings of
480
- form `"ulabels__name"`, `"cell_types__name"`, etc. or a list
481
- of such strings.
482
- features: If `True`, map all features of the
483
- :class:`~lamindb.Feature` registry onto the resulting
484
- `DataFrame`. Only available for `Artifact`.
485
- limit: Maximum number of rows to display from a Pandas DataFrame.
486
- Defaults to 100 to reduce database load.
487
-
488
- Examples:
489
-
490
- Include the name of the creator in the `DataFrame`:
491
-
492
- >>> ln.ULabel.df(include="created_by__name"])
493
-
494
- Include display of features for `Artifact`:
495
-
496
- >>> df = ln.Artifact.df(features=True)
497
- >>> ln.view(df) # visualize with type annotations
498
-
499
- Only include select features:
500
-
501
- >>> df = ln.Artifact.df(features=["cell_type_by_expert", "cell_type_by_model"])
502
- """
503
- query_set = cls.filter()
504
- if hasattr(cls, "updated_at"):
505
- query_set = query_set.order_by("-updated_at")
506
- return query_set[:limit].df(include=include, features=features)
507
-
508
- @doc_args(_search.__doc__)
509
- def search(
510
- cls,
511
- string: str,
512
- *,
513
- field: StrField | None = None,
514
- limit: int | None = 20,
515
- case_sensitive: bool = False,
516
- ) -> QuerySet:
517
- """{}""" # noqa: D415
518
- return _search(
519
- cls=cls,
520
- string=string,
521
- field=field,
522
- limit=limit,
523
- case_sensitive=case_sensitive,
524
- )
525
-
526
- def using(
527
- cls,
528
- instance: str | None,
529
- ) -> QuerySet:
530
- """Use a non-default LaminDB instance.
531
-
532
- Args:
533
- instance: An instance identifier of form "account_handle/instance_name".
534
-
535
- Examples:
536
- >>> ln.ULabel.using("account_handle/instance_name").search("ULabel7", field="name")
537
- uid score
538
- name
539
- ULabel7 g7Hk9b2v 100.0
540
- ULabel5 t4Jm6s0q 75.0
541
- ULabel6 r2Xw8p1z 75.0
542
- """
543
- from .query_set import QuerySet
544
-
545
- # connection already established
546
- if instance in connections:
547
- return QuerySet(model=cls, using=instance)
548
- # we're in the default instance
549
- if instance is None or instance == "default":
550
- return QuerySet(model=cls, using=None)
551
- owner, name = get_owner_name_from_identifier(instance)
552
- if [owner, name] == setup_settings.instance.slug.split("/"):
553
- return QuerySet(model=cls, using=None)
554
-
555
- # move on to different instances
556
- cache_using_filepath = (
557
- setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
558
- )
559
- settings_file = instance_settings_file(name, owner)
560
- if not settings_file.exists():
561
- result = connect_instance_hub(owner=owner, name=name)
562
- if isinstance(result, str):
563
- raise RuntimeError(
564
- f"Failed to load instance {instance}, please check your permissions!"
565
- )
566
- iresult, _ = result
567
- # do not use {} syntax below, it gives rise to a dict if the schema modules
568
- # are empty and then triggers a TypeError in missing_members = source_modules - target_modules
569
- source_modules = set( # noqa
570
- [mod for mod in iresult["schema_str"].split(",") if mod != ""]
571
- )
572
- # this just retrives the full connection string from iresult
573
- db = update_db_using_local(iresult, settings_file)
574
- cache_using_filepath.write_text(
575
- f"{iresult['lnid']}\n{iresult['schema_str']}"
576
- )
577
- # need to set the token if it is a fine_grained_access and the user is jwt (not public)
578
- is_fine_grained_access = (
579
- iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
580
- )
581
- # access_db can take both: the dict from connect_instance_hub and isettings
582
- into_db_token = iresult
583
- else:
584
- isettings = load_instance_settings(settings_file)
585
- source_modules = isettings.modules
586
- db = isettings.db
587
- cache_using_filepath.write_text(
588
- f"{isettings.uid}\n{','.join(source_modules)}"
589
- )
590
- # need to set the token if it is a fine_grained_access and the user is jwt (not public)
591
- is_fine_grained_access = (
592
- isettings._fine_grained_access and isettings._db_permissions == "jwt"
593
- )
594
- # access_db can take both: the dict from connect_instance_hub and isettings
595
- into_db_token = isettings
596
-
597
- target_modules = setup_settings.instance.modules
598
- if missing_members := source_modules - target_modules:
599
- logger.info(
600
- f"in transfer, source lamindb instance has additional modules: {', '.join(missing_members)}"
601
- )
602
-
603
- add_db_connection(db, instance)
604
- if is_fine_grained_access:
605
- db_token = DBToken(into_db_token)
606
- db_token_manager.set(db_token, instance)
607
- return QuerySet(model=cls, using=instance)
608
-
609
- def __get_module_name__(cls) -> str:
610
- schema_module_name = cls.__module__.split(".")[0]
611
- module_name = schema_module_name.replace("lnschema_", "")
612
- if module_name == "lamindb":
613
- module_name = "core"
614
- return module_name
615
-
616
- def __get_name_with_module__(cls) -> str:
617
- module_name = cls.__get_module_name__()
618
- if module_name == "core":
619
- module_prefix = ""
620
- else:
621
- module_prefix = f"{module_name}."
622
- return f"{module_prefix}{cls.__name__}"
623
-
624
- def __get_available_fields__(cls) -> set[str]:
625
- if cls._available_fields is None:
626
- cls._available_fields = {
627
- f.name
628
- for f in cls._meta.get_fields()
629
- if not f.name.startswith("_")
630
- and not f.name.startswith("links_")
631
- and not f.name.endswith("_id")
632
- }
633
- if cls.__name__ == "Artifact":
634
- cls._available_fields.add("visibility")
635
- cls._available_fields.add("transform")
636
- return cls._available_fields
637
-
638
-
639
- class BasicRecord(models.Model, metaclass=Registry):
640
- """Basic metadata record.
641
-
642
- It has the same methods as Record, but doesn't have the additional fields.
643
-
644
- It's mainly used for LinkORMs and similar.
645
- """
646
-
647
- objects = QueryManager()
648
-
649
- class Meta:
650
- abstract = True
651
- base_manager_name = "objects"
652
-
653
- def __init__(self, *args, **kwargs):
654
- skip_validation = kwargs.pop("_skip_validation", False)
655
- if not args:
656
- if (
657
- issubclass(self.__class__, Record)
658
- and self.__class__.__name__
659
- not in {"Storage", "ULabel", "Feature", "Schema", "Param"}
660
- # do not save bionty entities in restricted spaces by default
661
- and self.__class__.__module__ != "bionty.models"
662
- ):
663
- from lamindb import context as run_context
664
-
665
- if run_context.space is not None:
666
- kwargs["space"] = run_context.space
667
- if skip_validation:
668
- super().__init__(**kwargs)
669
- else:
670
- from ..core._settings import settings
671
- from .can_curate import CanCurate
672
- from .collection import Collection
673
- from .transform import Transform
674
-
675
- validate_fields(self, kwargs)
676
-
677
- # do not search for names if an id is passed; this is important
678
- # e.g. when synching ids from the notebook store to lamindb
679
- has_consciously_provided_uid = False
680
- if "_has_consciously_provided_uid" in kwargs:
681
- has_consciously_provided_uid = kwargs.pop(
682
- "_has_consciously_provided_uid"
683
- )
684
- if (
685
- isinstance(self, (CanCurate, Collection, Transform))
686
- and settings.creation.search_names
687
- and not has_consciously_provided_uid
688
- ):
689
- name_field = getattr(self, "_name_field", "name")
690
- exact_match = suggest_records_with_similar_names(
691
- self, name_field, kwargs
692
- )
693
- if exact_match is not None:
694
- if "version" in kwargs:
695
- if kwargs["version"] is not None:
696
- version_comment = " and version"
697
- existing_record = self.__class__.filter(
698
- **{
699
- name_field: kwargs[name_field],
700
- "version": kwargs["version"],
701
- }
702
- ).one_or_none()
703
- else:
704
- # for a versioned record, an exact name match is not a criterion
705
- # for retrieving a record in case `version` isn't passed -
706
- # we'd always pull out many records with exactly the same name
707
- existing_record = None
708
- else:
709
- version_comment = ""
710
- existing_record = exact_match
711
- if existing_record is not None:
712
- logger.important(
713
- f"returning existing {self.__class__.__name__} record with same"
714
- f" {name_field}{version_comment}: '{kwargs[name_field]}'"
715
- )
716
- init_self_from_db(self, existing_record)
717
- update_attributes(self, kwargs)
718
- return None
719
- super().__init__(**kwargs)
720
- if isinstance(self, ValidateFields):
721
- # this will trigger validation against django validators
722
- try:
723
- if hasattr(self, "clean_fields"):
724
- self.clean_fields()
725
- else:
726
- self._Model__clean_fields()
727
- except DjangoValidationError as e:
728
- message = _format_django_validation_error(self, e)
729
- raise FieldValidationError(message) from e
730
- elif len(args) != len(self._meta.concrete_fields):
731
- raise FieldValidationError(
732
- f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
733
- )
734
- else:
735
- super().__init__(*args)
736
- track_current_key_and_name_values(self)
737
-
738
- def save(self, *args, **kwargs) -> Record:
739
- """Save.
740
-
741
- Always saves to the default database.
742
- """
743
- using_key = None
744
- if "using" in kwargs:
745
- using_key = kwargs["using"]
746
- db = self._state.db
747
- pk_on_db = self.pk
748
- artifacts: list = []
749
- if self.__class__.__name__ == "Collection" and self.id is not None:
750
- # when creating a new collection without being able to access artifacts
751
- artifacts = self.ordered_artifacts.list()
752
- pre_existing_record = None
753
- # consider records that are being transferred from other databases
754
- transfer_logs: dict[str, list[str]] = {
755
- "mapped": [],
756
- "transferred": [],
757
- "run": None,
758
- }
759
- if db is not None and db != "default" and using_key is None:
760
- if isinstance(self, IsVersioned):
761
- if not self.is_latest:
762
- raise NotImplementedError(
763
- "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
764
- )
765
- pre_existing_record = transfer_to_default_db(
766
- self, using_key, transfer_logs=transfer_logs
767
- )
768
- self._revises: IsVersioned
769
- if pre_existing_record is not None:
770
- init_self_from_db(self, pre_existing_record)
771
- else:
772
- check_key_change(self)
773
- check_name_change(self)
774
- try:
775
- # save versioned record in presence of self._revises
776
- if isinstance(self, IsVersioned) and self._revises is not None:
777
- assert self._revises.is_latest # noqa: S101
778
- revises = self._revises
779
- revises.is_latest = False
780
- with transaction.atomic():
781
- revises._revises = None # ensure we don't start a recursion
782
- revises.save()
783
- super().save(*args, **kwargs) # type: ignore
784
- self._revises = None
785
- # save unversioned record
786
- else:
787
- super().save(*args, **kwargs)
788
- except (IntegrityError, ProgrammingError) as e:
789
- error_msg = str(e)
790
- # two possible error messages for hash duplication
791
- # "duplicate key value violates unique constraint"
792
- # "UNIQUE constraint failed"
793
- if (
794
- isinstance(e, IntegrityError)
795
- and "hash" in error_msg
796
- and (
797
- "UNIQUE constraint failed" in error_msg
798
- or "duplicate key value violates unique constraint" in error_msg
799
- )
800
- ):
801
- pre_existing_record = self.__class__.get(hash=self.hash)
802
- logger.warning(
803
- f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
804
- )
805
- init_self_from_db(self, pre_existing_record)
806
- elif (
807
- isinstance(e, ProgrammingError)
808
- and hasattr(self, "space")
809
- and "new row violates row-level security policy" in error_msg
810
- ):
811
- raise NoWriteAccess(
812
- f"You’re not allowed to write to the space '{self.space.name}'.\n"
813
- "Please contact an administrator of the space if you need write access."
814
- ) from None
815
- else:
816
- raise
817
- # call the below in case a user makes more updates to the record
818
- track_current_key_and_name_values(self)
819
- # perform transfer of many-to-many fields
820
- # only supported for Artifact and Collection records
821
- if db is not None and db != "default" and using_key is None:
822
- if self.__class__.__name__ == "Collection":
823
- if len(artifacts) > 0:
824
- logger.info("transfer artifacts")
825
- for artifact in artifacts:
826
- artifact.save()
827
- self.artifacts.add(*artifacts)
828
- if hasattr(self, "labels"):
829
- from copy import copy
830
-
831
- from lamindb.models._feature_manager import FeatureManager
832
-
833
- # here we go back to original record on the source database
834
- self_on_db = copy(self)
835
- self_on_db._state.db = db
836
- self_on_db.pk = pk_on_db # manually set the primary key
837
- self_on_db.features = FeatureManager(self_on_db) # type: ignore
838
- self.features._add_from(self_on_db, transfer_logs=transfer_logs)
839
- self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
840
- for k, v in transfer_logs.items():
841
- if k != "run" and len(v) > 0:
842
- logger.important(f"{k} records: {', '.join(v)}")
843
-
844
- if (
845
- self.__class__.__name__
846
- in {
847
- "Artifact",
848
- "Transform",
849
- "Run",
850
- "ULabel",
851
- "Feature",
852
- "Schema",
853
- "Collection",
854
- "Reference",
855
- }
856
- and self._branch_code >= 1
857
- ):
858
- import lamindb as ln
859
-
860
- if ln.context.project is not None:
861
- self.projects.add(ln.context.project)
862
- return self
863
-
864
- def delete(self) -> None:
865
- """Delete."""
866
- # note that the logic below does not fire if a record is moved to the trash
867
- # the idea is that moving a record to the trash should move its entire version family
868
- # to the trash, whereas permanently deleting should default to only deleting a single record
869
- # of a version family
870
- # we can consider making it easy to permanently delete entire version families as well,
871
- # but that's for another time
872
- if isinstance(self, IsVersioned) and self.is_latest:
873
- new_latest = (
874
- self.__class__.objects.using(self._state.db)
875
- .filter(is_latest=False, uid__startswith=self.stem_uid)
876
- .order_by("-created_at")
877
- .first()
878
- )
879
- if new_latest is not None:
880
- new_latest.is_latest = True
881
- with transaction.atomic():
882
- new_latest.save()
883
- super().delete() # type: ignore
884
- logger.warning(f"new latest version is {new_latest}")
885
- return None
886
- super().delete()
887
-
888
-
889
- class Space(BasicRecord):
890
- """Spaces to restrict access to records to specific users or teams.
891
-
892
- You can use spaces to restrict access to records within an instance.
893
-
894
- All data in this registry is synced from `lamin.ai` to enable re-using spaces across instances.
895
- There is no need to manually create records.
896
- """
897
-
898
- id: int = models.SmallAutoField(primary_key=True)
899
- """Internal id, valid only in one DB instance."""
900
- name: str = models.CharField(max_length=100, db_index=True)
901
- """Name of space."""
902
- uid: str = CharField(
903
- editable=False,
904
- unique=True,
905
- max_length=12,
906
- default="00000000",
907
- db_default="00000000",
908
- db_index=True,
72
+ # naming convention in analogy with Schema
73
+ components: Record = models.ManyToManyField(
74
+ "Record", through="RecordRecord", symmetrical=False, related_name="composites"
909
75
  )
910
- """Universal id."""
76
+ """Record-like components of this record."""
77
+ composites: Record
78
+ """Record-like composites of this record."""
79
+ sheet: Sheet | None = ForeignKey(
80
+ "Sheet", CASCADE, null=True, related_name="records"
81
+ )
82
+ """Group records by sheet."""
911
83
  description: str | None = CharField(null=True)
912
- """Description of space."""
913
- created_at: datetime = DateTimeField(
914
- editable=False, db_default=models.functions.Now(), db_index=True
84
+ """A description (optional)."""
85
+ artifacts: Artifact = models.ManyToManyField(
86
+ Artifact, through="RecordArtifact", related_name="records"
915
87
  )
916
- """Time of creation of record."""
917
- created_by: User = ForeignKey(
918
- "User", CASCADE, default=None, related_name="+", null=True
88
+ """Linked artifacts."""
89
+ runs: Run = models.ManyToManyField(Run, through="RecordRun", related_name="records")
90
+ """Linked runs."""
91
+ ulabels: ULabel = models.ManyToManyField(
92
+ ULabel,
93
+ through="RecordULabel",
94
+ related_name="_records", # in transition period
919
95
  )
920
- """Creator of run."""
96
+ """Linked runs."""
97
+ projects: Project
98
+ """Linked projects."""
921
99
 
922
100
  @overload
923
101
  def __init__(
924
102
  self,
925
103
  name: str,
104
+ type: Record | None = None,
105
+ is_type: bool = False,
926
106
  description: str | None = None,
927
107
  ): ...
928
108
 
@@ -937,716 +117,111 @@ class Space(BasicRecord):
937
117
  *args,
938
118
  **kwargs,
939
119
  ):
940
- super().__init__(*args, **kwargs)
941
-
942
-
943
- @doc_args(RECORD_REGISTRY_EXAMPLE)
944
- class Record(BasicRecord, metaclass=Registry):
945
- """Metadata record.
946
-
947
- Every `Record` is a data model that comes with a registry in form of a SQL
948
- table in your database.
949
-
950
- Sub-classing `Record` creates a new registry while instantiating a `Record`
951
- creates a new record.
952
-
953
- {}
954
-
955
- `Record`'s metaclass is :class:`~lamindb.models.Registry`.
956
-
957
- `Record` inherits from Django's `Model` class. Why does LaminDB call it `Record`
958
- and not `Model`? The term `Record` can't lead to confusion with statistical,
959
- machine learning or biological models.
960
- """
961
-
962
- _branch_code: int = models.SmallIntegerField(db_index=True, default=1, db_default=1)
963
- """Whether record is on a branch, in archive or in trash.
964
-
965
- This dictates whether a record appears in queries & searches.
966
-
967
- Coding is as follows:
968
-
969
- - 3: template (hidden in queries & searches)
970
- - 2: draft (hidden in queries & searches)
971
- - 1: default (visible in queries & searches)
972
- - 0: archive (hidden, meant to be kept)
973
- - -1: trash (hidden, scheduled for deletion)
974
-
975
- Any integer higher than >3 codes a branch that's involved in a pull request.
976
- """
977
- space: Space = ForeignKey(Space, PROTECT, default=1, db_default=1)
978
- """The space in which the record lives."""
979
- _aux: dict[str, Any] | None = JSONField(default=None, db_default=None, null=True)
980
- """Auxiliary field for dictionary-like metadata."""
981
-
982
- class Meta:
983
- abstract = True
984
-
985
-
986
- def _format_django_validation_error(record: Record, e: DjangoValidationError):
987
- """Pretty print Django validation errors."""
988
- errors = {}
989
- if hasattr(e, "error_dict"):
990
- error_dict = e.error_dict
991
- else:
992
- error_dict = {"__all__": e.error_list}
993
-
994
- for field_name, error_list in error_dict.items():
995
- for error in error_list:
996
- if hasattr(error, "message"):
997
- msg = error.message
998
- else:
999
- msg = str(error)
1000
-
1001
- if field_name == "__all__":
1002
- errors[field_name] = f"{colors.yellow(msg)}"
1003
- else:
1004
- current_value = getattr(record, field_name, None)
1005
- errors[field_name] = (
1006
- f"{field_name}: {colors.yellow(current_value)} is not valid\n → {msg}"
1007
- )
1008
-
1009
- if errors:
1010
- message = "\n "
1011
- for _, error in errors.items():
1012
- message += error + "\n "
1013
-
1014
- return message
1015
-
1016
-
1017
- def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
1018
- """Gets the parameters of a Record from the overloaded signature.
1019
-
1020
- Example:
1021
- >>> get_record_params(bt.Organism)
1022
- >>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
1023
- """
1024
- source = inspect.getsource(record_class)
1025
-
1026
- # Find first overload that's not *db_args
1027
- pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
1028
- overloads = re.finditer(pattern, source)
1029
-
1030
- for single_overload in overloads:
1031
- params_block = single_overload.group(1)
1032
- # This is an additional safety measure if the overloaded signature that we're
1033
- # looking for is not at the top but a "db_args" constructor
1034
- if "*db_args" in params_block:
1035
- continue
1036
-
1037
- params = []
1038
- for line in params_block.split("\n"):
1039
- line = line.strip()
1040
- if not line or "self" in line:
1041
- continue
1042
-
1043
- # Extract name and type annotation
1044
- # The regex pattern finds parameter definitions like:
1045
- # Simple: name: str
1046
- # With default: age: int = 0
1047
- # With complex types: items: List[str] = []
1048
- param_pattern = (
1049
- r"(\w+)" # Parameter name
1050
- r"\s*:\s*" # Colon with optional whitespace
1051
- r"((?:[^=,]|" # Type hint: either non-equals/comma chars
1052
- r"(?<=\[)[^[\]]*" # or contents within square brackets
1053
- r"(?=\]))+)" # looking ahead for closing bracket
1054
- r"(?:\s*=\s*" # Optional default value part
1055
- r"([^,]+))?" # Default value: anything but comma
1056
- )
1057
- match = re.match(param_pattern, line)
1058
- if not match:
1059
- continue
1060
-
1061
- name, type_str = match.group(1), match.group(2).strip()
1062
-
1063
- # Keep type as string instead of evaluating
1064
- params.append((name, type_str))
1065
-
1066
- return params
1067
-
1068
- return []
1069
-
1070
-
1071
- def get_name_field(
1072
- registry: type[Record] | QuerySet | Manager,
1073
- *,
1074
- field: StrField | None = None,
1075
- ) -> str:
1076
- """Get the 1st char or text field from the registry."""
1077
- if isinstance(registry, (QuerySet, Manager)):
1078
- registry = registry.model
1079
- model_field_names = [i.name for i in registry._meta.fields]
1080
-
1081
- # set to default name field
1082
- if field is None:
1083
- if hasattr(registry, "_name_field"):
1084
- field = registry._meta.get_field(registry._name_field)
1085
- elif "name" in model_field_names:
1086
- field = registry._meta.get_field("name")
1087
- else:
1088
- # first char or text field that doesn't contain "id"
1089
- for i in registry._meta.fields:
1090
- if "id" in i.name:
1091
- continue
1092
- if i.get_internal_type() in {"CharField", "TextField"}:
1093
- field = i
1094
- break
120
+ if len(args) == len(self._meta.concrete_fields):
121
+ super().__init__(*args, **kwargs)
122
+ return None
123
+ if len(args) > 0:
124
+ raise ValueError("Only one non-keyword arg allowed")
125
+ name: str = kwargs.pop("name", None)
126
+ type: str | None = kwargs.pop("type", None)
127
+ is_type: bool = kwargs.pop("is_type", False)
128
+ sheet: Sheet = kwargs.pop("sheet", None)
129
+ description: str | None = kwargs.pop("description", None)
130
+ _skip_validation = kwargs.pop(
131
+ "_skip_validation", True
132
+ ) # should not validate records
133
+ _aux = kwargs.pop("_aux", None)
134
+ if len(kwargs) > 0:
135
+ valid_keywords = ", ".join([val[0] for val in _get_record_kwargs(Record)])
136
+ raise FieldValidationError(
137
+ f"Only {valid_keywords} are valid keyword arguments"
138
+ )
139
+ super().__init__(
140
+ name=name,
141
+ type=type,
142
+ is_type=is_type,
143
+ sheet=sheet,
144
+ description=description,
145
+ _skip_validation=_skip_validation,
146
+ _aux=_aux,
147
+ )
1095
148
 
1096
- # no default name field can be found
1097
- if field is None:
1098
- raise ValueError(
1099
- "please pass a Record string field, e.g., `CellType.name`!"
1100
- )
1101
- else:
1102
- field = field.name # type:ignore
1103
- if not isinstance(field, str):
1104
- try:
1105
- field = field.field.name
1106
- except AttributeError:
1107
- raise TypeError(
1108
- "please pass a Record string field, e.g., `CellType.name`!"
1109
- ) from None
1110
149
 
1111
- return field
150
+ class Sheet(SQLRecord, TracksRun, TracksUpdates):
151
+ """Sheets to group records."""
1112
152
 
153
+ class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
154
+ abstract = False
1113
155
 
1114
- def add_db_connection(db: str, using: str):
1115
- db_config = dj_database_url.config(
1116
- default=db, conn_max_age=600, conn_health_checks=True
156
+ id: int = models.AutoField(primary_key=True)
157
+ uid: str = CharField(
158
+ editable=False, unique=True, db_index=True, max_length=12, default=base62_12
1117
159
  )
1118
- db_config["TIME_ZONE"] = "UTC"
1119
- db_config["OPTIONS"] = {}
1120
- db_config["AUTOCOMMIT"] = True
1121
- connections.settings[using] = db_config
1122
-
1123
-
1124
- REGISTRY_UNIQUE_FIELD = {
1125
- "storage": "root",
1126
- "feature": "name",
1127
- "ulabel": "name",
1128
- "space": "name", # TODO: this should be updated with the currently used space instead during transfer
1129
- }
1130
-
1131
-
1132
- def update_fk_to_default_db(
1133
- records: Record | list[Record] | QuerySet,
1134
- fk: str,
1135
- using_key: str | None,
1136
- transfer_logs: dict,
1137
- ):
1138
- record = records[0] if isinstance(records, (list, QuerySet)) else records
1139
- if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
1140
- fk_record = getattr(record, fk)
1141
- field = REGISTRY_UNIQUE_FIELD.get(fk, "uid")
1142
- fk_record_default = fk_record.__class__.filter(
1143
- **{field: getattr(fk_record, field)}
1144
- ).one_or_none()
1145
- if fk_record_default is None:
1146
- from copy import copy
1147
-
1148
- fk_record_default = copy(fk_record)
1149
- transfer_to_default_db(
1150
- fk_record_default, using_key, save=True, transfer_logs=transfer_logs
1151
- )
1152
- if isinstance(records, (list, QuerySet)):
1153
- for r in records:
1154
- setattr(r, f"{fk}", None)
1155
- setattr(r, f"{fk}_id", fk_record_default.id)
1156
- else:
1157
- setattr(records, f"{fk}", None)
1158
- setattr(records, f"{fk}_id", fk_record_default.id)
1159
-
1160
-
1161
- FKBULK = [
1162
- "organism",
1163
- "source",
1164
- "report", # Run
1165
- ]
1166
-
1167
-
1168
- def transfer_fk_to_default_db_bulk(
1169
- records: list | QuerySet, using_key: str | None, transfer_logs: dict
1170
- ):
1171
- for fk in FKBULK:
1172
- update_fk_to_default_db(records, fk, using_key, transfer_logs=transfer_logs)
1173
-
1174
-
1175
- def get_transfer_run(record) -> Run:
1176
- from lamindb import settings
1177
- from lamindb.core._context import context
1178
- from lamindb.models import Run, Transform
1179
- from lamindb.models.artifact import WARNING_RUN_TRANSFORM
1180
-
1181
- slug = record._state.db
1182
- owner, name = get_owner_name_from_identifier(slug)
1183
- cache_using_filepath = (
1184
- ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
160
+ """A universal random id, valid across DB instances."""
161
+ name: str = CharField(db_index=True)
162
+ """Name or title of sheet."""
163
+ schema: Schema | None = ForeignKey(
164
+ "Schema", CASCADE, null=True, related_name="sheets"
1185
165
  )
1186
- if not cache_using_filepath.exists():
1187
- raise SystemExit("Need to call .using() before")
1188
- instance_uid = cache_using_filepath.read_text().split("\n")[0]
1189
- key = f"transfers/{instance_uid}"
1190
- uid = instance_uid + "0000"
1191
- transform = Transform.filter(uid=uid).one_or_none()
1192
- if transform is None:
1193
- search_names = settings.creation.search_names
1194
- settings.creation.search_names = False
1195
- transform = Transform( # type: ignore
1196
- uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
1197
- ).save()
1198
- settings.creation.search_names = search_names
1199
- # use the global run context to get the initiated_by_run run id
1200
- if context.run is not None:
1201
- initiated_by_run = context.run
1202
- else:
1203
- if not settings.creation.artifact_silence_missing_run_warning:
1204
- logger.warning(WARNING_RUN_TRANSFORM)
1205
- initiated_by_run = None
1206
- # it doesn't seem to make sense to create new runs for every transfer
1207
- run = Run.filter(
1208
- transform=transform, initiated_by_run=initiated_by_run
1209
- ).one_or_none()
1210
- if run is None:
1211
- run = Run(transform=transform, initiated_by_run=initiated_by_run).save() # type: ignore
1212
- run.initiated_by_run = initiated_by_run # so that it's available in memory
1213
- return run
1214
-
1215
-
1216
- def transfer_to_default_db(
1217
- record: Record,
1218
- using_key: str | None,
1219
- *,
1220
- transfer_logs: dict,
1221
- save: bool = False,
1222
- transfer_fk: bool = True,
1223
- ) -> Record | None:
1224
- if record._state.db is None or record._state.db == "default":
1225
- return None
1226
- registry = record.__class__
1227
- record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
1228
- record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
1229
- if transfer_logs["run"] is None:
1230
- transfer_logs["run"] = get_transfer_run(record)
1231
- if record_on_default is not None:
1232
- transfer_logs["mapped"].append(record_str)
1233
- return record_on_default
1234
- else:
1235
- transfer_logs["transferred"].append(record_str)
1236
-
1237
- if hasattr(record, "created_by_id"):
1238
- record.created_by = None
1239
- record.created_by_id = ln_setup.settings.user.id
1240
- # run & transform
1241
- run = transfer_logs["run"]
1242
- if hasattr(record, "run_id"):
1243
- record.run = None
1244
- record.run_id = run.id
1245
- # deal with denormalized transform FK on artifact and collection
1246
- if hasattr(record, "transform_id"):
1247
- record.transform = None
1248
- record.transform_id = run.transform_id
1249
- # transfer other foreign key fields
1250
- fk_fields = [
1251
- i.name
1252
- for i in record._meta.fields
1253
- if i.get_internal_type() == "ForeignKey"
1254
- if i.name not in {"created_by", "run", "transform"}
1255
- ]
1256
- if not transfer_fk:
1257
- # don't transfer fk fields that are already bulk transferred
1258
- fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
1259
- for fk in fk_fields:
1260
- update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)
1261
- record.id = None
1262
- record._state.db = "default"
1263
- if save:
1264
- record.save()
1265
- return None
1266
-
1267
-
1268
- def track_current_key_and_name_values(record: Record):
1269
- from lamindb.models import Artifact
1270
-
1271
- # below, we're using __dict__ to avoid triggering the refresh from the database
1272
- # which can lead to a recursion
1273
- if isinstance(record, Artifact):
1274
- record._old_key = record.__dict__.get("key")
1275
- record._old_suffix = record.__dict__.get("suffix")
1276
- elif hasattr(record, "_name_field"):
1277
- record._old_name = record.__dict__.get(record._name_field)
1278
-
1279
-
1280
- def check_name_change(record: Record):
1281
- """Warns if a record's name has changed."""
1282
- from lamindb.models import Artifact, Collection, Feature, Schema, Transform
1283
-
1284
- if (
1285
- not record.pk
1286
- or not hasattr(record, "_old_name")
1287
- or not hasattr(record, "_name_field")
1288
- ):
1289
- return
166
+ """A schema to enforce for the sheet (optional)."""
167
+ description: str | None = CharField(null=True, db_index=True)
168
+ """A description (optional)."""
169
+ projects: Project
170
+ """Linked projects."""
1290
171
 
1291
- # checked in check_key_change or not checked at all
1292
- if isinstance(record, (Artifact, Collection, Transform)):
1293
- return
1294
172
 
1295
- # renaming feature sets is not checked
1296
- if isinstance(record, Schema):
1297
- return
1298
-
1299
- old_name = record._old_name
1300
- new_name = getattr(record, record._name_field)
1301
- registry = record.__class__.__name__
1302
-
1303
- if old_name != new_name:
1304
- # when a label is renamed, only raise a warning if it has a feature
1305
- if hasattr(record, "artifacts"):
1306
- linked_records = (
1307
- record.artifacts.through.filter(
1308
- label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
1309
- )
1310
- .exclude(feature_id=None) # must have a feature
1311
- .distinct()
1312
- )
1313
- artifact_ids = linked_records.list("artifact__uid")
1314
- n = len(artifact_ids)
1315
- if n > 0:
1316
- s = "s" if n > 1 else ""
1317
- logger.error(
1318
- f"You are trying to {colors.red('rename label')} from '{old_name}' to '{new_name}'!\n"
1319
- f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {artifact_ids}\n\n"
1320
- f"{colors.bold('To rename this label')}, make it external:\n"
1321
- f" → run `artifact.labels.make_external(label)`\n\n"
1322
- f"After renaming, consider re-curating the above artifact{s}:\n"
1323
- f' → in each dataset, manually modify label "{old_name}" to "{new_name}"\n'
1324
- f" → run `ln.Curator`\n"
1325
- )
1326
- raise RecordNameChangeIntegrityError
1327
-
1328
- # when a feature is renamed
1329
- elif isinstance(record, Feature):
1330
- # only internal features are associated with schemas
1331
- linked_artifacts = Artifact.filter(feature_sets__features=record).list(
1332
- "uid"
1333
- )
1334
- n = len(linked_artifacts)
1335
- if n > 0:
1336
- s = "s" if n > 1 else ""
1337
- logger.error(
1338
- f"You are trying to {colors.red('rename feature')} from '{old_name}' to '{new_name}'!\n"
1339
- f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {linked_artifacts}\n\n"
1340
- f"{colors.bold('To rename this feature')}, make it external:\n"
1341
- " → run `artifact.features.make_external(feature)`\n\n"
1342
- f"After renaming, consider re-curating the above artifact{s}:\n"
1343
- f" → in each dataset, manually modify feature '{old_name}' to '{new_name}'\n"
1344
- f" → run `ln.Curator`\n"
1345
- )
1346
- raise RecordNameChangeIntegrityError
1347
-
1348
-
1349
- def check_key_change(record: Union[Artifact, Transform]):
1350
- """Errors if a record's key has falsely changed."""
1351
- from .artifact import Artifact
1352
-
1353
- if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
1354
- return
1355
- if record._old_suffix != record.suffix:
1356
- raise InvalidArgument(
1357
- f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'."
1358
- )
1359
-
1360
- old_key = record._old_key
1361
- new_key = record.key
1362
-
1363
- if old_key != new_key:
1364
- if not record._key_is_virtual:
1365
- raise InvalidArgument(
1366
- f"Changing a non-virtual key of an artifact is not allowed! You tried to change it from '{old_key}' to '{new_key}'."
1367
- )
1368
- if old_key is not None:
1369
- old_key_suffix = extract_suffix_from_path(
1370
- PurePosixPath(old_key), arg_name="key"
1371
- )
1372
- assert old_key_suffix == record.suffix, ( # noqa: S101
1373
- old_key_suffix,
1374
- record.suffix,
1375
- )
1376
- else:
1377
- old_key_suffix = record.suffix
1378
- new_key_suffix = extract_suffix_from_path(
1379
- PurePosixPath(new_key), arg_name="key"
1380
- )
1381
- if old_key_suffix != new_key_suffix:
1382
- raise InvalidArgument(
1383
- f"The suffix '{new_key_suffix}' of the provided key is incorrect, it should be '{old_key_suffix}'."
1384
- )
173
+ class RecordJson(BaseSQLRecord, IsLink):
174
+ id: int = models.BigAutoField(primary_key=True)
175
+ record: Record = ForeignKey(Record, CASCADE, related_name="values_json")
176
+ feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordjson")
177
+ value: Any = JSONField(default=None, db_default=None)
1385
178
 
179
+ class Meta:
180
+ unique_together = ("record", "feature")
1386
181
 
1387
- def format_field_value(value: datetime | str | Any) -> Any:
1388
- from datetime import datetime
1389
-
1390
- if isinstance(value, datetime):
1391
- return value.strftime("%Y-%m-%d %H:%M:%S %Z")
1392
-
1393
- if isinstance(value, str):
1394
- try:
1395
- value = datetime.fromisoformat(value)
1396
- value = value.strftime("%Y-%m-%d %H:%M:%S %Z")
1397
- except ValueError:
1398
- pass
1399
- return f"'{value}'"
1400
- else:
1401
- return value
1402
-
1403
-
1404
- class RecordInfo:
1405
- def __init__(self, registry: Registry):
1406
- self.registry = registry
1407
-
1408
- def _get_type_for_field(self, field_name: str) -> str:
1409
- field = self.registry._meta.get_field(field_name)
1410
- related_model_name = (
1411
- field.related_model.__name__
1412
- if hasattr(field, "related_model") and field.related_model
1413
- else None
1414
- )
1415
- return related_model_name if related_model_name else field.get_internal_type()
1416
-
1417
- def _get_base_class_fields(self) -> list[str]:
1418
- return [
1419
- field.name
1420
- for base in self.registry.__bases__
1421
- if hasattr(base, "_meta")
1422
- for field in base._meta.get_fields()
1423
- ]
1424
-
1425
- def _reorder_fields_by_class(self, fields_to_order: list[Field]) -> list[Field]:
1426
- """Reorders the fields so that base class fields come last."""
1427
- non_base_class_fields = [
1428
- field
1429
- for field in fields_to_order
1430
- if field.name not in self._get_base_class_fields()
1431
- ]
1432
- found_base_class_fields = [
1433
- field
1434
- for field in fields_to_order
1435
- if field.name in self._get_base_class_fields()
1436
- ]
1437
- return non_base_class_fields + found_base_class_fields
1438
-
1439
- def get_simple_fields(self, return_str: bool = False) -> Any:
1440
- simple_fields = [
1441
- field
1442
- for field in self.registry._meta.get_fields()
1443
- if not (
1444
- isinstance(field, ManyToOneRel)
1445
- or isinstance(field, ManyToManyRel)
1446
- or isinstance(field, ManyToManyField)
1447
- or isinstance(field, ForeignKey)
1448
- or field.name.startswith("_")
1449
- or field.name == "id"
1450
- )
1451
- ]
1452
- simple_fields = self._reorder_fields_by_class(simple_fields)
1453
- if not return_str:
1454
- return simple_fields
1455
- else:
1456
- repr_str = f" {colors.italic('Simple fields')}\n"
1457
- if simple_fields:
1458
- repr_str += "".join(
1459
- [
1460
- f" .{field_name.name}: {self._get_type_for_field(field_name.name)}\n"
1461
- for field_name in simple_fields
1462
- ]
1463
- )
1464
- return repr_str
1465
-
1466
- def get_relational_fields(self, return_str: bool = False):
1467
- # we ignore ManyToOneRel because it leads to so much clutter in the API
1468
- # also note that our general guideline is to have related_name="+"
1469
- # for ForeignKey fields
1470
- relational_fields = (ManyToOneRel, ManyToManyRel, ManyToManyField, ForeignKey)
1471
-
1472
- class_specific_relational_fields = [
1473
- field
1474
- for field in self.registry._meta.fields + self.registry._meta.many_to_many
1475
- if isinstance(field, relational_fields)
1476
- and not field.name.startswith(("links_", "_"))
1477
- ]
1478
-
1479
- non_class_specific_relational_fields = [
1480
- field
1481
- for field in self.registry._meta.get_fields()
1482
- if isinstance(field, relational_fields)
1483
- and not field.name.startswith(("links_", "_"))
1484
- ]
1485
- non_class_specific_relational_fields = self._reorder_fields_by_class(
1486
- non_class_specific_relational_fields
1487
- )
1488
-
1489
- # Ensure that class specific fields (e.g. Artifact) come before non-class specific fields (e.g. collection)
1490
- filtered_non_class_specific = [
1491
- field
1492
- for field in non_class_specific_relational_fields
1493
- if field not in class_specific_relational_fields
1494
- ]
1495
- ordered_relational_fields = (
1496
- class_specific_relational_fields + filtered_non_class_specific
1497
- )
1498
-
1499
- core_module_fields = []
1500
- external_modules_fields = []
1501
- for field in ordered_relational_fields:
1502
- field_name = repr(field).split(": ")[1][:-1]
1503
- if field_name.count(".") == 1 and "lamindb" not in field_name:
1504
- external_modules_fields.append(field)
1505
- else:
1506
- core_module_fields.append(field)
1507
-
1508
- def _get_related_field_type(field) -> str:
1509
- field_type = (
1510
- field.related_model.__get_name_with_module__()
1511
- .replace(
1512
- "Artifact", ""
1513
- ) # some fields have an unnecessary 'Artifact' in their name
1514
- .replace(
1515
- "Collection", ""
1516
- ) # some fields have an unnecessary 'Collection' in their name
1517
- )
1518
- return (
1519
- self._get_type_for_field(field.name)
1520
- if not field_type.strip()
1521
- else field_type
1522
- )
1523
-
1524
- core_module_fields_formatted = [
1525
- f" .{field.name}: {_get_related_field_type(field)}\n"
1526
- for field in core_module_fields
1527
- ]
1528
- external_modules_fields_formatted = [
1529
- f" .{field.name}: {_get_related_field_type(field)}\n"
1530
- for field in external_modules_fields
1531
- ]
1532
-
1533
- if not return_str:
1534
- external_modules_fields_by_modules = defaultdict(list)
1535
- for field_str, field in zip(
1536
- external_modules_fields_formatted, external_modules_fields
1537
- ):
1538
- field_type = field_str.split(":")[1].split()[0]
1539
- module_name = field_type.split(".")[0]
1540
- external_modules_fields_by_modules[module_name].append(field)
1541
- return core_module_fields, external_modules_fields_by_modules
1542
- else:
1543
- repr_str = ""
1544
-
1545
- # Non-external relational fields
1546
- if core_module_fields:
1547
- repr_str += f" {colors.italic('Relational fields')}\n"
1548
- repr_str += "".join(core_module_fields_formatted)
1549
-
1550
- # External relational fields
1551
- external_modules = set()
1552
- for field in external_modules_fields_formatted:
1553
- field_type = field.split(":")[1].split()[0]
1554
- external_modules.add(field_type.split(".")[0])
1555
-
1556
- if external_modules:
1557
- # We want Bionty to show up before other modules
1558
- external_modules = (
1559
- ["bionty"] + sorted(external_modules - {"bionty"}) # type: ignore
1560
- if "bionty" in external_modules
1561
- else sorted(external_modules)
1562
- )
1563
- for ext_module in external_modules:
1564
- ext_module_fields = [
1565
- field
1566
- for field in external_modules_fields_formatted
1567
- if ext_module in field
1568
- ]
1569
-
1570
- if ext_module_fields:
1571
- repr_str += (
1572
- f" {colors.italic(f'{ext_module.capitalize()} fields')}\n"
1573
- )
1574
- repr_str += "".join(ext_module_fields)
1575
-
1576
- return repr_str
1577
182
 
183
+ class RecordRecord(SQLRecord, IsLink):
184
+ id: int = models.BigAutoField(primary_key=True)
185
+ record: Record = ForeignKey(
186
+ Record, CASCADE, related_name="values_record"
187
+ ) # composite
188
+ feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordrecord")
189
+ value: Record = ForeignKey(
190
+ Record, PROTECT, related_name="links_record"
191
+ ) # component
1578
192
 
1579
- def registry_repr(cls):
1580
- """Shows fields."""
1581
- repr_str = f"{colors.green(cls.__name__)}\n"
1582
- info = RecordInfo(cls)
1583
- repr_str += info.get_simple_fields(return_str=True)
1584
- repr_str += info.get_relational_fields(return_str=True)
1585
- repr_str = repr_str.rstrip("\n")
1586
- return repr_str
193
+ class Meta:
194
+ unique_together = ("record", "feature")
1587
195
 
1588
196
 
1589
- def record_repr(
1590
- self: Record, include_foreign_keys: bool = True, exclude_field_names=None
1591
- ) -> str:
1592
- if exclude_field_names is None:
1593
- exclude_field_names = ["id", "updated_at", "source_code"]
1594
- field_names = [
1595
- field.name
1596
- for field in self._meta.fields
1597
- if (not isinstance(field, ForeignKey) and field.name not in exclude_field_names)
1598
- ]
1599
- if include_foreign_keys:
1600
- field_names += [
1601
- f"{field.name}_id"
1602
- for field in self._meta.fields
1603
- if isinstance(field, ForeignKey)
1604
- ]
1605
- if "created_at" in field_names:
1606
- field_names.remove("created_at")
1607
- field_names.append("created_at")
1608
- if field_names[0] != "uid" and "uid" in field_names:
1609
- field_names.remove("uid")
1610
- field_names.insert(0, "uid")
1611
- fields_str = {}
1612
- for k in field_names:
1613
- if not k.startswith("_") and hasattr(self, k):
1614
- value = getattr(self, k)
1615
- # Force strip the time component of the version
1616
- if k == "version" and value:
1617
- fields_str[k] = f"'{str(value).split()[0]}'"
1618
- else:
1619
- fields_str[k] = format_field_value(value)
1620
- fields_joined_str = ", ".join(
1621
- [f"{k}={fields_str[k]}" for k in fields_str if fields_str[k] is not None]
1622
- )
1623
- return f"{self.__class__.__name__}({fields_joined_str})"
197
+ class RecordULabel(BaseSQLRecord, IsLink):
198
+ id: int = models.BigAutoField(primary_key=True)
199
+ record: Record = ForeignKey(Record, CASCADE, related_name="values_ulabel")
200
+ feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordulabel")
201
+ value: ULabel = ForeignKey(ULabel, PROTECT, related_name="links_record")
1624
202
 
203
+ class Meta:
204
+ # allows linking exactly one record to one ulabel per feature, because we likely don't want to have Many
205
+ unique_together = ("record", "feature")
1625
206
 
1626
- # below is code to further format the repr of a record
1627
- #
1628
- # def format_repr(
1629
- # record: Record, exclude_field_names: str | list[str] | None = None
1630
- # ) -> str:
1631
- # if isinstance(exclude_field_names, str):
1632
- # exclude_field_names = [exclude_field_names]
1633
- # exclude_field_names_init = ["id", "created_at", "updated_at"]
1634
- # if exclude_field_names is not None:
1635
- # exclude_field_names_init += exclude_field_names
1636
- # return record.__repr__(
1637
- # include_foreign_keys=False, exclude_field_names=exclude_field_names_init
1638
- # )
1639
207
 
208
+ class RecordRun(BaseSQLRecord, IsLink):
209
+ id: int = models.BigAutoField(primary_key=True)
210
+ record: Record = ForeignKey(Record, CASCADE, related_name="values_run")
211
+ feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordrun")
212
+ value: Run = ForeignKey(Run, PROTECT, related_name="links_record")
1640
213
 
1641
- Record.__repr__ = record_repr # type: ignore
1642
- Record.__str__ = record_repr # type: ignore
214
+ class Meta:
215
+ # allows linking several records to a single run for the same feature because we'll likely need this
216
+ unique_together = ("record", "feature")
1643
217
 
1644
218
 
1645
- class Migration(BasicRecord):
1646
- app = CharField(max_length=255)
1647
- name = CharField(max_length=255)
1648
- applied: datetime = DateTimeField()
219
+ class RecordArtifact(BaseSQLRecord, IsLink):
220
+ id: int = models.BigAutoField(primary_key=True)
221
+ record: Record = ForeignKey(Record, CASCADE, related_name="values_artifact")
222
+ feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordartifact")
223
+ value: Artifact = ForeignKey(Artifact, PROTECT, related_name="links_record")
1649
224
 
1650
225
  class Meta:
1651
- db_table = "django_migrations"
1652
- managed = False
226
+ # allows linking several records to a single artifact for the same feature because we'll likely need this
227
+ unique_together = ("record", "feature", "value")