lamindb 1.1.1__py3-none-any.whl → 1.2a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. lamindb/__init__.py +28 -25
  2. lamindb/_tracked.py +1 -1
  3. lamindb/_view.py +2 -3
  4. lamindb/base/__init__.py +1 -1
  5. lamindb/base/ids.py +1 -10
  6. lamindb/core/__init__.py +7 -65
  7. lamindb/core/_context.py +34 -8
  8. lamindb/core/_settings.py +6 -6
  9. lamindb/core/_sync_git.py +1 -1
  10. lamindb/core/loaders.py +9 -8
  11. lamindb/core/storage/_backed_access.py +4 -2
  12. lamindb/core/storage/_tiledbsoma.py +6 -4
  13. lamindb/core/storage/_zarr.py +32 -11
  14. lamindb/core/storage/objects.py +59 -26
  15. lamindb/core/storage/paths.py +4 -1
  16. lamindb/curators/__init__.py +97 -81
  17. lamindb/errors.py +1 -1
  18. lamindb/integrations/_vitessce.py +4 -4
  19. lamindb/migrations/0089_subsequent_runs.py +159 -0
  20. lamindb/migrations/0090_runproject_project_runs.py +73 -0
  21. lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
  22. lamindb/models/__init__.py +79 -0
  23. lamindb/{core → models}/_describe.py +3 -3
  24. lamindb/{core → models}/_django.py +8 -5
  25. lamindb/{core → models}/_feature_manager.py +103 -87
  26. lamindb/{_from_values.py → models/_from_values.py} +5 -2
  27. lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
  28. lamindb/{core → models}/_label_manager.py +10 -17
  29. lamindb/{core/relations.py → models/_relations.py} +8 -1
  30. lamindb/models/artifact.py +2601 -0
  31. lamindb/{_can_curate.py → models/can_curate.py} +349 -180
  32. lamindb/models/collection.py +683 -0
  33. lamindb/models/core.py +135 -0
  34. lamindb/models/feature.py +643 -0
  35. lamindb/models/flextable.py +163 -0
  36. lamindb/{_parents.py → models/has_parents.py} +55 -49
  37. lamindb/models/project.py +384 -0
  38. lamindb/{_query_manager.py → models/query_manager.py} +10 -8
  39. lamindb/{_query_set.py → models/query_set.py} +28 -24
  40. lamindb/models/record.py +1757 -0
  41. lamindb/models/run.py +563 -0
  42. lamindb/{_save.py → models/save.py} +9 -7
  43. lamindb/models/schema.py +732 -0
  44. lamindb/models/transform.py +360 -0
  45. lamindb/models/ulabel.py +249 -0
  46. {lamindb-1.1.1.dist-info → lamindb-1.2a2.dist-info}/METADATA +5 -5
  47. {lamindb-1.1.1.dist-info → lamindb-1.2a2.dist-info}/RECORD +49 -50
  48. lamindb/_artifact.py +0 -1379
  49. lamindb/_collection.py +0 -440
  50. lamindb/_feature.py +0 -316
  51. lamindb/_is_versioned.py +0 -40
  52. lamindb/_record.py +0 -1064
  53. lamindb/_run.py +0 -60
  54. lamindb/_schema.py +0 -347
  55. lamindb/_storage.py +0 -15
  56. lamindb/_transform.py +0 -170
  57. lamindb/_ulabel.py +0 -56
  58. lamindb/_utils.py +0 -9
  59. lamindb/base/validation.py +0 -63
  60. lamindb/core/_data.py +0 -491
  61. lamindb/core/fields.py +0 -12
  62. lamindb/models.py +0 -4475
  63. {lamindb-1.1.1.dist-info → lamindb-1.2a2.dist-info}/LICENSE +0 -0
  64. {lamindb-1.1.1.dist-info → lamindb-1.2a2.dist-info}/WHEEL +0 -0
lamindb/_record.py DELETED
@@ -1,1064 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import builtins
4
- import inspect
5
- import re
6
- from functools import reduce
7
- from pathlib import PurePosixPath
8
- from typing import TYPE_CHECKING, NamedTuple
9
-
10
- import dj_database_url
11
- import lamindb_setup as ln_setup
12
- from django.core.exceptions import ValidationError as DjangoValidationError
13
- from django.db import connections, transaction
14
- from django.db.models import (
15
- IntegerField,
16
- Manager,
17
- Q,
18
- QuerySet,
19
- TextField,
20
- Value,
21
- )
22
- from django.db.models.functions import Cast, Coalesce
23
- from django.db.models.lookups import (
24
- Contains,
25
- Exact,
26
- IContains,
27
- IExact,
28
- IRegex,
29
- IStartsWith,
30
- Regex,
31
- StartsWith,
32
- )
33
- from django.db.utils import IntegrityError
34
- from lamin_utils import colors, logger
35
- from lamin_utils._lookup import Lookup
36
- from lamindb_setup._connect_instance import (
37
- get_owner_name_from_identifier,
38
- load_instance_settings,
39
- update_db_using_local,
40
- )
41
- from lamindb_setup.core._docs import doc_args
42
- from lamindb_setup.core._hub_core import connect_instance_hub
43
- from lamindb_setup.core._settings_store import instance_settings_file
44
- from lamindb_setup.core.upath import extract_suffix_from_path
45
-
46
- from lamindb.errors import FieldValidationError
47
- from lamindb.models import (
48
- Artifact,
49
- BasicRecord,
50
- CanCurate,
51
- Collection,
52
- Feature,
53
- IsVersioned,
54
- Param,
55
- Record,
56
- Run,
57
- Schema,
58
- Transform,
59
- ULabel,
60
- ValidateFields,
61
- )
62
-
63
- from ._utils import attach_func_to_class_method
64
- from .core._settings import settings
65
- from .errors import (
66
- InvalidArgument,
67
- RecordNameChangeIntegrityError,
68
- ValidationError,
69
- )
70
-
71
- if TYPE_CHECKING:
72
- import pandas as pd
73
-
74
- from lamindb.base.types import StrField
75
-
76
-
77
- IPYTHON = getattr(builtins, "__IPYTHON__", False)
78
-
79
-
80
- def is_approx_pascal_case(s):
81
- """Check if the last component of a dotted string is in PascalCase.
82
-
83
- Args:
84
- s (str): The string to check
85
-
86
- Returns:
87
- bool: True if the last component is in PascalCase
88
-
89
- Raises:
90
- ValueError: If the last component doesn't start with a capital letter
91
- """
92
- if "[" in s: # this is because we allow types of form 'script[test_script.py]'
93
- return True
94
- last_component = s.split(".")[-1]
95
-
96
- if not last_component[0].isupper():
97
- raise ValueError(
98
- f"'{last_component}' should start with a capital letter given you're defining a type"
99
- )
100
-
101
- return True
102
-
103
-
104
- def init_self_from_db(self: Record, existing_record: Record):
105
- new_args = [
106
- getattr(existing_record, field.attname) for field in self._meta.concrete_fields
107
- ]
108
- super(self.__class__, self).__init__(*new_args)
109
- self._state.adding = False # mimic from_db
110
- self._state.db = "default"
111
-
112
-
113
- def update_attributes(record: Record, attributes: dict[str, str]):
114
- for key, value in attributes.items():
115
- if (
116
- getattr(record, key) != value
117
- and value is not None
118
- and key != "dtype"
119
- and key != "_aux"
120
- ):
121
- logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
122
- setattr(record, key, value)
123
-
124
-
125
- def validate_fields(record: Record, kwargs):
126
- from lamindb.base.validation import validate_literal_fields
127
-
128
- # validate required fields
129
- # a "required field" is a Django field that has `null=False, default=None`
130
- required_fields = {
131
- k.name for k in record._meta.fields if not k.null and k.default is None
132
- }
133
- required_fields_not_passed = {k: None for k in required_fields if k not in kwargs}
134
- kwargs.update(required_fields_not_passed)
135
- missing_fields = [
136
- k for k, v in kwargs.items() if v is None and k in required_fields
137
- ]
138
- if missing_fields:
139
- raise FieldValidationError(f"{missing_fields} are required.")
140
- # ensure the exact length of the internal uid for core entities
141
- if "uid" in kwargs and record.__class__ in {
142
- Artifact,
143
- Collection,
144
- Transform,
145
- Run,
146
- ULabel,
147
- Feature,
148
- Schema,
149
- Param,
150
- }:
151
- uid_max_length = record.__class__._meta.get_field(
152
- "uid"
153
- ).max_length # triggers FieldDoesNotExist
154
- if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
155
- raise ValidationError(
156
- f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
157
- )
158
- # validate is_type
159
- if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
160
- if kwargs["name"].endswith("s"):
161
- logger.warning(
162
- f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
163
- )
164
- is_approx_pascal_case(kwargs["name"])
165
- # validate literals
166
- validate_literal_fields(record, kwargs)
167
-
168
-
169
- def suggest_records_with_similar_names(
170
- record: Record, name_field: str, kwargs
171
- ) -> Record | None:
172
- """Returns True if found exact match, otherwise False.
173
-
174
- Logs similar matches if found.
175
- """
176
- if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
177
- return None
178
- # need to perform an additional request to find the exact match
179
- # previously, this was inferred from the truncated/fuzzy search below
180
- # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
181
- # the below needs to be .first() because there might be multiple records with the same
182
- # name field in case the record is versioned (e.g. for Transform key)
183
- exact_match = record.__class__.filter(**{name_field: kwargs[name_field]}).first()
184
- if exact_match is not None:
185
- return exact_match
186
- queryset = _search(
187
- record.__class__,
188
- kwargs[name_field],
189
- field=name_field,
190
- truncate_string=True,
191
- limit=3,
192
- )
193
- if not queryset.exists(): # empty queryset
194
- return None
195
- s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
196
- msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
197
- if IPYTHON:
198
- from IPython.display import display
199
-
200
- logger.warning(f"{msg}")
201
- if settings._verbosity_int >= 1:
202
- display(queryset.df())
203
- else:
204
- logger.warning(f"{msg}\n{queryset}")
205
- return None
206
-
207
-
208
- def __init__(record: Record, *args, **kwargs):
209
- skip_validation = kwargs.pop("_skip_validation", False)
210
- if not args and skip_validation:
211
- super(BasicRecord, record).__init__(**kwargs)
212
- elif not args and not skip_validation:
213
- validate_fields(record, kwargs)
214
-
215
- # do not search for names if an id is passed; this is important
216
- # e.g. when synching ids from the notebook store to lamindb
217
- has_consciously_provided_uid = False
218
- if "_has_consciously_provided_uid" in kwargs:
219
- has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
220
- if (
221
- isinstance(record, (CanCurate, Collection, Transform))
222
- and settings.creation.search_names
223
- and not has_consciously_provided_uid
224
- ):
225
- name_field = getattr(record, "_name_field", "name")
226
- exact_match = suggest_records_with_similar_names(record, name_field, kwargs)
227
- if exact_match is not None:
228
- if "version" in kwargs:
229
- if kwargs["version"] is not None:
230
- version_comment = " and version"
231
- existing_record = record.__class__.filter(
232
- **{
233
- name_field: kwargs[name_field],
234
- "version": kwargs["version"],
235
- }
236
- ).one_or_none()
237
- else:
238
- # for a versioned record, an exact name match is not a criterion
239
- # for retrieving a record in case `version` isn't passed -
240
- # we'd always pull out many records with exactly the same name
241
- existing_record = None
242
- else:
243
- version_comment = ""
244
- existing_record = exact_match
245
- if existing_record is not None:
246
- logger.important(
247
- f"returning existing {record.__class__.__name__} record with same"
248
- f" {name_field}{version_comment}: '{kwargs[name_field]}'"
249
- )
250
- if isinstance(record, Schema):
251
- if existing_record.hash != kwargs["hash"]:
252
- raise ValueError(
253
- f"Schema name is already in use by schema with uid '{existing_record.uid}', please choose a different name."
254
- )
255
- init_self_from_db(record, existing_record)
256
- update_attributes(record, kwargs)
257
- return None
258
- super(BasicRecord, record).__init__(**kwargs)
259
- if isinstance(record, ValidateFields):
260
- # this will trigger validation against django validators
261
- try:
262
- if hasattr(record, "clean_fields"):
263
- record.clean_fields()
264
- else:
265
- record._Model__clean_fields()
266
- except DjangoValidationError as e:
267
- message = _format_django_validation_error(record, e)
268
- raise FieldValidationError(message) from e
269
- elif len(args) != len(record._meta.concrete_fields):
270
- raise FieldValidationError(
271
- f"Use keyword arguments instead of positional arguments, e.g.: {record.__class__.__name__}(name='...')."
272
- )
273
- else:
274
- # object is loaded from DB (**kwargs could be omitted below, I believe)
275
- super(BasicRecord, record).__init__(*args, **kwargs)
276
- _store_record_old_name(record)
277
- _store_record_old_key(record)
278
-
279
-
280
- def _format_django_validation_error(record: Record, e: DjangoValidationError):
281
- """Pretty print Django validation errors."""
282
- errors = {}
283
- if hasattr(e, "error_dict"):
284
- error_dict = e.error_dict
285
- else:
286
- error_dict = {"__all__": e.error_list}
287
-
288
- for field_name, error_list in error_dict.items():
289
- for error in error_list:
290
- if hasattr(error, "message"):
291
- msg = error.message
292
- else:
293
- msg = str(error)
294
-
295
- if field_name == "__all__":
296
- errors[field_name] = f"{colors.yellow(msg)}"
297
- else:
298
- current_value = getattr(record, field_name, None)
299
- errors[field_name] = (
300
- f"{field_name}: {colors.yellow(current_value)} is not valid\n → {msg}"
301
- )
302
-
303
- if errors:
304
- message = "\n "
305
- for _, error in errors.items():
306
- message += error + "\n "
307
-
308
- return message
309
-
310
-
311
- def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
312
- """Gets the parameters of a Record from the overloaded signature.
313
-
314
- Example:
315
- >>> get_record_params(bt.Organism)
316
- >>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
317
- """
318
- source = inspect.getsource(record_class)
319
-
320
- # Find first overload that's not *db_args
321
- pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
322
- overloads = re.finditer(pattern, source)
323
-
324
- for overload in overloads:
325
- params_block = overload.group(1)
326
- # This is an additional safety measure if the overloaded signature that we're
327
- # looking for is not at the top but a "db_args" constructor
328
- if "*db_args" in params_block:
329
- continue
330
-
331
- params = []
332
- for line in params_block.split("\n"):
333
- line = line.strip()
334
- if not line or "self" in line:
335
- continue
336
-
337
- # Extract name and type annotation
338
- # The regex pattern finds parameter definitions like:
339
- # Simple: name: str
340
- # With default: age: int = 0
341
- # With complex types: items: List[str] = []
342
- param_pattern = (
343
- r"(\w+)" # Parameter name
344
- r"\s*:\s*" # Colon with optional whitespace
345
- r"((?:[^=,]|" # Type hint: either non-equals/comma chars
346
- r"(?<=\[)[^[\]]*" # or contents within square brackets
347
- r"(?=\]))+)" # looking ahead for closing bracket
348
- r"(?:\s*=\s*" # Optional default value part
349
- r"([^,]+))?" # Default value: anything but comma
350
- )
351
- match = re.match(param_pattern, line)
352
- if not match:
353
- continue
354
-
355
- name, type_str = match.group(1), match.group(2).strip()
356
-
357
- # Keep type as string instead of evaluating
358
- params.append((name, type_str))
359
-
360
- return params
361
-
362
- return []
363
-
364
-
365
- @classmethod # type:ignore
366
- @doc_args(Record.filter.__doc__)
367
- def filter(cls, *queries, **expressions) -> QuerySet:
368
- """{}""" # noqa: D415
369
- from lamindb._query_set import QuerySet
370
-
371
- _using_key = None
372
- if "_using_key" in expressions:
373
- _using_key = expressions.pop("_using_key")
374
-
375
- return QuerySet(model=cls, using=_using_key).filter(*queries, **expressions)
376
-
377
-
378
- @classmethod # type:ignore
379
- @doc_args(Record.get.__doc__)
380
- def get(
381
- cls,
382
- idlike: int | str | None = None,
383
- **expressions,
384
- ) -> Record:
385
- """{}""" # noqa: D415
386
- from lamindb._query_set import QuerySet
387
-
388
- return QuerySet(model=cls).get(idlike, **expressions)
389
-
390
-
391
- @classmethod # type:ignore
392
- @doc_args(Record.df.__doc__)
393
- def df(
394
- cls,
395
- include: str | list[str] | None = None,
396
- features: bool | list[str] = False,
397
- limit: int = 100,
398
- ) -> pd.DataFrame:
399
- """{}""" # noqa: D415
400
- query_set = cls.filter()
401
- if hasattr(cls, "updated_at"):
402
- query_set = query_set.order_by("-updated_at")
403
- return query_set[:limit].df(include=include, features=features)
404
-
405
-
406
- def _search(
407
- cls,
408
- string: str,
409
- *,
410
- field: StrField | list[StrField] | None = None,
411
- limit: int | None = 20,
412
- case_sensitive: bool = False,
413
- using_key: str | None = None,
414
- truncate_string: bool = False,
415
- ) -> QuerySet:
416
- if string is None:
417
- raise ValueError("Cannot search for None value! Please pass a valid string.")
418
-
419
- input_queryset = _queryset(cls, using_key=using_key)
420
- registry = input_queryset.model
421
- name_field = getattr(registry, "_name_field", "name")
422
- if field is None:
423
- fields = [
424
- field.name
425
- for field in registry._meta.fields
426
- if field.get_internal_type() in {"CharField", "TextField"}
427
- ]
428
- else:
429
- if not isinstance(field, list):
430
- fields_input = [field]
431
- else:
432
- fields_input = field
433
- fields = []
434
- for field in fields_input:
435
- if not isinstance(field, str):
436
- try:
437
- fields.append(field.field.name)
438
- except AttributeError as error:
439
- raise TypeError(
440
- "Please pass a Record string field, e.g., `CellType.name`!"
441
- ) from error
442
- else:
443
- fields.append(field)
444
-
445
- if truncate_string:
446
- if (len_string := len(string)) > 5:
447
- n_80_pct = int(len_string * 0.8)
448
- string = string[:n_80_pct]
449
-
450
- string = string.strip()
451
- string_escape = re.escape(string)
452
-
453
- exact_lookup = Exact if case_sensitive else IExact
454
- regex_lookup = Regex if case_sensitive else IRegex
455
- contains_lookup = Contains if case_sensitive else IContains
456
-
457
- ranks = []
458
- contains_filters = []
459
- for field in fields:
460
- field_expr = Coalesce(
461
- Cast(field, output_field=TextField()),
462
- Value(""),
463
- output_field=TextField(),
464
- )
465
- # exact rank
466
- exact_expr = exact_lookup(field_expr, string)
467
- exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
468
- ranks.append(exact_rank)
469
- # exact synonym
470
- synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
471
- synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
472
- ranks.append(synonym_rank)
473
- # match as sub-phrase
474
- sub_expr = regex_lookup(
475
- field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
476
- )
477
- sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
478
- ranks.append(sub_rank)
479
- # startswith and avoid matching string with " " on the right
480
- # mostly for truncated
481
- startswith_expr = regex_lookup(
482
- field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
483
- )
484
- startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
485
- ranks.append(startswith_rank)
486
- # match as sub-phrase from the left, mostly for truncated
487
- right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
488
- right_rank = Cast(right_expr, output_field=IntegerField()) * 2
489
- ranks.append(right_rank)
490
- # match as sub-phrase from the right
491
- left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
492
- left_rank = Cast(left_expr, output_field=IntegerField()) * 2
493
- ranks.append(left_rank)
494
- # simple contains filter
495
- contains_expr = contains_lookup(field_expr, string)
496
- contains_filter = Q(contains_expr)
497
- contains_filters.append(contains_filter)
498
- # also rank by contains
499
- contains_rank = Cast(contains_expr, output_field=IntegerField())
500
- ranks.append(contains_rank)
501
- # additional rule for truncated strings
502
- # weight matches from the beginning of the string higher
503
- # sometimes whole words get truncated and startswith_expr is not enough
504
- if truncate_string and field == name_field:
505
- startswith_lookup = StartsWith if case_sensitive else IStartsWith
506
- name_startswith_expr = startswith_lookup(field_expr, string)
507
- name_startswith_rank = (
508
- Cast(name_startswith_expr, output_field=IntegerField()) * 2
509
- )
510
- ranks.append(name_startswith_rank)
511
-
512
- ranked_queryset = (
513
- input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
514
- .alias(rank=sum(ranks))
515
- .order_by("-rank")
516
- )
517
-
518
- return ranked_queryset[:limit]
519
-
520
-
521
- @classmethod # type: ignore
522
- @doc_args(Record.search.__doc__)
523
- def search(
524
- cls,
525
- string: str,
526
- *,
527
- field: StrField | None = None,
528
- limit: int | None = 20,
529
- case_sensitive: bool = False,
530
- ) -> QuerySet:
531
- """{}""" # noqa: D415
532
- return _search(
533
- cls=cls,
534
- string=string,
535
- field=field,
536
- limit=limit,
537
- case_sensitive=case_sensitive,
538
- )
539
-
540
-
541
- def _lookup(
542
- cls,
543
- field: StrField | None = None,
544
- return_field: StrField | None = None,
545
- using_key: str | None = None,
546
- ) -> NamedTuple:
547
- """{}""" # noqa: D415
548
- queryset = _queryset(cls, using_key=using_key)
549
- field = get_name_field(registry=queryset.model, field=field)
550
-
551
- return Lookup(
552
- records=queryset,
553
- values=[i.get(field) for i in queryset.values()],
554
- tuple_name=cls.__class__.__name__,
555
- prefix="ln",
556
- ).lookup(
557
- return_field=(
558
- get_name_field(registry=queryset.model, field=return_field)
559
- if return_field is not None
560
- else None
561
- )
562
- )
563
-
564
-
565
- @classmethod # type: ignore
566
- @doc_args(Record.lookup.__doc__)
567
- def lookup(
568
- cls,
569
- field: StrField | None = None,
570
- return_field: StrField | None = None,
571
- ) -> NamedTuple:
572
- """{}""" # noqa: D415
573
- return _lookup(cls=cls, field=field, return_field=return_field)
574
-
575
-
576
- def get_name_field(
577
- registry: type[Record] | QuerySet | Manager,
578
- *,
579
- field: str | StrField | None = None,
580
- ) -> str:
581
- """Get the 1st char or text field from the registry."""
582
- if isinstance(registry, (QuerySet, Manager)):
583
- registry = registry.model
584
- model_field_names = [i.name for i in registry._meta.fields]
585
-
586
- # set to default name field
587
- if field is None:
588
- if hasattr(registry, "_name_field"):
589
- field = registry._meta.get_field(registry._name_field)
590
- elif "name" in model_field_names:
591
- field = registry._meta.get_field("name")
592
- else:
593
- # first char or text field that doesn't contain "id"
594
- for i in registry._meta.fields:
595
- if "id" in i.name:
596
- continue
597
- if i.get_internal_type() in {"CharField", "TextField"}:
598
- field = i
599
- break
600
-
601
- # no default name field can be found
602
- if field is None:
603
- raise ValueError(
604
- "please pass a Record string field, e.g., `CellType.name`!"
605
- )
606
- else:
607
- field = field.name # type:ignore
608
- if not isinstance(field, str):
609
- try:
610
- field = field.field.name
611
- except AttributeError:
612
- raise TypeError(
613
- "please pass a Record string field, e.g., `CellType.name`!"
614
- ) from None
615
-
616
- return field
617
-
618
-
619
- def _queryset(cls: Record | QuerySet | Manager, using_key: str) -> QuerySet:
620
- if isinstance(cls, (QuerySet, Manager)):
621
- return cls.all()
622
- elif using_key is None or using_key == "default":
623
- return cls.objects.all()
624
- else:
625
- # using must be called on cls, otherwise the connection isn't found
626
- return cls.using(using_key).all()
627
-
628
-
629
- def add_db_connection(db: str, using: str):
630
- db_config = dj_database_url.config(
631
- default=db, conn_max_age=600, conn_health_checks=True
632
- )
633
- db_config["TIME_ZONE"] = "UTC"
634
- db_config["OPTIONS"] = {}
635
- db_config["AUTOCOMMIT"] = True
636
- connections.settings[using] = db_config
637
-
638
-
639
- @classmethod # type: ignore
640
- @doc_args(Record.using.__doc__)
641
- def using(
642
- cls,
643
- instance: str | None,
644
- ) -> QuerySet:
645
- """{}""" # noqa: D415
646
- from ._query_set import QuerySet
647
-
648
- if instance is None:
649
- return QuerySet(model=cls, using=None)
650
- owner, name = get_owner_name_from_identifier(instance)
651
- settings_file = instance_settings_file(name, owner)
652
- cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
653
- if not settings_file.exists():
654
- result = connect_instance_hub(owner=owner, name=name)
655
- if isinstance(result, str):
656
- raise RuntimeError(
657
- f"Failed to load instance {instance}, please check your permissions!"
658
- )
659
- iresult, _ = result
660
- source_module = {
661
- modules for modules in iresult["schema_str"].split(",") if modules != ""
662
- } # type: ignore
663
- target_module = ln_setup.settings.instance.modules
664
- if not source_module.issubset(target_module):
665
- missing_members = source_module - target_module
666
- logger.warning(
667
- f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
668
- )
669
- cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}") # type: ignore
670
- settings_file = instance_settings_file(name, owner)
671
- db = update_db_using_local(iresult, settings_file)
672
- else:
673
- isettings = load_instance_settings(settings_file)
674
- db = isettings.db
675
- cache_filepath.write_text(f"{isettings.uid}\n{','.join(isettings.modules)}") # type: ignore
676
- add_db_connection(db, instance)
677
- return QuerySet(model=cls, using=instance)
678
-
679
-
680
- REGISTRY_UNIQUE_FIELD = {
681
- "storage": "root",
682
- "feature": "name",
683
- "ulabel": "name",
684
- "space": "name", # TODO: this should be updated with the currently used space instead during transfer
685
- }
686
-
687
-
688
- def update_fk_to_default_db(
689
- records: Record | list[Record] | QuerySet,
690
- fk: str,
691
- using_key: str | None,
692
- transfer_logs: dict,
693
- ):
694
- record = records[0] if isinstance(records, (list, QuerySet)) else records
695
- if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
696
- fk_record = getattr(record, fk)
697
- field = REGISTRY_UNIQUE_FIELD.get(fk, "uid")
698
- fk_record_default = fk_record.__class__.filter(
699
- **{field: getattr(fk_record, field)}
700
- ).one_or_none()
701
- if fk_record_default is None:
702
- from copy import copy
703
-
704
- fk_record_default = copy(fk_record)
705
- transfer_to_default_db(
706
- fk_record_default, using_key, save=True, transfer_logs=transfer_logs
707
- )
708
- if isinstance(records, (list, QuerySet)):
709
- for r in records:
710
- setattr(r, f"{fk}", None)
711
- setattr(r, f"{fk}_id", fk_record_default.id)
712
- else:
713
- setattr(records, f"{fk}", None)
714
- setattr(records, f"{fk}_id", fk_record_default.id)
715
-
716
-
717
- FKBULK = [
718
- "organism",
719
- "source",
720
- "report", # Run
721
- ]
722
-
723
-
724
- def transfer_fk_to_default_db_bulk(
725
- records: list | QuerySet, using_key: str | None, transfer_logs: dict
726
- ):
727
- for fk in FKBULK:
728
- update_fk_to_default_db(records, fk, using_key, transfer_logs=transfer_logs)
729
-
730
-
731
- def get_transfer_run(record) -> Run:
732
- from lamindb.core._context import context
733
- from lamindb.core._data import WARNING_RUN_TRANSFORM
734
-
735
- slug = record._state.db
736
- owner, name = get_owner_name_from_identifier(slug)
737
- cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
738
- if not cache_filepath.exists():
739
- raise SystemExit("Need to call .using() before")
740
- instance_uid = cache_filepath.read_text().split("\n")[0]
741
- key = f"transfers/{instance_uid}"
742
- uid = instance_uid + "0000"
743
- transform = Transform.filter(uid=uid).one_or_none()
744
- if transform is None:
745
- search_names = settings.creation.search_names
746
- settings.creation.search_names = False
747
- transform = Transform( # type: ignore
748
- uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
749
- ).save()
750
- settings.creation.search_names = search_names
751
- # use the global run context to get the initiated_by_run run id
752
- if context.run is not None:
753
- initiated_by_run = context.run
754
- else:
755
- if not settings.creation.artifact_silence_missing_run_warning:
756
- logger.warning(WARNING_RUN_TRANSFORM)
757
- initiated_by_run = None
758
- # it doesn't seem to make sense to create new runs for every transfer
759
- run = Run.filter(
760
- transform=transform, initiated_by_run=initiated_by_run
761
- ).one_or_none()
762
- if run is None:
763
- run = Run(transform=transform, initiated_by_run=initiated_by_run).save() # type: ignore
764
- run.initiated_by_run = initiated_by_run # so that it's available in memory
765
- return run
766
-
767
-
768
- def transfer_to_default_db(
769
- record: Record,
770
- using_key: str | None,
771
- *,
772
- transfer_logs: dict,
773
- save: bool = False,
774
- transfer_fk: bool = True,
775
- ) -> Record | None:
776
- if record._state.db is None or record._state.db == "default":
777
- return None
778
- registry = record.__class__
779
- record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
780
- record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
781
- if transfer_logs["run"] is None:
782
- transfer_logs["run"] = get_transfer_run(record)
783
- if record_on_default is not None:
784
- transfer_logs["mapped"].append(record_str)
785
- return record_on_default
786
- else:
787
- transfer_logs["transferred"].append(record_str)
788
-
789
- if hasattr(record, "created_by_id"):
790
- record.created_by = None
791
- record.created_by_id = ln_setup.settings.user.id
792
- # run & transform
793
- run = transfer_logs["run"]
794
- if hasattr(record, "run_id"):
795
- record.run = None
796
- record.run_id = run.id
797
- # deal with denormalized transform FK on artifact and collection
798
- if hasattr(record, "transform_id"):
799
- record.transform = None
800
- record.transform_id = run.transform_id
801
- # transfer other foreign key fields
802
- fk_fields = [
803
- i.name
804
- for i in record._meta.fields
805
- if i.get_internal_type() == "ForeignKey"
806
- if i.name not in {"created_by", "run", "transform"}
807
- ]
808
- if not transfer_fk:
809
- # don't transfer fk fields that are already bulk transferred
810
- fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
811
- for fk in fk_fields:
812
- update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)
813
- record.id = None
814
- record._state.db = "default"
815
- if save:
816
- record.save()
817
- return None
818
-
819
-
820
- # docstring handled through attach_func_to_class_method
821
- def save(self, *args, **kwargs) -> Record:
822
- using_key = None
823
- if "using" in kwargs:
824
- using_key = kwargs["using"]
825
- db = self._state.db
826
- pk_on_db = self.pk
827
- artifacts: list = []
828
- if self.__class__.__name__ == "Collection" and self.id is not None:
829
- # when creating a new collection without being able to access artifacts
830
- artifacts = self.ordered_artifacts.list()
831
- pre_existing_record = None
832
- # consider records that are being transferred from other databases
833
- transfer_logs: dict[str, list[str]] = {"mapped": [], "transferred": [], "run": None}
834
- if db is not None and db != "default" and using_key is None:
835
- if isinstance(self, IsVersioned):
836
- if not self.is_latest:
837
- raise NotImplementedError(
838
- "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
839
- )
840
- pre_existing_record = transfer_to_default_db(
841
- self, using_key, transfer_logs=transfer_logs
842
- )
843
- if pre_existing_record is not None:
844
- init_self_from_db(self, pre_existing_record)
845
- else:
846
- check_key_change(self)
847
- check_name_change(self)
848
- try:
849
- # save versioned record in presence of self._revises
850
- if isinstance(self, IsVersioned) and self._revises is not None:
851
- assert self._revises.is_latest # noqa: S101
852
- revises = self._revises
853
- revises.is_latest = False
854
- with transaction.atomic():
855
- revises._revises = None # ensure we don't start a recursion
856
- revises.save()
857
- super(BasicRecord, self).save(*args, **kwargs) # type: ignore
858
- self._revises = None
859
- # save unversioned record
860
- else:
861
- super(BasicRecord, self).save(*args, **kwargs)
862
- except IntegrityError as e:
863
- error_msg = str(e)
864
- # two possible error messages for hash duplication
865
- # "duplicate key value violates unique constraint"
866
- # "UNIQUE constraint failed"
867
- if (
868
- "UNIQUE constraint failed" in error_msg
869
- or "duplicate key value violates unique constraint" in error_msg
870
- ) and "hash" in error_msg:
871
- pre_existing_record = self.__class__.get(hash=self.hash)
872
- logger.warning(
873
- f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
874
- )
875
- init_self_from_db(self, pre_existing_record)
876
- else:
877
- raise
878
- _store_record_old_name(self)
879
- _store_record_old_key(self)
880
- # perform transfer of many-to-many fields
881
- # only supported for Artifact and Collection records
882
- if db is not None and db != "default" and using_key is None:
883
- if self.__class__.__name__ == "Collection":
884
- if len(artifacts) > 0:
885
- logger.info("transfer artifacts")
886
- for artifact in artifacts:
887
- artifact.save()
888
- self.artifacts.add(*artifacts)
889
- if hasattr(self, "labels"):
890
- from copy import copy
891
-
892
- from lamindb.models import FeatureManager
893
-
894
- # here we go back to original record on the source database
895
- self_on_db = copy(self)
896
- self_on_db._state.db = db
897
- self_on_db.pk = pk_on_db # manually set the primary key
898
- self_on_db.features = FeatureManager(self_on_db) # type: ignore
899
- self.features._add_from(self_on_db, transfer_logs=transfer_logs)
900
- self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
901
- for k, v in transfer_logs.items():
902
- if k != "run":
903
- logger.important(f"{k} records: {', '.join(v)}")
904
- return self
905
-
906
-
907
- def _store_record_old_name(record: Record):
908
- # writes the name to the _name attribute, so we can detect renaming upon save
909
- if hasattr(record, "_name_field"):
910
- record._old_name = getattr(record, record._name_field)
911
-
912
-
913
- def _store_record_old_key(record: Record):
914
- # writes the key to the _old_key attribute, so we can detect key changes upon save
915
- if isinstance(record, (Artifact, Transform)):
916
- record._old_key = record.key
917
-
918
-
919
- def check_name_change(record: Record):
920
- """Warns if a record's name has changed."""
921
- if (
922
- not record.pk
923
- or not hasattr(record, "_old_name")
924
- or not hasattr(record, "_name_field")
925
- ):
926
- return
927
-
928
- # checked in check_key_change or not checked at all
929
- if isinstance(record, (Artifact, Collection, Transform)):
930
- return
931
-
932
- # renaming feature sets is not checked
933
- if isinstance(record, Schema):
934
- return
935
-
936
- old_name = record._old_name
937
- new_name = getattr(record, record._name_field)
938
- registry = record.__class__.__name__
939
-
940
- if old_name != new_name:
941
- # when a label is renamed, only raise a warning if it has a feature
942
- if hasattr(record, "artifacts"):
943
- linked_records = (
944
- record.artifacts.through.filter(
945
- label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
946
- )
947
- .exclude(feature_id=None) # must have a feature
948
- .exclude(
949
- feature_ref_is_name=None
950
- ) # must be linked via Curator and therefore part of a schema
951
- .distinct()
952
- )
953
- artifact_ids = linked_records.list("artifact__uid")
954
- n = len(artifact_ids)
955
- if n > 0:
956
- s = "s" if n > 1 else ""
957
- logger.error(
958
- f"You are trying to {colors.red('rename label')} from '{old_name}' to '{new_name}'!\n"
959
- f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {artifact_ids}\n\n"
960
- f"{colors.bold('To rename this label')}, make it external:\n"
961
- f" → run `artifact.labels.make_external(label)`\n\n"
962
- f"After renaming, consider re-curating the above artifact{s}:\n"
963
- f' → in each dataset, manually modify label "{old_name}" to "{new_name}"\n'
964
- f" → run `ln.Curator`\n"
965
- )
966
- raise RecordNameChangeIntegrityError
967
-
968
- # when a feature is renamed
969
- elif isinstance(record, Feature):
970
- # only internal features are associated with schemas
971
- linked_artifacts = Artifact.filter(feature_sets__features=record).list(
972
- "uid"
973
- )
974
- n = len(linked_artifacts)
975
- if n > 0:
976
- s = "s" if n > 1 else ""
977
- logger.error(
978
- f"You are trying to {colors.red('rename feature')} from '{old_name}' to '{new_name}'!\n"
979
- f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {linked_artifacts}\n\n"
980
- f"{colors.bold('To rename this feature')}, make it external:\n"
981
- " → run `artifact.features.make_external(feature)`\n\n"
982
- f"After renaming, consider re-curating the above artifact{s}:\n"
983
- f" → in each dataset, manually modify feature '{old_name}' to '{new_name}'\n"
984
- f" → run `ln.Curator`\n"
985
- )
986
- raise RecordNameChangeIntegrityError
987
-
988
-
989
- def check_key_change(record: Artifact | Transform):
990
- """Errors if a record's key has falsely changed."""
991
- if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
992
- return
993
-
994
- old_key = record._old_key or ""
995
- new_key = record.key or ""
996
-
997
- if old_key != new_key:
998
- if not record._key_is_virtual:
999
- raise InvalidArgument(
1000
- f"Changing a non-virtual key of an artifact is not allowed! Tried to change key from '{old_key}' to '{new_key}'."
1001
- )
1002
- old_key_suffix = (
1003
- record.suffix
1004
- if record.suffix
1005
- else extract_suffix_from_path(PurePosixPath(old_key), arg_name="key")
1006
- )
1007
- new_key_suffix = extract_suffix_from_path(
1008
- PurePosixPath(new_key), arg_name="key"
1009
- )
1010
- if old_key_suffix != new_key_suffix:
1011
- raise InvalidArgument(
1012
- f"The suffix '{new_key_suffix}' of the provided key is incorrect, it should be '{old_key_suffix}'."
1013
- )
1014
-
1015
-
1016
- def delete(self) -> None:
1017
- """Delete the record."""
1018
- # note that the logic below does not fire if a record is moved to the trash
1019
- # the idea is that moving a record to the trash should move its entire version family
1020
- # to the trash, whereas permanently deleting should default to only deleting a single record
1021
- # of a version family
1022
- # we can consider making it easy to permanently delete entire version families as well,
1023
- # but that's for another time
1024
- if isinstance(self, IsVersioned) and self.is_latest:
1025
- new_latest = (
1026
- self.__class__.objects.using(self._state.db)
1027
- .filter(is_latest=False, uid__startswith=self.stem_uid)
1028
- .order_by("-created_at")
1029
- .first()
1030
- )
1031
- if new_latest is not None:
1032
- new_latest.is_latest = True
1033
- with transaction.atomic():
1034
- new_latest.save()
1035
- super(BasicRecord, self).delete() # type: ignore
1036
- logger.warning(f"new latest version is {new_latest}")
1037
- return None
1038
- super(BasicRecord, self).delete()
1039
-
1040
-
1041
- METHOD_NAMES = [
1042
- "__init__",
1043
- "filter",
1044
- "get",
1045
- "df",
1046
- "search",
1047
- "lookup",
1048
- "save",
1049
- "delete",
1050
- "using",
1051
- ]
1052
-
1053
- if ln_setup._TESTING: # type: ignore
1054
- from inspect import signature
1055
-
1056
- SIGS = {
1057
- name: signature(getattr(Record, name))
1058
- for name in METHOD_NAMES
1059
- if not name.startswith("__")
1060
- }
1061
-
1062
- for name in METHOD_NAMES:
1063
- attach_func_to_class_method(name, BasicRecord, globals())
1064
- attach_func_to_class_method(name, Record, globals())