lamindb 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. lamindb/__init__.py +33 -26
  2. lamindb/_finish.py +9 -1
  3. lamindb/_tracked.py +26 -3
  4. lamindb/_view.py +2 -3
  5. lamindb/base/__init__.py +1 -1
  6. lamindb/base/ids.py +1 -10
  7. lamindb/base/users.py +1 -4
  8. lamindb/core/__init__.py +7 -65
  9. lamindb/core/_compat.py +60 -0
  10. lamindb/core/_context.py +50 -22
  11. lamindb/core/_mapped_collection.py +4 -2
  12. lamindb/core/_settings.py +6 -6
  13. lamindb/core/_sync_git.py +1 -1
  14. lamindb/core/_track_environment.py +2 -1
  15. lamindb/core/datasets/_small.py +3 -3
  16. lamindb/core/loaders.py +43 -20
  17. lamindb/core/storage/_anndata_accessor.py +8 -3
  18. lamindb/core/storage/_backed_access.py +14 -7
  19. lamindb/core/storage/_pyarrow_dataset.py +24 -9
  20. lamindb/core/storage/_tiledbsoma.py +8 -6
  21. lamindb/core/storage/_zarr.py +104 -25
  22. lamindb/core/storage/objects.py +63 -28
  23. lamindb/core/storage/paths.py +16 -13
  24. lamindb/core/types.py +10 -0
  25. lamindb/curators/__init__.py +176 -149
  26. lamindb/errors.py +1 -1
  27. lamindb/integrations/_vitessce.py +4 -4
  28. lamindb/migrations/0089_subsequent_runs.py +159 -0
  29. lamindb/migrations/0090_runproject_project_runs.py +73 -0
  30. lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
  31. lamindb/models/__init__.py +79 -0
  32. lamindb/{core → models}/_describe.py +3 -3
  33. lamindb/{core → models}/_django.py +8 -5
  34. lamindb/{core → models}/_feature_manager.py +103 -87
  35. lamindb/{_from_values.py → models/_from_values.py} +5 -2
  36. lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
  37. lamindb/{core → models}/_label_manager.py +10 -17
  38. lamindb/{core/relations.py → models/_relations.py} +8 -1
  39. lamindb/models/artifact.py +2602 -0
  40. lamindb/{_can_curate.py → models/can_curate.py} +349 -180
  41. lamindb/models/collection.py +683 -0
  42. lamindb/models/core.py +135 -0
  43. lamindb/models/feature.py +643 -0
  44. lamindb/models/flextable.py +163 -0
  45. lamindb/{_parents.py → models/has_parents.py} +55 -49
  46. lamindb/models/project.py +384 -0
  47. lamindb/{_query_manager.py → models/query_manager.py} +10 -8
  48. lamindb/{_query_set.py → models/query_set.py} +64 -32
  49. lamindb/models/record.py +1762 -0
  50. lamindb/models/run.py +563 -0
  51. lamindb/{_save.py → models/save.py} +18 -8
  52. lamindb/models/schema.py +732 -0
  53. lamindb/models/transform.py +360 -0
  54. lamindb/models/ulabel.py +249 -0
  55. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
  56. lamindb-1.2.0.dist-info/RECORD +95 -0
  57. lamindb/_artifact.py +0 -1361
  58. lamindb/_collection.py +0 -440
  59. lamindb/_feature.py +0 -316
  60. lamindb/_is_versioned.py +0 -40
  61. lamindb/_record.py +0 -1065
  62. lamindb/_run.py +0 -60
  63. lamindb/_schema.py +0 -347
  64. lamindb/_storage.py +0 -15
  65. lamindb/_transform.py +0 -170
  66. lamindb/_ulabel.py +0 -56
  67. lamindb/_utils.py +0 -9
  68. lamindb/base/validation.py +0 -63
  69. lamindb/core/_data.py +0 -491
  70. lamindb/core/fields.py +0 -12
  71. lamindb/models.py +0 -4435
  72. lamindb-1.1.0.dist-info/RECORD +0 -95
  73. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
  74. {lamindb-1.1.0.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
lamindb/_record.py DELETED
@@ -1,1065 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import builtins
4
- import inspect
5
- import re
6
- from functools import reduce
7
- from pathlib import PurePosixPath
8
- from typing import TYPE_CHECKING, NamedTuple
9
-
10
- import dj_database_url
11
- import lamindb_setup as ln_setup
12
- from django.core.exceptions import ValidationError as DjangoValidationError
13
- from django.db import connections, transaction
14
- from django.db.models import (
15
- IntegerField,
16
- Manager,
17
- Q,
18
- QuerySet,
19
- TextField,
20
- Value,
21
- )
22
- from django.db.models.functions import Cast, Coalesce
23
- from django.db.models.lookups import (
24
- Contains,
25
- Exact,
26
- IContains,
27
- IExact,
28
- IRegex,
29
- IStartsWith,
30
- Regex,
31
- StartsWith,
32
- )
33
- from django.db.utils import IntegrityError
34
- from lamin_utils import colors, logger
35
- from lamin_utils._lookup import Lookup
36
- from lamindb_setup._connect_instance import (
37
- get_owner_name_from_identifier,
38
- load_instance_settings,
39
- update_db_using_local,
40
- )
41
- from lamindb_setup.core._docs import doc_args
42
- from lamindb_setup.core._hub_core import connect_instance_hub
43
- from lamindb_setup.core._settings_store import instance_settings_file
44
- from lamindb_setup.core.upath import extract_suffix_from_path
45
-
46
- from lamindb.errors import FieldValidationError
47
- from lamindb.models import (
48
- Artifact,
49
- BasicRecord,
50
- CanCurate,
51
- Collection,
52
- Feature,
53
- IsVersioned,
54
- Param,
55
- Record,
56
- Run,
57
- Schema,
58
- Transform,
59
- ULabel,
60
- ValidateFields,
61
- )
62
-
63
- from ._utils import attach_func_to_class_method
64
- from .core._settings import settings
65
- from .errors import (
66
- InvalidArgument,
67
- RecordNameChangeIntegrityError,
68
- ValidationError,
69
- )
70
-
71
- if TYPE_CHECKING:
72
- import pandas as pd
73
-
74
- from lamindb.base.types import StrField
75
-
76
-
77
- IPYTHON = getattr(builtins, "__IPYTHON__", False)
78
-
79
-
80
- def is_approx_pascal_case(s):
81
- """Check if the last component of a dotted string is in PascalCase.
82
-
83
- Args:
84
- s (str): The string to check
85
-
86
- Returns:
87
- bool: True if the last component is in PascalCase
88
-
89
- Raises:
90
- ValueError: If the last component doesn't start with a capital letter
91
- """
92
- if "[" in s: # this is because we allow types of form 'script[test_script.py]'
93
- return True
94
- last_component = s.split(".")[-1]
95
-
96
- if not last_component[0].isupper():
97
- raise ValueError(
98
- f"'{last_component}' should start with a capital letter given you're defining a type"
99
- )
100
-
101
- return True
102
-
103
-
104
- def init_self_from_db(self: Record, existing_record: Record):
105
- new_args = [
106
- getattr(existing_record, field.attname) for field in self._meta.concrete_fields
107
- ]
108
- super(self.__class__, self).__init__(*new_args)
109
- self._state.adding = False # mimic from_db
110
- self._state.db = "default"
111
-
112
-
113
- def update_attributes(record: Record, attributes: dict[str, str]):
114
- for key, value in attributes.items():
115
- if (
116
- getattr(record, key) != value
117
- and value is not None
118
- and key != "dtype"
119
- and key != "_aux"
120
- ):
121
- logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
122
- setattr(record, key, value)
123
-
124
-
125
- def validate_fields(record: Record, kwargs):
126
- from lamindb.base.validation import validate_literal_fields
127
-
128
- # validate required fields
129
- # a "required field" is a Django field that has `null=False, default=None`
130
- required_fields = {
131
- k.name for k in record._meta.fields if not k.null and k.default is None
132
- }
133
- required_fields_not_passed = {k: None for k in required_fields if k not in kwargs}
134
- kwargs.update(required_fields_not_passed)
135
- missing_fields = [
136
- k for k, v in kwargs.items() if v is None and k in required_fields
137
- ]
138
- if missing_fields:
139
- raise FieldValidationError(f"{missing_fields} are required.")
140
- # ensure the exact length of the internal uid for core entities
141
- if "uid" in kwargs and record.__class__ in {
142
- Artifact,
143
- Collection,
144
- Transform,
145
- Run,
146
- ULabel,
147
- Feature,
148
- Schema,
149
- Param,
150
- }:
151
- uid_max_length = record.__class__._meta.get_field(
152
- "uid"
153
- ).max_length # triggers FieldDoesNotExist
154
- if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
155
- raise ValidationError(
156
- f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
157
- )
158
- # validate is_type
159
- if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
160
- if kwargs["name"].endswith("s"):
161
- logger.warning(
162
- f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
163
- )
164
- is_approx_pascal_case(kwargs["name"])
165
- # validate literals
166
- validate_literal_fields(record, kwargs)
167
-
168
-
169
- def suggest_records_with_similar_names(
170
- record: Record, name_field: str, kwargs
171
- ) -> Record | None:
172
- """Returns True if found exact match, otherwise False.
173
-
174
- Logs similar matches if found.
175
- """
176
- if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
177
- return None
178
- # need to perform an additional request to find the exact match
179
- # previously, this was inferred from the truncated/fuzzy search below
180
- # but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
181
- # the below needs to be .first() because there might be multiple records with the same
182
- # name field in case the record is versioned (e.g. for Transform key)
183
- exact_match = record.__class__.filter(**{name_field: kwargs[name_field]}).first()
184
- if exact_match is not None:
185
- return exact_match
186
- queryset = _search(
187
- record.__class__,
188
- kwargs[name_field],
189
- field=name_field,
190
- truncate_string=True,
191
- limit=3,
192
- )
193
- if not queryset.exists(): # empty queryset
194
- return None
195
- s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
196
- msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
197
- if IPYTHON:
198
- from IPython.display import display
199
-
200
- logger.warning(f"{msg}")
201
- if settings._verbosity_int >= 1:
202
- display(queryset.df())
203
- else:
204
- logger.warning(f"{msg}\n{queryset}")
205
- return None
206
-
207
-
208
- def __init__(record: Record, *args, **kwargs):
209
- skip_validation = kwargs.pop("_skip_validation", False)
210
- if not args and skip_validation:
211
- super(BasicRecord, record).__init__(**kwargs)
212
- elif not args and not skip_validation:
213
- validate_fields(record, kwargs)
214
-
215
- # do not search for names if an id is passed; this is important
216
- # e.g. when synching ids from the notebook store to lamindb
217
- has_consciously_provided_uid = False
218
- if "_has_consciously_provided_uid" in kwargs:
219
- has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
220
- if (
221
- isinstance(record, (CanCurate, Collection, Transform))
222
- and settings.creation.search_names
223
- and not has_consciously_provided_uid
224
- ):
225
- name_field = getattr(record, "_name_field", "name")
226
- exact_match = suggest_records_with_similar_names(record, name_field, kwargs)
227
- if exact_match is not None:
228
- if "version" in kwargs:
229
- if kwargs["version"] is not None:
230
- version_comment = " and version"
231
- existing_record = record.__class__.filter(
232
- **{
233
- name_field: kwargs[name_field],
234
- "version": kwargs["version"],
235
- }
236
- ).one_or_none()
237
- else:
238
- # for a versioned record, an exact name match is not a criterion
239
- # for retrieving a record in case `version` isn't passed -
240
- # we'd always pull out many records with exactly the same name
241
- existing_record = None
242
- else:
243
- version_comment = ""
244
- existing_record = exact_match
245
- if existing_record is not None:
246
- logger.important(
247
- f"returning existing {record.__class__.__name__} record with same"
248
- f" {name_field}{version_comment}: '{kwargs[name_field]}'"
249
- )
250
- if isinstance(record, Schema):
251
- if Artifact.filter(schema=record).exists():
252
- if record.hash != kwargs["hash"]:
253
- raise ValueError(
254
- "Schema is already in use, can't be changed."
255
- )
256
- init_self_from_db(record, existing_record)
257
- update_attributes(record, kwargs)
258
- return None
259
- super(BasicRecord, record).__init__(**kwargs)
260
- if isinstance(record, ValidateFields):
261
- # this will trigger validation against django validators
262
- try:
263
- if hasattr(record, "clean_fields"):
264
- record.clean_fields()
265
- else:
266
- record._Model__clean_fields()
267
- except DjangoValidationError as e:
268
- message = _format_django_validation_error(record, e)
269
- raise FieldValidationError(message) from e
270
- elif len(args) != len(record._meta.concrete_fields):
271
- raise FieldValidationError(
272
- f"Use keyword arguments instead of positional arguments, e.g.: {record.__class__.__name__}(name='...')."
273
- )
274
- else:
275
- # object is loaded from DB (**kwargs could be omitted below, I believe)
276
- super(BasicRecord, record).__init__(*args, **kwargs)
277
- _store_record_old_name(record)
278
- _store_record_old_key(record)
279
-
280
-
281
- def _format_django_validation_error(record: Record, e: DjangoValidationError):
282
- """Pretty print Django validation errors."""
283
- errors = {}
284
- if hasattr(e, "error_dict"):
285
- error_dict = e.error_dict
286
- else:
287
- error_dict = {"__all__": e.error_list}
288
-
289
- for field_name, error_list in error_dict.items():
290
- for error in error_list:
291
- if hasattr(error, "message"):
292
- msg = error.message
293
- else:
294
- msg = str(error)
295
-
296
- if field_name == "__all__":
297
- errors[field_name] = f"{colors.yellow(msg)}"
298
- else:
299
- current_value = getattr(record, field_name, None)
300
- errors[field_name] = (
301
- f"{field_name}: {colors.yellow(current_value)} is not valid\n → {msg}"
302
- )
303
-
304
- if errors:
305
- message = "\n "
306
- for _, error in errors.items():
307
- message += error + "\n "
308
-
309
- return message
310
-
311
-
312
- def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
313
- """Gets the parameters of a Record from the overloaded signature.
314
-
315
- Example:
316
- >>> get_record_params(bt.Organism)
317
- >>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
318
- """
319
- source = inspect.getsource(record_class)
320
-
321
- # Find first overload that's not *db_args
322
- pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
323
- overloads = re.finditer(pattern, source)
324
-
325
- for overload in overloads:
326
- params_block = overload.group(1)
327
- # This is an additional safety measure if the overloaded signature that we're
328
- # looking for is not at the top but a "db_args" constructor
329
- if "*db_args" in params_block:
330
- continue
331
-
332
- params = []
333
- for line in params_block.split("\n"):
334
- line = line.strip()
335
- if not line or "self" in line:
336
- continue
337
-
338
- # Extract name and type annotation
339
- # The regex pattern finds parameter definitions like:
340
- # Simple: name: str
341
- # With default: age: int = 0
342
- # With complex types: items: List[str] = []
343
- param_pattern = (
344
- r"(\w+)" # Parameter name
345
- r"\s*:\s*" # Colon with optional whitespace
346
- r"((?:[^=,]|" # Type hint: either non-equals/comma chars
347
- r"(?<=\[)[^[\]]*" # or contents within square brackets
348
- r"(?=\]))+)" # looking ahead for closing bracket
349
- r"(?:\s*=\s*" # Optional default value part
350
- r"([^,]+))?" # Default value: anything but comma
351
- )
352
- match = re.match(param_pattern, line)
353
- if not match:
354
- continue
355
-
356
- name, type_str = match.group(1), match.group(2).strip()
357
-
358
- # Keep type as string instead of evaluating
359
- params.append((name, type_str))
360
-
361
- return params
362
-
363
- return []
364
-
365
-
366
- @classmethod # type:ignore
367
- @doc_args(Record.filter.__doc__)
368
- def filter(cls, *queries, **expressions) -> QuerySet:
369
- """{}""" # noqa: D415
370
- from lamindb._query_set import QuerySet
371
-
372
- _using_key = None
373
- if "_using_key" in expressions:
374
- _using_key = expressions.pop("_using_key")
375
-
376
- return QuerySet(model=cls, using=_using_key).filter(*queries, **expressions)
377
-
378
-
379
- @classmethod # type:ignore
380
- @doc_args(Record.get.__doc__)
381
- def get(
382
- cls,
383
- idlike: int | str | None = None,
384
- **expressions,
385
- ) -> Record:
386
- """{}""" # noqa: D415
387
- from lamindb._query_set import QuerySet
388
-
389
- return QuerySet(model=cls).get(idlike, **expressions)
390
-
391
-
392
- @classmethod # type:ignore
393
- @doc_args(Record.df.__doc__)
394
- def df(
395
- cls,
396
- include: str | list[str] | None = None,
397
- features: bool | list[str] = False,
398
- limit: int = 100,
399
- ) -> pd.DataFrame:
400
- """{}""" # noqa: D415
401
- query_set = cls.filter()
402
- if hasattr(cls, "updated_at"):
403
- query_set = query_set.order_by("-updated_at")
404
- return query_set[:limit].df(include=include, features=features)
405
-
406
-
407
- def _search(
408
- cls,
409
- string: str,
410
- *,
411
- field: StrField | list[StrField] | None = None,
412
- limit: int | None = 20,
413
- case_sensitive: bool = False,
414
- using_key: str | None = None,
415
- truncate_string: bool = False,
416
- ) -> QuerySet:
417
- if string is None:
418
- raise ValueError("Cannot search for None value! Please pass a valid string.")
419
-
420
- input_queryset = _queryset(cls, using_key=using_key)
421
- registry = input_queryset.model
422
- name_field = getattr(registry, "_name_field", "name")
423
- if field is None:
424
- fields = [
425
- field.name
426
- for field in registry._meta.fields
427
- if field.get_internal_type() in {"CharField", "TextField"}
428
- ]
429
- else:
430
- if not isinstance(field, list):
431
- fields_input = [field]
432
- else:
433
- fields_input = field
434
- fields = []
435
- for field in fields_input:
436
- if not isinstance(field, str):
437
- try:
438
- fields.append(field.field.name)
439
- except AttributeError as error:
440
- raise TypeError(
441
- "Please pass a Record string field, e.g., `CellType.name`!"
442
- ) from error
443
- else:
444
- fields.append(field)
445
-
446
- if truncate_string:
447
- if (len_string := len(string)) > 5:
448
- n_80_pct = int(len_string * 0.8)
449
- string = string[:n_80_pct]
450
-
451
- string = string.strip()
452
- string_escape = re.escape(string)
453
-
454
- exact_lookup = Exact if case_sensitive else IExact
455
- regex_lookup = Regex if case_sensitive else IRegex
456
- contains_lookup = Contains if case_sensitive else IContains
457
-
458
- ranks = []
459
- contains_filters = []
460
- for field in fields:
461
- field_expr = Coalesce(
462
- Cast(field, output_field=TextField()),
463
- Value(""),
464
- output_field=TextField(),
465
- )
466
- # exact rank
467
- exact_expr = exact_lookup(field_expr, string)
468
- exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
469
- ranks.append(exact_rank)
470
- # exact synonym
471
- synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
472
- synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
473
- ranks.append(synonym_rank)
474
- # match as sub-phrase
475
- sub_expr = regex_lookup(
476
- field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
477
- )
478
- sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
479
- ranks.append(sub_rank)
480
- # startswith and avoid matching string with " " on the right
481
- # mostly for truncated
482
- startswith_expr = regex_lookup(
483
- field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
484
- )
485
- startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
486
- ranks.append(startswith_rank)
487
- # match as sub-phrase from the left, mostly for truncated
488
- right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
489
- right_rank = Cast(right_expr, output_field=IntegerField()) * 2
490
- ranks.append(right_rank)
491
- # match as sub-phrase from the right
492
- left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
493
- left_rank = Cast(left_expr, output_field=IntegerField()) * 2
494
- ranks.append(left_rank)
495
- # simple contains filter
496
- contains_expr = contains_lookup(field_expr, string)
497
- contains_filter = Q(contains_expr)
498
- contains_filters.append(contains_filter)
499
- # also rank by contains
500
- contains_rank = Cast(contains_expr, output_field=IntegerField())
501
- ranks.append(contains_rank)
502
- # additional rule for truncated strings
503
- # weight matches from the beginning of the string higher
504
- # sometimes whole words get truncated and startswith_expr is not enough
505
- if truncate_string and field == name_field:
506
- startswith_lookup = StartsWith if case_sensitive else IStartsWith
507
- name_startswith_expr = startswith_lookup(field_expr, string)
508
- name_startswith_rank = (
509
- Cast(name_startswith_expr, output_field=IntegerField()) * 2
510
- )
511
- ranks.append(name_startswith_rank)
512
-
513
- ranked_queryset = (
514
- input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
515
- .alias(rank=sum(ranks))
516
- .order_by("-rank")
517
- )
518
-
519
- return ranked_queryset[:limit]
520
-
521
-
522
- @classmethod # type: ignore
523
- @doc_args(Record.search.__doc__)
524
- def search(
525
- cls,
526
- string: str,
527
- *,
528
- field: StrField | None = None,
529
- limit: int | None = 20,
530
- case_sensitive: bool = False,
531
- ) -> QuerySet:
532
- """{}""" # noqa: D415
533
- return _search(
534
- cls=cls,
535
- string=string,
536
- field=field,
537
- limit=limit,
538
- case_sensitive=case_sensitive,
539
- )
540
-
541
-
542
- def _lookup(
543
- cls,
544
- field: StrField | None = None,
545
- return_field: StrField | None = None,
546
- using_key: str | None = None,
547
- ) -> NamedTuple:
548
- """{}""" # noqa: D415
549
- queryset = _queryset(cls, using_key=using_key)
550
- field = get_name_field(registry=queryset.model, field=field)
551
-
552
- return Lookup(
553
- records=queryset,
554
- values=[i.get(field) for i in queryset.values()],
555
- tuple_name=cls.__class__.__name__,
556
- prefix="ln",
557
- ).lookup(
558
- return_field=(
559
- get_name_field(registry=queryset.model, field=return_field)
560
- if return_field is not None
561
- else None
562
- )
563
- )
564
-
565
-
566
- @classmethod # type: ignore
567
- @doc_args(Record.lookup.__doc__)
568
- def lookup(
569
- cls,
570
- field: StrField | None = None,
571
- return_field: StrField | None = None,
572
- ) -> NamedTuple:
573
- """{}""" # noqa: D415
574
- return _lookup(cls=cls, field=field, return_field=return_field)
575
-
576
-
577
- def get_name_field(
578
- registry: type[Record] | QuerySet | Manager,
579
- *,
580
- field: str | StrField | None = None,
581
- ) -> str:
582
- """Get the 1st char or text field from the registry."""
583
- if isinstance(registry, (QuerySet, Manager)):
584
- registry = registry.model
585
- model_field_names = [i.name for i in registry._meta.fields]
586
-
587
- # set to default name field
588
- if field is None:
589
- if hasattr(registry, "_name_field"):
590
- field = registry._meta.get_field(registry._name_field)
591
- elif "name" in model_field_names:
592
- field = registry._meta.get_field("name")
593
- else:
594
- # first char or text field that doesn't contain "id"
595
- for i in registry._meta.fields:
596
- if "id" in i.name:
597
- continue
598
- if i.get_internal_type() in {"CharField", "TextField"}:
599
- field = i
600
- break
601
-
602
- # no default name field can be found
603
- if field is None:
604
- raise ValueError(
605
- "please pass a Record string field, e.g., `CellType.name`!"
606
- )
607
- else:
608
- field = field.name # type:ignore
609
- if not isinstance(field, str):
610
- try:
611
- field = field.field.name
612
- except AttributeError:
613
- raise TypeError(
614
- "please pass a Record string field, e.g., `CellType.name`!"
615
- ) from None
616
-
617
- return field
618
-
619
-
620
- def _queryset(cls: Record | QuerySet | Manager, using_key: str) -> QuerySet:
621
- if isinstance(cls, (QuerySet, Manager)):
622
- return cls.all()
623
- elif using_key is None or using_key == "default":
624
- return cls.objects.all()
625
- else:
626
- # using must be called on cls, otherwise the connection isn't found
627
- return cls.using(using_key).all()
628
-
629
-
630
- def add_db_connection(db: str, using: str):
631
- db_config = dj_database_url.config(
632
- default=db, conn_max_age=600, conn_health_checks=True
633
- )
634
- db_config["TIME_ZONE"] = "UTC"
635
- db_config["OPTIONS"] = {}
636
- db_config["AUTOCOMMIT"] = True
637
- connections.settings[using] = db_config
638
-
639
-
640
- @classmethod # type: ignore
641
- @doc_args(Record.using.__doc__)
642
- def using(
643
- cls,
644
- instance: str | None,
645
- ) -> QuerySet:
646
- """{}""" # noqa: D415
647
- from ._query_set import QuerySet
648
-
649
- if instance is None:
650
- return QuerySet(model=cls, using=None)
651
- owner, name = get_owner_name_from_identifier(instance)
652
- settings_file = instance_settings_file(name, owner)
653
- cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
654
- if not settings_file.exists():
655
- result = connect_instance_hub(owner=owner, name=name)
656
- if isinstance(result, str):
657
- raise RuntimeError(
658
- f"Failed to load instance {instance}, please check your permissions!"
659
- )
660
- iresult, _ = result
661
- source_module = {
662
- modules for modules in iresult["schema_str"].split(",") if modules != ""
663
- } # type: ignore
664
- target_module = ln_setup.settings.instance.modules
665
- if not source_module.issubset(target_module):
666
- missing_members = source_module - target_module
667
- logger.warning(
668
- f"source modules has additional modules: {missing_members}\nconsider mounting these registry modules to transfer all metadata"
669
- )
670
- cache_filepath.write_text(f"{iresult['lnid']}\n{iresult['schema_str']}") # type: ignore
671
- settings_file = instance_settings_file(name, owner)
672
- db = update_db_using_local(iresult, settings_file)
673
- else:
674
- isettings = load_instance_settings(settings_file)
675
- db = isettings.db
676
- cache_filepath.write_text(f"{isettings.uid}\n{','.join(isettings.modules)}") # type: ignore
677
- add_db_connection(db, instance)
678
- return QuerySet(model=cls, using=instance)
679
-
680
-
681
- REGISTRY_UNIQUE_FIELD = {
682
- "storage": "root",
683
- "feature": "name",
684
- "ulabel": "name",
685
- "space": "name", # TODO: this should be updated with the currently used space instead during transfer
686
- }
687
-
688
-
689
- def update_fk_to_default_db(
690
- records: Record | list[Record] | QuerySet,
691
- fk: str,
692
- using_key: str | None,
693
- transfer_logs: dict,
694
- ):
695
- record = records[0] if isinstance(records, (list, QuerySet)) else records
696
- if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
697
- fk_record = getattr(record, fk)
698
- field = REGISTRY_UNIQUE_FIELD.get(fk, "uid")
699
- fk_record_default = fk_record.__class__.filter(
700
- **{field: getattr(fk_record, field)}
701
- ).one_or_none()
702
- if fk_record_default is None:
703
- from copy import copy
704
-
705
- fk_record_default = copy(fk_record)
706
- transfer_to_default_db(
707
- fk_record_default, using_key, save=True, transfer_logs=transfer_logs
708
- )
709
- if isinstance(records, (list, QuerySet)):
710
- for r in records:
711
- setattr(r, f"{fk}", None)
712
- setattr(r, f"{fk}_id", fk_record_default.id)
713
- else:
714
- setattr(records, f"{fk}", None)
715
- setattr(records, f"{fk}_id", fk_record_default.id)
716
-
717
-
718
- FKBULK = [
719
- "organism",
720
- "source",
721
- "report", # Run
722
- ]
723
-
724
-
725
- def transfer_fk_to_default_db_bulk(
726
- records: list | QuerySet, using_key: str | None, transfer_logs: dict
727
- ):
728
- for fk in FKBULK:
729
- update_fk_to_default_db(records, fk, using_key, transfer_logs=transfer_logs)
730
-
731
-
732
- def get_transfer_run(record) -> Run:
733
- from lamindb.core._context import context
734
- from lamindb.core._data import WARNING_RUN_TRANSFORM
735
-
736
- slug = record._state.db
737
- owner, name = get_owner_name_from_identifier(slug)
738
- cache_filepath = ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
739
- if not cache_filepath.exists():
740
- raise SystemExit("Need to call .using() before")
741
- instance_uid = cache_filepath.read_text().split("\n")[0]
742
- key = f"transfers/{instance_uid}"
743
- uid = instance_uid + "0000"
744
- transform = Transform.filter(uid=uid).one_or_none()
745
- if transform is None:
746
- search_names = settings.creation.search_names
747
- settings.creation.search_names = False
748
- transform = Transform( # type: ignore
749
- uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
750
- ).save()
751
- settings.creation.search_names = search_names
752
- # use the global run context to get the initiated_by_run run id
753
- if context.run is not None:
754
- initiated_by_run = context.run
755
- else:
756
- if not settings.creation.artifact_silence_missing_run_warning:
757
- logger.warning(WARNING_RUN_TRANSFORM)
758
- initiated_by_run = None
759
- # it doesn't seem to make sense to create new runs for every transfer
760
- run = Run.filter(
761
- transform=transform, initiated_by_run=initiated_by_run
762
- ).one_or_none()
763
- if run is None:
764
- run = Run(transform=transform, initiated_by_run=initiated_by_run).save() # type: ignore
765
- run.initiated_by_run = initiated_by_run # so that it's available in memory
766
- return run
767
-
768
-
769
- def transfer_to_default_db(
770
- record: Record,
771
- using_key: str | None,
772
- *,
773
- transfer_logs: dict,
774
- save: bool = False,
775
- transfer_fk: bool = True,
776
- ) -> Record | None:
777
- if record._state.db is None or record._state.db == "default":
778
- return None
779
- registry = record.__class__
780
- record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
781
- record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
782
- if transfer_logs["run"] is None:
783
- transfer_logs["run"] = get_transfer_run(record)
784
- if record_on_default is not None:
785
- transfer_logs["mapped"].append(record_str)
786
- return record_on_default
787
- else:
788
- transfer_logs["transferred"].append(record_str)
789
-
790
- if hasattr(record, "created_by_id"):
791
- record.created_by = None
792
- record.created_by_id = ln_setup.settings.user.id
793
- # run & transform
794
- run = transfer_logs["run"]
795
- if hasattr(record, "run_id"):
796
- record.run = None
797
- record.run_id = run.id
798
- # deal with denormalized transform FK on artifact and collection
799
- if hasattr(record, "transform_id"):
800
- record.transform = None
801
- record.transform_id = run.transform_id
802
- # transfer other foreign key fields
803
- fk_fields = [
804
- i.name
805
- for i in record._meta.fields
806
- if i.get_internal_type() == "ForeignKey"
807
- if i.name not in {"created_by", "run", "transform"}
808
- ]
809
- if not transfer_fk:
810
- # don't transfer fk fields that are already bulk transferred
811
- fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
812
- for fk in fk_fields:
813
- update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)
814
- record.id = None
815
- record._state.db = "default"
816
- if save:
817
- record.save()
818
- return None
819
-
820
-
821
- # docstring handled through attach_func_to_class_method
822
- def save(self, *args, **kwargs) -> Record:
823
- using_key = None
824
- if "using" in kwargs:
825
- using_key = kwargs["using"]
826
- db = self._state.db
827
- pk_on_db = self.pk
828
- artifacts: list = []
829
- if self.__class__.__name__ == "Collection" and self.id is not None:
830
- # when creating a new collection without being able to access artifacts
831
- artifacts = self.ordered_artifacts.list()
832
- pre_existing_record = None
833
- # consider records that are being transferred from other databases
834
- transfer_logs: dict[str, list[str]] = {"mapped": [], "transferred": [], "run": None}
835
- if db is not None and db != "default" and using_key is None:
836
- if isinstance(self, IsVersioned):
837
- if not self.is_latest:
838
- raise NotImplementedError(
839
- "You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
840
- )
841
- pre_existing_record = transfer_to_default_db(
842
- self, using_key, transfer_logs=transfer_logs
843
- )
844
- if pre_existing_record is not None:
845
- init_self_from_db(self, pre_existing_record)
846
- else:
847
- check_key_change(self)
848
- check_name_change(self)
849
- try:
850
- # save versioned record in presence of self._revises
851
- if isinstance(self, IsVersioned) and self._revises is not None:
852
- assert self._revises.is_latest # noqa: S101
853
- revises = self._revises
854
- revises.is_latest = False
855
- with transaction.atomic():
856
- revises._revises = None # ensure we don't start a recursion
857
- revises.save()
858
- super(BasicRecord, self).save(*args, **kwargs) # type: ignore
859
- self._revises = None
860
- # save unversioned record
861
- else:
862
- super(BasicRecord, self).save(*args, **kwargs)
863
- except IntegrityError as e:
864
- error_msg = str(e)
865
- # two possible error messages for hash duplication
866
- # "duplicate key value violates unique constraint"
867
- # "UNIQUE constraint failed"
868
- if (
869
- "UNIQUE constraint failed" in error_msg
870
- or "duplicate key value violates unique constraint" in error_msg
871
- ) and "hash" in error_msg:
872
- pre_existing_record = self.__class__.get(hash=self.hash)
873
- logger.warning(
874
- f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
875
- )
876
- init_self_from_db(self, pre_existing_record)
877
- else:
878
- raise
879
- _store_record_old_name(self)
880
- _store_record_old_key(self)
881
- # perform transfer of many-to-many fields
882
- # only supported for Artifact and Collection records
883
- if db is not None and db != "default" and using_key is None:
884
- if self.__class__.__name__ == "Collection":
885
- if len(artifacts) > 0:
886
- logger.info("transfer artifacts")
887
- for artifact in artifacts:
888
- artifact.save()
889
- self.artifacts.add(*artifacts)
890
- if hasattr(self, "labels"):
891
- from copy import copy
892
-
893
- from lamindb.models import FeatureManager
894
-
895
- # here we go back to original record on the source database
896
- self_on_db = copy(self)
897
- self_on_db._state.db = db
898
- self_on_db.pk = pk_on_db # manually set the primary key
899
- self_on_db.features = FeatureManager(self_on_db) # type: ignore
900
- self.features._add_from(self_on_db, transfer_logs=transfer_logs)
901
- self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
902
- for k, v in transfer_logs.items():
903
- if k != "run":
904
- logger.important(f"{k} records: {', '.join(v)}")
905
- return self
906
-
907
-
908
- def _store_record_old_name(record: Record):
909
- # writes the name to the _name attribute, so we can detect renaming upon save
910
- if hasattr(record, "_name_field"):
911
- record._old_name = getattr(record, record._name_field)
912
-
913
-
914
- def _store_record_old_key(record: Record):
915
- # writes the key to the _old_key attribute, so we can detect key changes upon save
916
- if isinstance(record, (Artifact, Transform)):
917
- record._old_key = record.key
918
-
919
-
920
- def check_name_change(record: Record):
921
- """Warns if a record's name has changed."""
922
- if (
923
- not record.pk
924
- or not hasattr(record, "_old_name")
925
- or not hasattr(record, "_name_field")
926
- ):
927
- return
928
-
929
- # checked in check_key_change or not checked at all
930
- if isinstance(record, (Artifact, Collection, Transform)):
931
- return
932
-
933
- # renaming feature sets is not checked
934
- if isinstance(record, Schema):
935
- return
936
-
937
- old_name = record._old_name
938
- new_name = getattr(record, record._name_field)
939
- registry = record.__class__.__name__
940
-
941
- if old_name != new_name:
942
- # when a label is renamed, only raise a warning if it has a feature
943
- if hasattr(record, "artifacts"):
944
- linked_records = (
945
- record.artifacts.through.filter(
946
- label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
947
- )
948
- .exclude(feature_id=None) # must have a feature
949
- .exclude(
950
- feature_ref_is_name=None
951
- ) # must be linked via Curator and therefore part of a schema
952
- .distinct()
953
- )
954
- artifact_ids = linked_records.list("artifact__uid")
955
- n = len(artifact_ids)
956
- if n > 0:
957
- s = "s" if n > 1 else ""
958
- logger.error(
959
- f"You are trying to {colors.red('rename label')} from '{old_name}' to '{new_name}'!\n"
960
- f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {artifact_ids}\n\n"
961
- f"{colors.bold('To rename this label')}, make it external:\n"
962
- f" → run `artifact.labels.make_external(label)`\n\n"
963
- f"After renaming, consider re-curating the above artifact{s}:\n"
964
- f' → in each dataset, manually modify label "{old_name}" to "{new_name}"\n'
965
- f" → run `ln.Curator`\n"
966
- )
967
- raise RecordNameChangeIntegrityError
968
-
969
- # when a feature is renamed
970
- elif isinstance(record, Feature):
971
- # only internal features are associated with schemas
972
- linked_artifacts = Artifact.filter(feature_sets__features=record).list(
973
- "uid"
974
- )
975
- n = len(linked_artifacts)
976
- if n > 0:
977
- s = "s" if n > 1 else ""
978
- logger.error(
979
- f"You are trying to {colors.red('rename feature')} from '{old_name}' to '{new_name}'!\n"
980
- f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {linked_artifacts}\n\n"
981
- f"{colors.bold('To rename this feature')}, make it external:\n"
982
- " → run `artifact.features.make_external(feature)`\n\n"
983
- f"After renaming, consider re-curating the above artifact{s}:\n"
984
- f" → in each dataset, manually modify feature '{old_name}' to '{new_name}'\n"
985
- f" → run `ln.Curator`\n"
986
- )
987
- raise RecordNameChangeIntegrityError
988
-
989
-
990
- def check_key_change(record: Artifact | Transform):
991
- """Errors if a record's key has falsely changed."""
992
- if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
993
- return
994
-
995
- old_key = record._old_key or ""
996
- new_key = record.key or ""
997
-
998
- if old_key != new_key:
999
- if not record._key_is_virtual:
1000
- raise InvalidArgument(
1001
- f"Changing a non-virtual key of an artifact is not allowed! Tried to change key from '{old_key}' to '{new_key}'."
1002
- )
1003
- old_key_suffix = (
1004
- record.suffix
1005
- if record.suffix
1006
- else extract_suffix_from_path(PurePosixPath(old_key), arg_name="key")
1007
- )
1008
- new_key_suffix = extract_suffix_from_path(
1009
- PurePosixPath(new_key), arg_name="key"
1010
- )
1011
- if old_key_suffix != new_key_suffix:
1012
- raise InvalidArgument(
1013
- f"The suffix '{new_key_suffix}' of the provided key is incorrect, it should be '{old_key_suffix}'."
1014
- )
1015
-
1016
-
1017
- def delete(self) -> None:
1018
- """Delete the record."""
1019
- # note that the logic below does not fire if a record is moved to the trash
1020
- # the idea is that moving a record to the trash should move its entire version family
1021
- # to the trash, whereas permanently deleting should default to only deleting a single record
1022
- # of a version family
1023
- # we can consider making it easy to permanently delete entire version families as well,
1024
- # but that's for another time
1025
- if isinstance(self, IsVersioned) and self.is_latest:
1026
- new_latest = (
1027
- self.__class__.objects.using(self._state.db)
1028
- .filter(is_latest=False, uid__startswith=self.stem_uid)
1029
- .order_by("-created_at")
1030
- .first()
1031
- )
1032
- if new_latest is not None:
1033
- new_latest.is_latest = True
1034
- with transaction.atomic():
1035
- new_latest.save()
1036
- super(BasicRecord, self).delete() # type: ignore
1037
- logger.warning(f"new latest version is {new_latest}")
1038
- return None
1039
- super(BasicRecord, self).delete()
1040
-
1041
-
1042
- METHOD_NAMES = [
1043
- "__init__",
1044
- "filter",
1045
- "get",
1046
- "df",
1047
- "search",
1048
- "lookup",
1049
- "save",
1050
- "delete",
1051
- "using",
1052
- ]
1053
-
1054
- if ln_setup._TESTING: # type: ignore
1055
- from inspect import signature
1056
-
1057
- SIGS = {
1058
- name: signature(getattr(Record, name))
1059
- for name in METHOD_NAMES
1060
- if not name.startswith("__")
1061
- }
1062
-
1063
- for name in METHOD_NAMES:
1064
- attach_func_to_class_method(name, BasicRecord, globals())
1065
- attach_func_to_class_method(name, Record, globals())