lamindb 1.5.2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +25 -6
- lamindb/_finish.py +5 -5
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/_context.py +32 -6
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/mini_immuno.py +8 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +9 -9
- lamindb/core/storage/_valid_suffixes.py +1 -0
- lamindb/core/storage/_zarr.py +32 -107
- lamindb/curators/__init__.py +19 -2
- lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
- lamindb/curators/_legacy.py +15 -19
- lamindb/curators/core.py +247 -80
- lamindb/errors.py +2 -2
- lamindb/migrations/0069_squashed.py +8 -8
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
- lamindb/migrations/0073_merge_ourprojects.py +7 -7
- lamindb/migrations/0075_lamindbv1_part5.py +1 -1
- lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
- lamindb/migrations/0080_polish_lamindbv1.py +2 -2
- lamindb/migrations/0088_schema_components.py +1 -1
- lamindb/migrations/0090_runproject_project_runs.py +2 -2
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
- lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
- lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
- lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
- lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
- lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
- lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
- lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
- lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
- lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
- lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
- lamindb/models/__init__.py +35 -18
- lamindb/models/_describe.py +4 -4
- lamindb/models/_django.py +38 -4
- lamindb/models/_feature_manager.py +66 -123
- lamindb/models/_from_values.py +13 -13
- lamindb/models/_label_manager.py +8 -6
- lamindb/models/_relations.py +7 -7
- lamindb/models/artifact.py +166 -156
- lamindb/models/can_curate.py +25 -25
- lamindb/models/collection.py +48 -18
- lamindb/models/core.py +3 -3
- lamindb/models/feature.py +88 -60
- lamindb/models/has_parents.py +17 -17
- lamindb/models/project.py +52 -24
- lamindb/models/query_manager.py +5 -5
- lamindb/models/query_set.py +61 -37
- lamindb/models/record.py +158 -1583
- lamindb/models/run.py +39 -176
- lamindb/models/save.py +6 -6
- lamindb/models/schema.py +33 -44
- lamindb/models/sqlrecord.py +1743 -0
- lamindb/models/transform.py +17 -33
- lamindb/models/ulabel.py +21 -15
- {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
- lamindb-1.6.0.dist-info/RECORD +118 -0
- lamindb/core/storage/_anndata_sizes.py +0 -41
- lamindb/models/flextable.py +0 -163
- lamindb-1.5.2.dist-info/RECORD +0 -109
- {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
- {lamindb-1.5.2.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0
lamindb/models/record.py
CHANGED
@@ -1,928 +1,108 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import
|
4
|
-
import inspect
|
5
|
-
import re
|
6
|
-
import sys
|
7
|
-
from collections import defaultdict
|
8
|
-
from itertools import chain
|
9
|
-
from pathlib import PurePosixPath
|
10
|
-
from typing import (
|
11
|
-
TYPE_CHECKING,
|
12
|
-
Any,
|
13
|
-
Literal,
|
14
|
-
NamedTuple,
|
15
|
-
TypeVar,
|
16
|
-
Union,
|
17
|
-
overload,
|
18
|
-
)
|
3
|
+
from typing import TYPE_CHECKING, Any, overload
|
19
4
|
|
20
|
-
import
|
21
|
-
import
|
22
|
-
from django.core.exceptions import ValidationError as DjangoValidationError
|
23
|
-
from django.db import IntegrityError, ProgrammingError, connections, models, transaction
|
24
|
-
from django.db.models import CASCADE, PROTECT, Field, Manager, QuerySet
|
25
|
-
from django.db.models.base import ModelBase
|
26
|
-
from django.db.models.fields.related import (
|
27
|
-
ManyToManyField,
|
28
|
-
ManyToManyRel,
|
29
|
-
ManyToOneRel,
|
30
|
-
)
|
31
|
-
from lamin_utils import colors, logger
|
32
|
-
from lamindb_setup import settings as setup_settings
|
33
|
-
from lamindb_setup._connect_instance import (
|
34
|
-
get_owner_name_from_identifier,
|
35
|
-
load_instance_settings,
|
36
|
-
update_db_using_local,
|
37
|
-
)
|
38
|
-
from lamindb_setup.core._docs import doc_args
|
39
|
-
from lamindb_setup.core._hub_core import connect_instance_hub
|
40
|
-
from lamindb_setup.core._settings_store import instance_settings_file
|
41
|
-
from lamindb_setup.core.django import DBToken, db_token_manager
|
42
|
-
from lamindb_setup.core.upath import extract_suffix_from_path
|
5
|
+
from django.db import models
|
6
|
+
from django.db.models import CASCADE, PROTECT
|
43
7
|
|
44
|
-
from
|
8
|
+
from lamindb.base.fields import (
|
9
|
+
BooleanField,
|
45
10
|
CharField,
|
46
|
-
DateTimeField,
|
47
11
|
ForeignKey,
|
48
12
|
JSONField,
|
49
13
|
)
|
50
|
-
from
|
51
|
-
from ..errors import (
|
52
|
-
FieldValidationError,
|
53
|
-
InvalidArgument,
|
54
|
-
NoWriteAccess,
|
55
|
-
RecordNameChangeIntegrityError,
|
56
|
-
ValidationError,
|
57
|
-
)
|
58
|
-
from ._is_versioned import IsVersioned
|
59
|
-
from .query_manager import QueryManager, _lookup, _search
|
60
|
-
|
61
|
-
if TYPE_CHECKING:
|
62
|
-
from datetime import datetime
|
63
|
-
|
64
|
-
import pandas as pd
|
65
|
-
|
66
|
-
from .artifact import Artifact
|
67
|
-
from .run import Run, User
|
68
|
-
from .transform import Transform
|
69
|
-
|
70
|
-
|
71
|
-
T = TypeVar("T", bound="Record")
|
72
|
-
IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
73
|
-
|
74
|
-
|
75
|
-
# -------------------------------------------------------------------------------------
|
76
|
-
# A note on required fields at the Record level
|
77
|
-
#
|
78
|
-
# As Django does most of its validation on the Form-level, it doesn't offer functionality
|
79
|
-
# for validating the integrity of an Record object upon instantation (similar to pydantic)
|
80
|
-
#
|
81
|
-
# For required fields, we define them as commonly done on the SQL level together
|
82
|
-
# with a validator in Record (validate_required_fields)
|
83
|
-
#
|
84
|
-
# This goes against the Django convention, but goes with the SQLModel convention
|
85
|
-
# (Optional fields can be null on the SQL level, non-optional fields cannot)
|
86
|
-
#
|
87
|
-
# Due to Django's convention where CharFieldAttr has pre-configured (null=False, default=""), marking
|
88
|
-
# a required field necessitates passing `default=None`. Without the validator it would trigger
|
89
|
-
# an error at the SQL-level, with it, it triggers it at instantiation
|
90
|
-
|
91
|
-
# -------------------------------------------------------------------------------------
|
92
|
-
# A note on class and instance methods of core Record
|
93
|
-
#
|
94
|
-
# All of these are defined and tested within lamindb, in files starting with _{orm_name}.py
|
95
|
-
|
96
|
-
# -------------------------------------------------------------------------------------
|
97
|
-
# A note on maximal lengths of char fields
|
98
|
-
#
|
99
|
-
# 100 characters:
|
100
|
-
# "Raindrops pitter-pattered on the windowpane, blurring the"
|
101
|
-
# "city lights outside, curled up with a mug."
|
102
|
-
# A good maximal length for a name (title).
|
103
|
-
#
|
104
|
-
# 150 characters: We choose this for name maximal length because some users like long names.
|
105
|
-
#
|
106
|
-
# 255 characters:
|
107
|
-
# "In creating a precise 255-character paragraph, one engages in"
|
108
|
-
# "a dance of words, where clarity meets brevity. Every syllable counts,"
|
109
|
-
# "illustrating the skill in compact expression, ensuring the essence of the"
|
110
|
-
# "message shines through within the exacting limit."
|
111
|
-
# This is a good maximal length for a description field.
|
112
|
-
|
113
|
-
|
114
|
-
class LinkORM:
|
115
|
-
pass
|
116
|
-
|
117
|
-
|
118
|
-
def deferred_attribute__repr__(self):
|
119
|
-
return f"FieldAttr({self.field.model.__name__}.{self.field.name})"
|
120
|
-
|
121
|
-
|
122
|
-
FieldAttr.__repr__ = deferred_attribute__repr__ # type: ignore
|
123
|
-
|
124
|
-
|
125
|
-
class ValidateFields:
|
126
|
-
pass
|
127
|
-
|
128
|
-
|
129
|
-
def is_approx_pascal_case(s):
|
130
|
-
"""Check if the last component of a dotted string is in PascalCase.
|
131
|
-
|
132
|
-
Args:
|
133
|
-
s (str): The string to check
|
134
|
-
|
135
|
-
Returns:
|
136
|
-
bool: True if the last component is in PascalCase
|
137
|
-
|
138
|
-
Raises:
|
139
|
-
ValueError: If the last component doesn't start with a capital letter
|
140
|
-
"""
|
141
|
-
if "[" in s: # this is because we allow types of form 'script[test_script.py]'
|
142
|
-
return True
|
143
|
-
last_component = s.split(".")[-1]
|
144
|
-
|
145
|
-
if not last_component[0].isupper():
|
146
|
-
raise ValueError(
|
147
|
-
f"'{last_component}' should start with a capital letter given you're defining a type"
|
148
|
-
)
|
149
|
-
|
150
|
-
return True
|
151
|
-
|
14
|
+
from lamindb.errors import FieldValidationError
|
152
15
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
16
|
+
from ..base.ids import base62_12, base62_16
|
17
|
+
from .artifact import Artifact
|
18
|
+
from .can_curate import CanCurate
|
19
|
+
from .feature import Feature
|
20
|
+
from .run import Run, TracksRun, TracksUpdates
|
21
|
+
from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, _get_record_kwargs
|
22
|
+
from .ulabel import ULabel
|
160
23
|
|
24
|
+
if TYPE_CHECKING:
|
25
|
+
from .project import Project
|
26
|
+
from .schema import Schema
|
161
27
|
|
162
|
-
def update_attributes(record: Record, attributes: dict[str, str]):
|
163
|
-
for key, value in attributes.items():
|
164
|
-
if getattr(record, key) != value and value is not None:
|
165
|
-
if key not in {"uid", "dtype", "otype", "hash"}:
|
166
|
-
logger.warning(f"updated {key} from {getattr(record, key)} to {value}")
|
167
|
-
setattr(record, key, value)
|
168
|
-
else:
|
169
|
-
hash_message = (
|
170
|
-
"recomputing on .save()"
|
171
|
-
if key == "hash"
|
172
|
-
else f"keeping {getattr(record, key)}"
|
173
|
-
)
|
174
|
-
logger.warning(
|
175
|
-
f"ignoring tentative value {value} for {key}, {hash_message}"
|
176
|
-
)
|
177
28
|
|
29
|
+
class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
30
|
+
"""Flexible records to register, e.g., samples, donors, cells, compounds, sequences.
|
178
31
|
|
179
|
-
|
180
|
-
"""Validate all Literal type fields in a record.
|
32
|
+
This is currently more convenient to use through the UI.
|
181
33
|
|
182
34
|
Args:
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
35
|
+
name: `str` A name.
|
36
|
+
description: `str` A description.
|
37
|
+
|
38
|
+
See Also:
|
39
|
+
:meth:`~lamindb.Sheet`
|
40
|
+
Sheets to group records.
|
41
|
+
:meth:`~lamindb.Feature`
|
42
|
+
Dimensions of measurement.
|
43
|
+
:attr:`~lamindb.Artifact.features`
|
44
|
+
Feature manager for an artifact.
|
187
45
|
"""
|
188
|
-
if isinstance(record, LinkORM):
|
189
|
-
return None
|
190
|
-
if record.__class__.__name__ in "Feature":
|
191
|
-
return None
|
192
|
-
from lamindb.base.types import Dtype, TransformType
|
193
46
|
|
194
|
-
|
195
|
-
|
196
|
-
"ArtifactKind": Dtype,
|
197
|
-
"Dtype": Dtype,
|
198
|
-
}
|
199
|
-
errors = {}
|
200
|
-
annotations = getattr(record.__class__, "__annotations__", {})
|
201
|
-
for field_name, annotation in annotations.items():
|
202
|
-
if field_name not in kwargs or kwargs[field_name] is None:
|
203
|
-
continue
|
204
|
-
value = kwargs[field_name]
|
205
|
-
if str(annotation) in types:
|
206
|
-
annotation = types[annotation]
|
207
|
-
if not hasattr(annotation, "__origin__"):
|
208
|
-
continue
|
209
|
-
literal_type = annotation if annotation.__origin__ is Literal else None
|
210
|
-
if literal_type is None:
|
211
|
-
continue
|
212
|
-
valid_values = set(literal_type.__args__)
|
213
|
-
if value not in valid_values:
|
214
|
-
errors[field_name] = (
|
215
|
-
f"{field_name}: {colors.yellow(value)} is not a valid value"
|
216
|
-
f"\n → Valid values are: {colors.green(', '.join(sorted(valid_values)))}"
|
217
|
-
)
|
218
|
-
if errors:
|
219
|
-
message = "\n "
|
220
|
-
for _, error in errors.items():
|
221
|
-
message += error + "\n "
|
222
|
-
raise FieldValidationError(message)
|
47
|
+
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
48
|
+
abstract = False
|
223
49
|
|
50
|
+
_name_field: str = "name"
|
224
51
|
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
Feature,
|
230
|
-
Param,
|
231
|
-
Run,
|
232
|
-
Schema,
|
233
|
-
Transform,
|
234
|
-
ULabel,
|
52
|
+
id: int = models.AutoField(primary_key=True)
|
53
|
+
"""Internal id, valid only in one DB instance."""
|
54
|
+
uid: str = CharField(
|
55
|
+
editable=False, unique=True, db_index=True, max_length=16, default=base62_16
|
235
56
|
)
|
57
|
+
"""A universal random id, valid across DB instances."""
|
58
|
+
name: str = CharField(max_length=150, db_index=True, null=True)
|
59
|
+
"""Name or title of record (optional)."""
|
60
|
+
type: Record | None = ForeignKey("self", PROTECT, null=True, related_name="records")
|
61
|
+
"""Type of record, e.g., `Sample`, `Donor`, `Cell`, `Compound`, `Sequence`.
|
236
62
|
|
237
|
-
|
238
|
-
# a "required field" is a Django field that has `null=False, default=None`
|
239
|
-
required_fields = {
|
240
|
-
k.name for k in record._meta.fields if not k.null and k.default is None
|
241
|
-
}
|
242
|
-
required_fields_not_passed = {k: None for k in required_fields if k not in kwargs}
|
243
|
-
kwargs.update(required_fields_not_passed)
|
244
|
-
missing_fields = [
|
245
|
-
k for k, v in kwargs.items() if v is None and k in required_fields
|
246
|
-
]
|
247
|
-
if missing_fields:
|
248
|
-
raise FieldValidationError(f"{missing_fields} are required.")
|
249
|
-
# ensure the exact length of the internal uid for core entities
|
250
|
-
if "uid" in kwargs and record.__class__ in {
|
251
|
-
Artifact,
|
252
|
-
Collection,
|
253
|
-
Transform,
|
254
|
-
Run,
|
255
|
-
ULabel,
|
256
|
-
Feature,
|
257
|
-
Schema,
|
258
|
-
Param,
|
259
|
-
}:
|
260
|
-
uid_max_length = record.__class__._meta.get_field(
|
261
|
-
"uid"
|
262
|
-
).max_length # triggers FieldDoesNotExist
|
263
|
-
if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
|
264
|
-
if not (
|
265
|
-
record.__class__ is Schema and len(kwargs["uid"]) == 16
|
266
|
-
): # no error for schema
|
267
|
-
raise ValidationError(
|
268
|
-
f"`uid` must be exactly {uid_max_length} characters long, got {len(kwargs['uid'])}."
|
269
|
-
)
|
270
|
-
# validate is_type
|
271
|
-
if "is_type" in kwargs and "name" in kwargs and kwargs["is_type"]:
|
272
|
-
if kwargs["name"].endswith("s"):
|
273
|
-
logger.warning(
|
274
|
-
f"name '{kwargs['name']}' for type ends with 's', in case you're naming with plural, consider the singular for a type name"
|
275
|
-
)
|
276
|
-
is_approx_pascal_case(kwargs["name"])
|
277
|
-
# validate literals
|
278
|
-
validate_literal_fields(record, kwargs)
|
279
|
-
|
280
|
-
|
281
|
-
def suggest_records_with_similar_names(
|
282
|
-
record: Record, name_field: str, kwargs
|
283
|
-
) -> Record | None:
|
284
|
-
"""Returns True if found exact match, otherwise False.
|
285
|
-
|
286
|
-
Logs similar matches if found.
|
63
|
+
Allows to group records by type, e.g., all samples, all donors, all cells, all compounds, all sequences.
|
287
64
|
"""
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
# but this isn't reliable: https://laminlabs.slack.com/archives/C04FPE8V01W/p1737812808563409
|
293
|
-
# the below needs to be .first() because there might be multiple records with the same
|
294
|
-
# name field in case the record is versioned (e.g. for Transform key)
|
295
|
-
exact_match = record.__class__.filter(**{name_field: kwargs[name_field]}).first()
|
296
|
-
if exact_match is not None:
|
297
|
-
return exact_match
|
298
|
-
queryset = _search(
|
299
|
-
record.__class__,
|
300
|
-
kwargs[name_field],
|
301
|
-
field=name_field,
|
302
|
-
truncate_string=True,
|
303
|
-
limit=3,
|
304
|
-
)
|
305
|
-
if not queryset.exists(): # empty queryset
|
306
|
-
return None
|
307
|
-
s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
|
308
|
-
msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
|
309
|
-
if IPYTHON:
|
310
|
-
from IPython.display import display
|
311
|
-
|
312
|
-
from lamindb import settings
|
313
|
-
|
314
|
-
logger.warning(f"{msg}")
|
315
|
-
if settings._verbosity_int >= 1:
|
316
|
-
display(queryset.df())
|
317
|
-
else:
|
318
|
-
logger.warning(f"{msg}\n{queryset}")
|
319
|
-
return None
|
320
|
-
|
321
|
-
|
322
|
-
RECORD_REGISTRY_EXAMPLE = """Example::
|
323
|
-
|
324
|
-
from lamindb import Record, fields
|
65
|
+
records: Record
|
66
|
+
"""Records of this type (can only be non-empty if `is_type` is `True`)."""
|
67
|
+
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
68
|
+
"""Distinguish types from instances of the type.
|
325
69
|
|
326
|
-
|
327
|
-
class Experiment(Record):
|
328
|
-
name: str = fields.CharField()
|
329
|
-
|
330
|
-
# instantiating `Experiment` creates a record `experiment`
|
331
|
-
experiment = Experiment(name="my experiment")
|
332
|
-
|
333
|
-
# you can save the record to the database
|
334
|
-
experiment.save()
|
335
|
-
|
336
|
-
# `Experiment` refers to the registry, which you can query
|
337
|
-
df = Experiment.filter(name__startswith="my ").df()
|
338
|
-
"""
|
339
|
-
|
340
|
-
|
341
|
-
# this is the metaclass for Record
|
342
|
-
@doc_args(RECORD_REGISTRY_EXAMPLE)
|
343
|
-
class Registry(ModelBase):
|
344
|
-
"""Metaclass for :class:`~lamindb.models.Record`.
|
345
|
-
|
346
|
-
Each `Registry` *object* is a `Record` *class* and corresponds to a table in the metadata SQL database.
|
347
|
-
|
348
|
-
You work with `Registry` objects whenever you use *class methods* of `Record`.
|
349
|
-
|
350
|
-
You call any subclass of `Record` a "registry" and their objects "records". A `Record` object corresponds to a row in the SQL table.
|
351
|
-
|
352
|
-
If you want to create a new registry, you sub-class `Record`.
|
353
|
-
|
354
|
-
{}
|
355
|
-
|
356
|
-
Note: `Registry` inherits from Django's `ModelBase`.
|
70
|
+
For example, if a record "Compound" is a `type`, the actual compounds "darerinib", "tramerinib", would be instances of that `type`.
|
357
71
|
"""
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
def __new__(cls, name, bases, attrs, **kwargs):
|
362
|
-
new_class = super().__new__(cls, name, bases, attrs, **kwargs)
|
363
|
-
return new_class
|
364
|
-
|
365
|
-
# below creates a sensible auto-complete behavior that differs across the
|
366
|
-
# class and instance level in Jupyter Editors it doesn't have any effect for
|
367
|
-
# static type analyzer like pylance used in VSCode
|
368
|
-
def __dir__(cls):
|
369
|
-
# this is needed to bring auto-complete on the class-level back
|
370
|
-
# https://laminlabs.slack.com/archives/C04FPE8V01W/p1717535625268849
|
371
|
-
# Filter class attributes, excluding instance methods
|
372
|
-
exclude_instance_methods = "sphinx" not in sys.modules
|
373
|
-
# https://laminlabs.slack.com/archives/C04FPE8V01W/p1721134595920959
|
374
|
-
|
375
|
-
def include_attribute(attr_name, attr_value):
|
376
|
-
if attr_name.startswith("__"):
|
377
|
-
return False
|
378
|
-
if exclude_instance_methods and callable(attr_value):
|
379
|
-
return isinstance(attr_value, (classmethod, staticmethod, type))
|
380
|
-
return True
|
381
|
-
|
382
|
-
# check also inherited attributes
|
383
|
-
if hasattr(cls, "mro"):
|
384
|
-
attrs = chain(*(c.__dict__.items() for c in cls.mro()))
|
385
|
-
else:
|
386
|
-
attrs = cls.__dict__.items()
|
387
|
-
|
388
|
-
result = []
|
389
|
-
for attr_name, attr_value in attrs:
|
390
|
-
if attr_name not in result and include_attribute(attr_name, attr_value):
|
391
|
-
result.append(attr_name)
|
392
|
-
|
393
|
-
# Add non-dunder attributes from Registry
|
394
|
-
for attr in dir(Registry):
|
395
|
-
if not attr.startswith("__") and attr not in result:
|
396
|
-
result.append(attr)
|
397
|
-
return result
|
398
|
-
|
399
|
-
def __repr__(cls) -> str:
|
400
|
-
return registry_repr(cls)
|
401
|
-
|
402
|
-
@doc_args(_lookup.__doc__)
|
403
|
-
def lookup(
|
404
|
-
cls,
|
405
|
-
field: StrField | None = None,
|
406
|
-
return_field: StrField | None = None,
|
407
|
-
) -> NamedTuple:
|
408
|
-
"""{}""" # noqa: D415
|
409
|
-
return _lookup(cls=cls, field=field, return_field=return_field)
|
410
|
-
|
411
|
-
def filter(cls, *queries, **expressions) -> QuerySet:
|
412
|
-
"""Query records.
|
413
|
-
|
414
|
-
Args:
|
415
|
-
queries: One or multiple `Q` objects.
|
416
|
-
expressions: Fields and values passed as Django query expressions.
|
417
|
-
|
418
|
-
Returns:
|
419
|
-
A :class:`~lamindb.models.QuerySet`.
|
420
|
-
|
421
|
-
See Also:
|
422
|
-
- Guide: :doc:`docs:registries`
|
423
|
-
- Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
|
424
|
-
|
425
|
-
Examples:
|
426
|
-
>>> ln.ULabel(name="my label").save()
|
427
|
-
>>> ln.ULabel.filter(name__startswith="my").df()
|
428
|
-
"""
|
429
|
-
from .query_set import QuerySet
|
430
|
-
|
431
|
-
_using_key = None
|
432
|
-
if "_using_key" in expressions:
|
433
|
-
_using_key = expressions.pop("_using_key")
|
434
|
-
|
435
|
-
return QuerySet(model=cls, using=_using_key).filter(*queries, **expressions)
|
436
|
-
|
437
|
-
def get(
|
438
|
-
cls: type[T],
|
439
|
-
idlike: int | str | None = None,
|
440
|
-
**expressions,
|
441
|
-
) -> T:
|
442
|
-
"""Get a single record.
|
443
|
-
|
444
|
-
Args:
|
445
|
-
idlike: Either a uid stub, uid or an integer id.
|
446
|
-
expressions: Fields and values passed as Django query expressions.
|
447
|
-
|
448
|
-
Raises:
|
449
|
-
:exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
|
450
|
-
|
451
|
-
See Also:
|
452
|
-
- Guide: :doc:`docs:registries`
|
453
|
-
- Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
|
454
|
-
|
455
|
-
Examples:
|
456
|
-
|
457
|
-
::
|
458
|
-
|
459
|
-
ulabel = ln.ULabel.get("FvtpPJLJ")
|
460
|
-
ulabel = ln.ULabel.get(name="my-label")
|
461
|
-
"""
|
462
|
-
from .query_set import QuerySet
|
463
|
-
|
464
|
-
return QuerySet(model=cls).get(idlike, **expressions)
|
465
|
-
|
466
|
-
def df(
|
467
|
-
cls,
|
468
|
-
include: str | list[str] | None = None,
|
469
|
-
features: bool | list[str] = False,
|
470
|
-
limit: int = 100,
|
471
|
-
) -> pd.DataFrame:
|
472
|
-
"""Convert to `pd.DataFrame`.
|
473
|
-
|
474
|
-
By default, shows all direct fields, except `updated_at`.
|
475
|
-
|
476
|
-
Use arguments `include` or `feature` to include other data.
|
477
|
-
|
478
|
-
Args:
|
479
|
-
include: Related fields to include as columns. Takes strings of
|
480
|
-
form `"ulabels__name"`, `"cell_types__name"`, etc. or a list
|
481
|
-
of such strings.
|
482
|
-
features: If `True`, map all features of the
|
483
|
-
:class:`~lamindb.Feature` registry onto the resulting
|
484
|
-
`DataFrame`. Only available for `Artifact`.
|
485
|
-
limit: Maximum number of rows to display from a Pandas DataFrame.
|
486
|
-
Defaults to 100 to reduce database load.
|
487
|
-
|
488
|
-
Examples:
|
489
|
-
|
490
|
-
Include the name of the creator in the `DataFrame`:
|
491
|
-
|
492
|
-
>>> ln.ULabel.df(include="created_by__name"])
|
493
|
-
|
494
|
-
Include display of features for `Artifact`:
|
495
|
-
|
496
|
-
>>> df = ln.Artifact.df(features=True)
|
497
|
-
>>> ln.view(df) # visualize with type annotations
|
498
|
-
|
499
|
-
Only include select features:
|
500
|
-
|
501
|
-
>>> df = ln.Artifact.df(features=["cell_type_by_expert", "cell_type_by_model"])
|
502
|
-
"""
|
503
|
-
query_set = cls.filter()
|
504
|
-
if hasattr(cls, "updated_at"):
|
505
|
-
query_set = query_set.order_by("-updated_at")
|
506
|
-
return query_set[:limit].df(include=include, features=features)
|
507
|
-
|
508
|
-
@doc_args(_search.__doc__)
|
509
|
-
def search(
|
510
|
-
cls,
|
511
|
-
string: str,
|
512
|
-
*,
|
513
|
-
field: StrField | None = None,
|
514
|
-
limit: int | None = 20,
|
515
|
-
case_sensitive: bool = False,
|
516
|
-
) -> QuerySet:
|
517
|
-
"""{}""" # noqa: D415
|
518
|
-
return _search(
|
519
|
-
cls=cls,
|
520
|
-
string=string,
|
521
|
-
field=field,
|
522
|
-
limit=limit,
|
523
|
-
case_sensitive=case_sensitive,
|
524
|
-
)
|
525
|
-
|
526
|
-
def using(
|
527
|
-
cls,
|
528
|
-
instance: str | None,
|
529
|
-
) -> QuerySet:
|
530
|
-
"""Use a non-default LaminDB instance.
|
531
|
-
|
532
|
-
Args:
|
533
|
-
instance: An instance identifier of form "account_handle/instance_name".
|
534
|
-
|
535
|
-
Examples:
|
536
|
-
>>> ln.ULabel.using("account_handle/instance_name").search("ULabel7", field="name")
|
537
|
-
uid score
|
538
|
-
name
|
539
|
-
ULabel7 g7Hk9b2v 100.0
|
540
|
-
ULabel5 t4Jm6s0q 75.0
|
541
|
-
ULabel6 r2Xw8p1z 75.0
|
542
|
-
"""
|
543
|
-
from .query_set import QuerySet
|
544
|
-
|
545
|
-
# connection already established
|
546
|
-
if instance in connections:
|
547
|
-
return QuerySet(model=cls, using=instance)
|
548
|
-
# we're in the default instance
|
549
|
-
if instance is None or instance == "default":
|
550
|
-
return QuerySet(model=cls, using=None)
|
551
|
-
owner, name = get_owner_name_from_identifier(instance)
|
552
|
-
if [owner, name] == setup_settings.instance.slug.split("/"):
|
553
|
-
return QuerySet(model=cls, using=None)
|
554
|
-
|
555
|
-
# move on to different instances
|
556
|
-
cache_using_filepath = (
|
557
|
-
setup_settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
558
|
-
)
|
559
|
-
settings_file = instance_settings_file(name, owner)
|
560
|
-
if not settings_file.exists():
|
561
|
-
result = connect_instance_hub(owner=owner, name=name)
|
562
|
-
if isinstance(result, str):
|
563
|
-
raise RuntimeError(
|
564
|
-
f"Failed to load instance {instance}, please check your permissions!"
|
565
|
-
)
|
566
|
-
iresult, _ = result
|
567
|
-
# do not use {} syntax below, it gives rise to a dict if the schema modules
|
568
|
-
# are empty and then triggers a TypeError in missing_members = source_modules - target_modules
|
569
|
-
source_modules = set( # noqa
|
570
|
-
[mod for mod in iresult["schema_str"].split(",") if mod != ""]
|
571
|
-
)
|
572
|
-
# this just retrives the full connection string from iresult
|
573
|
-
db = update_db_using_local(iresult, settings_file)
|
574
|
-
cache_using_filepath.write_text(
|
575
|
-
f"{iresult['lnid']}\n{iresult['schema_str']}"
|
576
|
-
)
|
577
|
-
# need to set the token if it is a fine_grained_access and the user is jwt (not public)
|
578
|
-
is_fine_grained_access = (
|
579
|
-
iresult["fine_grained_access"] and iresult["db_permissions"] == "jwt"
|
580
|
-
)
|
581
|
-
# access_db can take both: the dict from connect_instance_hub and isettings
|
582
|
-
into_db_token = iresult
|
583
|
-
else:
|
584
|
-
isettings = load_instance_settings(settings_file)
|
585
|
-
source_modules = isettings.modules
|
586
|
-
db = isettings.db
|
587
|
-
cache_using_filepath.write_text(
|
588
|
-
f"{isettings.uid}\n{','.join(source_modules)}"
|
589
|
-
)
|
590
|
-
# need to set the token if it is a fine_grained_access and the user is jwt (not public)
|
591
|
-
is_fine_grained_access = (
|
592
|
-
isettings._fine_grained_access and isettings._db_permissions == "jwt"
|
593
|
-
)
|
594
|
-
# access_db can take both: the dict from connect_instance_hub and isettings
|
595
|
-
into_db_token = isettings
|
596
|
-
|
597
|
-
target_modules = setup_settings.instance.modules
|
598
|
-
if missing_members := source_modules - target_modules:
|
599
|
-
logger.info(
|
600
|
-
f"in transfer, source lamindb instance has additional modules: {', '.join(missing_members)}"
|
601
|
-
)
|
602
|
-
|
603
|
-
add_db_connection(db, instance)
|
604
|
-
if is_fine_grained_access:
|
605
|
-
db_token = DBToken(into_db_token)
|
606
|
-
db_token_manager.set(db_token, instance)
|
607
|
-
return QuerySet(model=cls, using=instance)
|
608
|
-
|
609
|
-
def __get_module_name__(cls) -> str:
|
610
|
-
schema_module_name = cls.__module__.split(".")[0]
|
611
|
-
module_name = schema_module_name.replace("lnschema_", "")
|
612
|
-
if module_name == "lamindb":
|
613
|
-
module_name = "core"
|
614
|
-
return module_name
|
615
|
-
|
616
|
-
def __get_name_with_module__(cls) -> str:
|
617
|
-
module_name = cls.__get_module_name__()
|
618
|
-
if module_name == "core":
|
619
|
-
module_prefix = ""
|
620
|
-
else:
|
621
|
-
module_prefix = f"{module_name}."
|
622
|
-
return f"{module_prefix}{cls.__name__}"
|
623
|
-
|
624
|
-
def __get_available_fields__(cls) -> set[str]:
|
625
|
-
if cls._available_fields is None:
|
626
|
-
cls._available_fields = {
|
627
|
-
f.name
|
628
|
-
for f in cls._meta.get_fields()
|
629
|
-
if not f.name.startswith("_")
|
630
|
-
and not f.name.startswith("links_")
|
631
|
-
and not f.name.endswith("_id")
|
632
|
-
}
|
633
|
-
if cls.__name__ == "Artifact":
|
634
|
-
cls._available_fields.add("visibility")
|
635
|
-
cls._available_fields.add("transform")
|
636
|
-
return cls._available_fields
|
637
|
-
|
638
|
-
|
639
|
-
class BasicRecord(models.Model, metaclass=Registry):
|
640
|
-
"""Basic metadata record.
|
641
|
-
|
642
|
-
It has the same methods as Record, but doesn't have the additional fields.
|
643
|
-
|
644
|
-
It's mainly used for LinkORMs and similar.
|
645
|
-
"""
|
646
|
-
|
647
|
-
objects = QueryManager()
|
648
|
-
|
649
|
-
class Meta:
|
650
|
-
abstract = True
|
651
|
-
base_manager_name = "objects"
|
652
|
-
|
653
|
-
def __init__(self, *args, **kwargs):
|
654
|
-
skip_validation = kwargs.pop("_skip_validation", False)
|
655
|
-
if not args:
|
656
|
-
if (
|
657
|
-
issubclass(self.__class__, Record)
|
658
|
-
and self.__class__.__name__
|
659
|
-
not in {"Storage", "ULabel", "Feature", "Schema", "Param"}
|
660
|
-
# do not save bionty entities in restricted spaces by default
|
661
|
-
and self.__class__.__module__ != "bionty.models"
|
662
|
-
):
|
663
|
-
from lamindb import context as run_context
|
664
|
-
|
665
|
-
if run_context.space is not None:
|
666
|
-
kwargs["space"] = run_context.space
|
667
|
-
if skip_validation:
|
668
|
-
super().__init__(**kwargs)
|
669
|
-
else:
|
670
|
-
from ..core._settings import settings
|
671
|
-
from .can_curate import CanCurate
|
672
|
-
from .collection import Collection
|
673
|
-
from .transform import Transform
|
674
|
-
|
675
|
-
validate_fields(self, kwargs)
|
676
|
-
|
677
|
-
# do not search for names if an id is passed; this is important
|
678
|
-
# e.g. when synching ids from the notebook store to lamindb
|
679
|
-
has_consciously_provided_uid = False
|
680
|
-
if "_has_consciously_provided_uid" in kwargs:
|
681
|
-
has_consciously_provided_uid = kwargs.pop(
|
682
|
-
"_has_consciously_provided_uid"
|
683
|
-
)
|
684
|
-
if (
|
685
|
-
isinstance(self, (CanCurate, Collection, Transform))
|
686
|
-
and settings.creation.search_names
|
687
|
-
and not has_consciously_provided_uid
|
688
|
-
):
|
689
|
-
name_field = getattr(self, "_name_field", "name")
|
690
|
-
exact_match = suggest_records_with_similar_names(
|
691
|
-
self, name_field, kwargs
|
692
|
-
)
|
693
|
-
if exact_match is not None:
|
694
|
-
if "version" in kwargs:
|
695
|
-
if kwargs["version"] is not None:
|
696
|
-
version_comment = " and version"
|
697
|
-
existing_record = self.__class__.filter(
|
698
|
-
**{
|
699
|
-
name_field: kwargs[name_field],
|
700
|
-
"version": kwargs["version"],
|
701
|
-
}
|
702
|
-
).one_or_none()
|
703
|
-
else:
|
704
|
-
# for a versioned record, an exact name match is not a criterion
|
705
|
-
# for retrieving a record in case `version` isn't passed -
|
706
|
-
# we'd always pull out many records with exactly the same name
|
707
|
-
existing_record = None
|
708
|
-
else:
|
709
|
-
version_comment = ""
|
710
|
-
existing_record = exact_match
|
711
|
-
if existing_record is not None:
|
712
|
-
logger.important(
|
713
|
-
f"returning existing {self.__class__.__name__} record with same"
|
714
|
-
f" {name_field}{version_comment}: '{kwargs[name_field]}'"
|
715
|
-
)
|
716
|
-
init_self_from_db(self, existing_record)
|
717
|
-
update_attributes(self, kwargs)
|
718
|
-
return None
|
719
|
-
super().__init__(**kwargs)
|
720
|
-
if isinstance(self, ValidateFields):
|
721
|
-
# this will trigger validation against django validators
|
722
|
-
try:
|
723
|
-
if hasattr(self, "clean_fields"):
|
724
|
-
self.clean_fields()
|
725
|
-
else:
|
726
|
-
self._Model__clean_fields()
|
727
|
-
except DjangoValidationError as e:
|
728
|
-
message = _format_django_validation_error(self, e)
|
729
|
-
raise FieldValidationError(message) from e
|
730
|
-
elif len(args) != len(self._meta.concrete_fields):
|
731
|
-
raise FieldValidationError(
|
732
|
-
f"Use keyword arguments instead of positional arguments, e.g.: {self.__class__.__name__}(name='...')."
|
733
|
-
)
|
734
|
-
else:
|
735
|
-
super().__init__(*args)
|
736
|
-
track_current_key_and_name_values(self)
|
737
|
-
|
738
|
-
def save(self, *args, **kwargs) -> Record:
|
739
|
-
"""Save.
|
740
|
-
|
741
|
-
Always saves to the default database.
|
742
|
-
"""
|
743
|
-
using_key = None
|
744
|
-
if "using" in kwargs:
|
745
|
-
using_key = kwargs["using"]
|
746
|
-
db = self._state.db
|
747
|
-
pk_on_db = self.pk
|
748
|
-
artifacts: list = []
|
749
|
-
if self.__class__.__name__ == "Collection" and self.id is not None:
|
750
|
-
# when creating a new collection without being able to access artifacts
|
751
|
-
artifacts = self.ordered_artifacts.list()
|
752
|
-
pre_existing_record = None
|
753
|
-
# consider records that are being transferred from other databases
|
754
|
-
transfer_logs: dict[str, list[str]] = {
|
755
|
-
"mapped": [],
|
756
|
-
"transferred": [],
|
757
|
-
"run": None,
|
758
|
-
}
|
759
|
-
if db is not None and db != "default" and using_key is None:
|
760
|
-
if isinstance(self, IsVersioned):
|
761
|
-
if not self.is_latest:
|
762
|
-
raise NotImplementedError(
|
763
|
-
"You are attempting to transfer a record that's not the latest in its version history. This is currently not supported."
|
764
|
-
)
|
765
|
-
pre_existing_record = transfer_to_default_db(
|
766
|
-
self, using_key, transfer_logs=transfer_logs
|
767
|
-
)
|
768
|
-
self._revises: IsVersioned
|
769
|
-
if pre_existing_record is not None:
|
770
|
-
init_self_from_db(self, pre_existing_record)
|
771
|
-
else:
|
772
|
-
check_key_change(self)
|
773
|
-
check_name_change(self)
|
774
|
-
try:
|
775
|
-
# save versioned record in presence of self._revises
|
776
|
-
if isinstance(self, IsVersioned) and self._revises is not None:
|
777
|
-
assert self._revises.is_latest # noqa: S101
|
778
|
-
revises = self._revises
|
779
|
-
revises.is_latest = False
|
780
|
-
with transaction.atomic():
|
781
|
-
revises._revises = None # ensure we don't start a recursion
|
782
|
-
revises.save()
|
783
|
-
super().save(*args, **kwargs) # type: ignore
|
784
|
-
self._revises = None
|
785
|
-
# save unversioned record
|
786
|
-
else:
|
787
|
-
super().save(*args, **kwargs)
|
788
|
-
except (IntegrityError, ProgrammingError) as e:
|
789
|
-
error_msg = str(e)
|
790
|
-
# two possible error messages for hash duplication
|
791
|
-
# "duplicate key value violates unique constraint"
|
792
|
-
# "UNIQUE constraint failed"
|
793
|
-
if (
|
794
|
-
isinstance(e, IntegrityError)
|
795
|
-
and "hash" in error_msg
|
796
|
-
and (
|
797
|
-
"UNIQUE constraint failed" in error_msg
|
798
|
-
or "duplicate key value violates unique constraint" in error_msg
|
799
|
-
)
|
800
|
-
):
|
801
|
-
pre_existing_record = self.__class__.get(hash=self.hash)
|
802
|
-
logger.warning(
|
803
|
-
f"returning {self.__class__.__name__.lower()} with same hash: {pre_existing_record}"
|
804
|
-
)
|
805
|
-
init_self_from_db(self, pre_existing_record)
|
806
|
-
elif (
|
807
|
-
isinstance(e, ProgrammingError)
|
808
|
-
and hasattr(self, "space")
|
809
|
-
and "new row violates row-level security policy" in error_msg
|
810
|
-
):
|
811
|
-
raise NoWriteAccess(
|
812
|
-
f"You’re not allowed to write to the space '{self.space.name}'.\n"
|
813
|
-
"Please contact an administrator of the space if you need write access."
|
814
|
-
) from None
|
815
|
-
else:
|
816
|
-
raise
|
817
|
-
# call the below in case a user makes more updates to the record
|
818
|
-
track_current_key_and_name_values(self)
|
819
|
-
# perform transfer of many-to-many fields
|
820
|
-
# only supported for Artifact and Collection records
|
821
|
-
if db is not None and db != "default" and using_key is None:
|
822
|
-
if self.__class__.__name__ == "Collection":
|
823
|
-
if len(artifacts) > 0:
|
824
|
-
logger.info("transfer artifacts")
|
825
|
-
for artifact in artifacts:
|
826
|
-
artifact.save()
|
827
|
-
self.artifacts.add(*artifacts)
|
828
|
-
if hasattr(self, "labels"):
|
829
|
-
from copy import copy
|
830
|
-
|
831
|
-
from lamindb.models._feature_manager import FeatureManager
|
832
|
-
|
833
|
-
# here we go back to original record on the source database
|
834
|
-
self_on_db = copy(self)
|
835
|
-
self_on_db._state.db = db
|
836
|
-
self_on_db.pk = pk_on_db # manually set the primary key
|
837
|
-
self_on_db.features = FeatureManager(self_on_db) # type: ignore
|
838
|
-
self.features._add_from(self_on_db, transfer_logs=transfer_logs)
|
839
|
-
self.labels.add_from(self_on_db, transfer_logs=transfer_logs)
|
840
|
-
for k, v in transfer_logs.items():
|
841
|
-
if k != "run" and len(v) > 0:
|
842
|
-
logger.important(f"{k} records: {', '.join(v)}")
|
843
|
-
|
844
|
-
if (
|
845
|
-
self.__class__.__name__
|
846
|
-
in {
|
847
|
-
"Artifact",
|
848
|
-
"Transform",
|
849
|
-
"Run",
|
850
|
-
"ULabel",
|
851
|
-
"Feature",
|
852
|
-
"Schema",
|
853
|
-
"Collection",
|
854
|
-
"Reference",
|
855
|
-
}
|
856
|
-
and self._branch_code >= 1
|
857
|
-
):
|
858
|
-
import lamindb as ln
|
859
|
-
|
860
|
-
if ln.context.project is not None:
|
861
|
-
self.projects.add(ln.context.project)
|
862
|
-
return self
|
863
|
-
|
864
|
-
def delete(self) -> None:
|
865
|
-
"""Delete."""
|
866
|
-
# note that the logic below does not fire if a record is moved to the trash
|
867
|
-
# the idea is that moving a record to the trash should move its entire version family
|
868
|
-
# to the trash, whereas permanently deleting should default to only deleting a single record
|
869
|
-
# of a version family
|
870
|
-
# we can consider making it easy to permanently delete entire version families as well,
|
871
|
-
# but that's for another time
|
872
|
-
if isinstance(self, IsVersioned) and self.is_latest:
|
873
|
-
new_latest = (
|
874
|
-
self.__class__.objects.using(self._state.db)
|
875
|
-
.filter(is_latest=False, uid__startswith=self.stem_uid)
|
876
|
-
.order_by("-created_at")
|
877
|
-
.first()
|
878
|
-
)
|
879
|
-
if new_latest is not None:
|
880
|
-
new_latest.is_latest = True
|
881
|
-
with transaction.atomic():
|
882
|
-
new_latest.save()
|
883
|
-
super().delete() # type: ignore
|
884
|
-
logger.warning(f"new latest version is {new_latest}")
|
885
|
-
return None
|
886
|
-
super().delete()
|
887
|
-
|
888
|
-
|
889
|
-
class Space(BasicRecord):
|
890
|
-
"""Spaces to restrict access to records to specific users or teams.
|
891
|
-
|
892
|
-
You can use spaces to restrict access to records within an instance.
|
893
|
-
|
894
|
-
All data in this registry is synced from `lamin.ai` to enable re-using spaces across instances.
|
895
|
-
There is no need to manually create records.
|
896
|
-
"""
|
897
|
-
|
898
|
-
id: int = models.SmallAutoField(primary_key=True)
|
899
|
-
"""Internal id, valid only in one DB instance."""
|
900
|
-
name: str = models.CharField(max_length=100, db_index=True)
|
901
|
-
"""Name of space."""
|
902
|
-
uid: str = CharField(
|
903
|
-
editable=False,
|
904
|
-
unique=True,
|
905
|
-
max_length=12,
|
906
|
-
default="00000000",
|
907
|
-
db_default="00000000",
|
908
|
-
db_index=True,
|
72
|
+
# naming convention in analogy with Schema
|
73
|
+
components: Record = models.ManyToManyField(
|
74
|
+
"Record", through="RecordRecord", symmetrical=False, related_name="composites"
|
909
75
|
)
|
910
|
-
"""
|
76
|
+
"""Record-like components of this record."""
|
77
|
+
composites: Record
|
78
|
+
"""Record-like composites of this record."""
|
79
|
+
sheet: Sheet | None = ForeignKey(
|
80
|
+
"Sheet", CASCADE, null=True, related_name="records"
|
81
|
+
)
|
82
|
+
"""Group records by sheet."""
|
911
83
|
description: str | None = CharField(null=True)
|
912
|
-
"""
|
913
|
-
|
914
|
-
|
84
|
+
"""A description (optional)."""
|
85
|
+
artifacts: Artifact = models.ManyToManyField(
|
86
|
+
Artifact, through="RecordArtifact", related_name="records"
|
915
87
|
)
|
916
|
-
"""
|
917
|
-
|
918
|
-
|
88
|
+
"""Linked artifacts."""
|
89
|
+
runs: Run = models.ManyToManyField(Run, through="RecordRun", related_name="records")
|
90
|
+
"""Linked runs."""
|
91
|
+
ulabels: ULabel = models.ManyToManyField(
|
92
|
+
ULabel,
|
93
|
+
through="RecordULabel",
|
94
|
+
related_name="_records", # in transition period
|
919
95
|
)
|
920
|
-
"""
|
96
|
+
"""Linked runs."""
|
97
|
+
projects: Project
|
98
|
+
"""Linked projects."""
|
921
99
|
|
922
100
|
@overload
|
923
101
|
def __init__(
|
924
102
|
self,
|
925
103
|
name: str,
|
104
|
+
type: Record | None = None,
|
105
|
+
is_type: bool = False,
|
926
106
|
description: str | None = None,
|
927
107
|
): ...
|
928
108
|
|
@@ -937,716 +117,111 @@ class Space(BasicRecord):
|
|
937
117
|
*args,
|
938
118
|
**kwargs,
|
939
119
|
):
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
- 3: template (hidden in queries & searches)
|
970
|
-
- 2: draft (hidden in queries & searches)
|
971
|
-
- 1: default (visible in queries & searches)
|
972
|
-
- 0: archive (hidden, meant to be kept)
|
973
|
-
- -1: trash (hidden, scheduled for deletion)
|
974
|
-
|
975
|
-
Any integer higher than >3 codes a branch that's involved in a pull request.
|
976
|
-
"""
|
977
|
-
space: Space = ForeignKey(Space, PROTECT, default=1, db_default=1)
|
978
|
-
"""The space in which the record lives."""
|
979
|
-
_aux: dict[str, Any] | None = JSONField(default=None, db_default=None, null=True)
|
980
|
-
"""Auxiliary field for dictionary-like metadata."""
|
981
|
-
|
982
|
-
class Meta:
|
983
|
-
abstract = True
|
984
|
-
|
985
|
-
|
986
|
-
def _format_django_validation_error(record: Record, e: DjangoValidationError):
|
987
|
-
"""Pretty print Django validation errors."""
|
988
|
-
errors = {}
|
989
|
-
if hasattr(e, "error_dict"):
|
990
|
-
error_dict = e.error_dict
|
991
|
-
else:
|
992
|
-
error_dict = {"__all__": e.error_list}
|
993
|
-
|
994
|
-
for field_name, error_list in error_dict.items():
|
995
|
-
for error in error_list:
|
996
|
-
if hasattr(error, "message"):
|
997
|
-
msg = error.message
|
998
|
-
else:
|
999
|
-
msg = str(error)
|
1000
|
-
|
1001
|
-
if field_name == "__all__":
|
1002
|
-
errors[field_name] = f"{colors.yellow(msg)}"
|
1003
|
-
else:
|
1004
|
-
current_value = getattr(record, field_name, None)
|
1005
|
-
errors[field_name] = (
|
1006
|
-
f"{field_name}: {colors.yellow(current_value)} is not valid\n → {msg}"
|
1007
|
-
)
|
1008
|
-
|
1009
|
-
if errors:
|
1010
|
-
message = "\n "
|
1011
|
-
for _, error in errors.items():
|
1012
|
-
message += error + "\n "
|
1013
|
-
|
1014
|
-
return message
|
1015
|
-
|
1016
|
-
|
1017
|
-
def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
|
1018
|
-
"""Gets the parameters of a Record from the overloaded signature.
|
1019
|
-
|
1020
|
-
Example:
|
1021
|
-
>>> get_record_params(bt.Organism)
|
1022
|
-
>>> [('name', 'str'), ('taxon_id', 'str | None'), ('scientific_name', 'str | None')]
|
1023
|
-
"""
|
1024
|
-
source = inspect.getsource(record_class)
|
1025
|
-
|
1026
|
-
# Find first overload that's not *db_args
|
1027
|
-
pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
|
1028
|
-
overloads = re.finditer(pattern, source)
|
1029
|
-
|
1030
|
-
for single_overload in overloads:
|
1031
|
-
params_block = single_overload.group(1)
|
1032
|
-
# This is an additional safety measure if the overloaded signature that we're
|
1033
|
-
# looking for is not at the top but a "db_args" constructor
|
1034
|
-
if "*db_args" in params_block:
|
1035
|
-
continue
|
1036
|
-
|
1037
|
-
params = []
|
1038
|
-
for line in params_block.split("\n"):
|
1039
|
-
line = line.strip()
|
1040
|
-
if not line or "self" in line:
|
1041
|
-
continue
|
1042
|
-
|
1043
|
-
# Extract name and type annotation
|
1044
|
-
# The regex pattern finds parameter definitions like:
|
1045
|
-
# Simple: name: str
|
1046
|
-
# With default: age: int = 0
|
1047
|
-
# With complex types: items: List[str] = []
|
1048
|
-
param_pattern = (
|
1049
|
-
r"(\w+)" # Parameter name
|
1050
|
-
r"\s*:\s*" # Colon with optional whitespace
|
1051
|
-
r"((?:[^=,]|" # Type hint: either non-equals/comma chars
|
1052
|
-
r"(?<=\[)[^[\]]*" # or contents within square brackets
|
1053
|
-
r"(?=\]))+)" # looking ahead for closing bracket
|
1054
|
-
r"(?:\s*=\s*" # Optional default value part
|
1055
|
-
r"([^,]+))?" # Default value: anything but comma
|
1056
|
-
)
|
1057
|
-
match = re.match(param_pattern, line)
|
1058
|
-
if not match:
|
1059
|
-
continue
|
1060
|
-
|
1061
|
-
name, type_str = match.group(1), match.group(2).strip()
|
1062
|
-
|
1063
|
-
# Keep type as string instead of evaluating
|
1064
|
-
params.append((name, type_str))
|
1065
|
-
|
1066
|
-
return params
|
1067
|
-
|
1068
|
-
return []
|
1069
|
-
|
1070
|
-
|
1071
|
-
def get_name_field(
|
1072
|
-
registry: type[Record] | QuerySet | Manager,
|
1073
|
-
*,
|
1074
|
-
field: StrField | None = None,
|
1075
|
-
) -> str:
|
1076
|
-
"""Get the 1st char or text field from the registry."""
|
1077
|
-
if isinstance(registry, (QuerySet, Manager)):
|
1078
|
-
registry = registry.model
|
1079
|
-
model_field_names = [i.name for i in registry._meta.fields]
|
1080
|
-
|
1081
|
-
# set to default name field
|
1082
|
-
if field is None:
|
1083
|
-
if hasattr(registry, "_name_field"):
|
1084
|
-
field = registry._meta.get_field(registry._name_field)
|
1085
|
-
elif "name" in model_field_names:
|
1086
|
-
field = registry._meta.get_field("name")
|
1087
|
-
else:
|
1088
|
-
# first char or text field that doesn't contain "id"
|
1089
|
-
for i in registry._meta.fields:
|
1090
|
-
if "id" in i.name:
|
1091
|
-
continue
|
1092
|
-
if i.get_internal_type() in {"CharField", "TextField"}:
|
1093
|
-
field = i
|
1094
|
-
break
|
120
|
+
if len(args) == len(self._meta.concrete_fields):
|
121
|
+
super().__init__(*args, **kwargs)
|
122
|
+
return None
|
123
|
+
if len(args) > 0:
|
124
|
+
raise ValueError("Only one non-keyword arg allowed")
|
125
|
+
name: str = kwargs.pop("name", None)
|
126
|
+
type: str | None = kwargs.pop("type", None)
|
127
|
+
is_type: bool = kwargs.pop("is_type", False)
|
128
|
+
sheet: Sheet = kwargs.pop("sheet", None)
|
129
|
+
description: str | None = kwargs.pop("description", None)
|
130
|
+
_skip_validation = kwargs.pop(
|
131
|
+
"_skip_validation", True
|
132
|
+
) # should not validate records
|
133
|
+
_aux = kwargs.pop("_aux", None)
|
134
|
+
if len(kwargs) > 0:
|
135
|
+
valid_keywords = ", ".join([val[0] for val in _get_record_kwargs(Record)])
|
136
|
+
raise FieldValidationError(
|
137
|
+
f"Only {valid_keywords} are valid keyword arguments"
|
138
|
+
)
|
139
|
+
super().__init__(
|
140
|
+
name=name,
|
141
|
+
type=type,
|
142
|
+
is_type=is_type,
|
143
|
+
sheet=sheet,
|
144
|
+
description=description,
|
145
|
+
_skip_validation=_skip_validation,
|
146
|
+
_aux=_aux,
|
147
|
+
)
|
1095
148
|
|
1096
|
-
# no default name field can be found
|
1097
|
-
if field is None:
|
1098
|
-
raise ValueError(
|
1099
|
-
"please pass a Record string field, e.g., `CellType.name`!"
|
1100
|
-
)
|
1101
|
-
else:
|
1102
|
-
field = field.name # type:ignore
|
1103
|
-
if not isinstance(field, str):
|
1104
|
-
try:
|
1105
|
-
field = field.field.name
|
1106
|
-
except AttributeError:
|
1107
|
-
raise TypeError(
|
1108
|
-
"please pass a Record string field, e.g., `CellType.name`!"
|
1109
|
-
) from None
|
1110
149
|
|
1111
|
-
|
150
|
+
class Sheet(SQLRecord, TracksRun, TracksUpdates):
|
151
|
+
"""Sheets to group records."""
|
1112
152
|
|
153
|
+
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
154
|
+
abstract = False
|
1113
155
|
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
156
|
+
id: int = models.AutoField(primary_key=True)
|
157
|
+
uid: str = CharField(
|
158
|
+
editable=False, unique=True, db_index=True, max_length=12, default=base62_12
|
1117
159
|
)
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
REGISTRY_UNIQUE_FIELD = {
|
1125
|
-
"storage": "root",
|
1126
|
-
"feature": "name",
|
1127
|
-
"ulabel": "name",
|
1128
|
-
"space": "name", # TODO: this should be updated with the currently used space instead during transfer
|
1129
|
-
}
|
1130
|
-
|
1131
|
-
|
1132
|
-
def update_fk_to_default_db(
|
1133
|
-
records: Record | list[Record] | QuerySet,
|
1134
|
-
fk: str,
|
1135
|
-
using_key: str | None,
|
1136
|
-
transfer_logs: dict,
|
1137
|
-
):
|
1138
|
-
record = records[0] if isinstance(records, (list, QuerySet)) else records
|
1139
|
-
if hasattr(record, f"{fk}_id") and getattr(record, f"{fk}_id") is not None:
|
1140
|
-
fk_record = getattr(record, fk)
|
1141
|
-
field = REGISTRY_UNIQUE_FIELD.get(fk, "uid")
|
1142
|
-
fk_record_default = fk_record.__class__.filter(
|
1143
|
-
**{field: getattr(fk_record, field)}
|
1144
|
-
).one_or_none()
|
1145
|
-
if fk_record_default is None:
|
1146
|
-
from copy import copy
|
1147
|
-
|
1148
|
-
fk_record_default = copy(fk_record)
|
1149
|
-
transfer_to_default_db(
|
1150
|
-
fk_record_default, using_key, save=True, transfer_logs=transfer_logs
|
1151
|
-
)
|
1152
|
-
if isinstance(records, (list, QuerySet)):
|
1153
|
-
for r in records:
|
1154
|
-
setattr(r, f"{fk}", None)
|
1155
|
-
setattr(r, f"{fk}_id", fk_record_default.id)
|
1156
|
-
else:
|
1157
|
-
setattr(records, f"{fk}", None)
|
1158
|
-
setattr(records, f"{fk}_id", fk_record_default.id)
|
1159
|
-
|
1160
|
-
|
1161
|
-
FKBULK = [
|
1162
|
-
"organism",
|
1163
|
-
"source",
|
1164
|
-
"report", # Run
|
1165
|
-
]
|
1166
|
-
|
1167
|
-
|
1168
|
-
def transfer_fk_to_default_db_bulk(
|
1169
|
-
records: list | QuerySet, using_key: str | None, transfer_logs: dict
|
1170
|
-
):
|
1171
|
-
for fk in FKBULK:
|
1172
|
-
update_fk_to_default_db(records, fk, using_key, transfer_logs=transfer_logs)
|
1173
|
-
|
1174
|
-
|
1175
|
-
def get_transfer_run(record) -> Run:
|
1176
|
-
from lamindb import settings
|
1177
|
-
from lamindb.core._context import context
|
1178
|
-
from lamindb.models import Run, Transform
|
1179
|
-
from lamindb.models.artifact import WARNING_RUN_TRANSFORM
|
1180
|
-
|
1181
|
-
slug = record._state.db
|
1182
|
-
owner, name = get_owner_name_from_identifier(slug)
|
1183
|
-
cache_using_filepath = (
|
1184
|
-
ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
160
|
+
"""A universal random id, valid across DB instances."""
|
161
|
+
name: str = CharField(db_index=True)
|
162
|
+
"""Name or title of sheet."""
|
163
|
+
schema: Schema | None = ForeignKey(
|
164
|
+
"Schema", CASCADE, null=True, related_name="sheets"
|
1185
165
|
)
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
transform = Transform.filter(uid=uid).one_or_none()
|
1192
|
-
if transform is None:
|
1193
|
-
search_names = settings.creation.search_names
|
1194
|
-
settings.creation.search_names = False
|
1195
|
-
transform = Transform( # type: ignore
|
1196
|
-
uid=uid, description=f"Transfer from `{slug}`", key=key, type="function"
|
1197
|
-
).save()
|
1198
|
-
settings.creation.search_names = search_names
|
1199
|
-
# use the global run context to get the initiated_by_run run id
|
1200
|
-
if context.run is not None:
|
1201
|
-
initiated_by_run = context.run
|
1202
|
-
else:
|
1203
|
-
if not settings.creation.artifact_silence_missing_run_warning:
|
1204
|
-
logger.warning(WARNING_RUN_TRANSFORM)
|
1205
|
-
initiated_by_run = None
|
1206
|
-
# it doesn't seem to make sense to create new runs for every transfer
|
1207
|
-
run = Run.filter(
|
1208
|
-
transform=transform, initiated_by_run=initiated_by_run
|
1209
|
-
).one_or_none()
|
1210
|
-
if run is None:
|
1211
|
-
run = Run(transform=transform, initiated_by_run=initiated_by_run).save() # type: ignore
|
1212
|
-
run.initiated_by_run = initiated_by_run # so that it's available in memory
|
1213
|
-
return run
|
1214
|
-
|
1215
|
-
|
1216
|
-
def transfer_to_default_db(
|
1217
|
-
record: Record,
|
1218
|
-
using_key: str | None,
|
1219
|
-
*,
|
1220
|
-
transfer_logs: dict,
|
1221
|
-
save: bool = False,
|
1222
|
-
transfer_fk: bool = True,
|
1223
|
-
) -> Record | None:
|
1224
|
-
if record._state.db is None or record._state.db == "default":
|
1225
|
-
return None
|
1226
|
-
registry = record.__class__
|
1227
|
-
record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
|
1228
|
-
record_str = f"{record.__class__.__name__}(uid='{record.uid}')"
|
1229
|
-
if transfer_logs["run"] is None:
|
1230
|
-
transfer_logs["run"] = get_transfer_run(record)
|
1231
|
-
if record_on_default is not None:
|
1232
|
-
transfer_logs["mapped"].append(record_str)
|
1233
|
-
return record_on_default
|
1234
|
-
else:
|
1235
|
-
transfer_logs["transferred"].append(record_str)
|
1236
|
-
|
1237
|
-
if hasattr(record, "created_by_id"):
|
1238
|
-
record.created_by = None
|
1239
|
-
record.created_by_id = ln_setup.settings.user.id
|
1240
|
-
# run & transform
|
1241
|
-
run = transfer_logs["run"]
|
1242
|
-
if hasattr(record, "run_id"):
|
1243
|
-
record.run = None
|
1244
|
-
record.run_id = run.id
|
1245
|
-
# deal with denormalized transform FK on artifact and collection
|
1246
|
-
if hasattr(record, "transform_id"):
|
1247
|
-
record.transform = None
|
1248
|
-
record.transform_id = run.transform_id
|
1249
|
-
# transfer other foreign key fields
|
1250
|
-
fk_fields = [
|
1251
|
-
i.name
|
1252
|
-
for i in record._meta.fields
|
1253
|
-
if i.get_internal_type() == "ForeignKey"
|
1254
|
-
if i.name not in {"created_by", "run", "transform"}
|
1255
|
-
]
|
1256
|
-
if not transfer_fk:
|
1257
|
-
# don't transfer fk fields that are already bulk transferred
|
1258
|
-
fk_fields = [fk for fk in fk_fields if fk not in FKBULK]
|
1259
|
-
for fk in fk_fields:
|
1260
|
-
update_fk_to_default_db(record, fk, using_key, transfer_logs=transfer_logs)
|
1261
|
-
record.id = None
|
1262
|
-
record._state.db = "default"
|
1263
|
-
if save:
|
1264
|
-
record.save()
|
1265
|
-
return None
|
1266
|
-
|
1267
|
-
|
1268
|
-
def track_current_key_and_name_values(record: Record):
|
1269
|
-
from lamindb.models import Artifact
|
1270
|
-
|
1271
|
-
# below, we're using __dict__ to avoid triggering the refresh from the database
|
1272
|
-
# which can lead to a recursion
|
1273
|
-
if isinstance(record, Artifact):
|
1274
|
-
record._old_key = record.__dict__.get("key")
|
1275
|
-
record._old_suffix = record.__dict__.get("suffix")
|
1276
|
-
elif hasattr(record, "_name_field"):
|
1277
|
-
record._old_name = record.__dict__.get(record._name_field)
|
1278
|
-
|
1279
|
-
|
1280
|
-
def check_name_change(record: Record):
|
1281
|
-
"""Warns if a record's name has changed."""
|
1282
|
-
from lamindb.models import Artifact, Collection, Feature, Schema, Transform
|
1283
|
-
|
1284
|
-
if (
|
1285
|
-
not record.pk
|
1286
|
-
or not hasattr(record, "_old_name")
|
1287
|
-
or not hasattr(record, "_name_field")
|
1288
|
-
):
|
1289
|
-
return
|
166
|
+
"""A schema to enforce for the sheet (optional)."""
|
167
|
+
description: str | None = CharField(null=True, db_index=True)
|
168
|
+
"""A description (optional)."""
|
169
|
+
projects: Project
|
170
|
+
"""Linked projects."""
|
1290
171
|
|
1291
|
-
# checked in check_key_change or not checked at all
|
1292
|
-
if isinstance(record, (Artifact, Collection, Transform)):
|
1293
|
-
return
|
1294
172
|
|
-    # renaming feature sets is not checked
-    if isinstance(record, Schema):
-        return
-
-    old_name = record._old_name
-    new_name = getattr(record, record._name_field)
-    registry = record.__class__.__name__
-
-    if old_name != new_name:
-        # when a label is renamed, only raise a warning if it has a feature
-        if hasattr(record, "artifacts"):
-            linked_records = (
-                record.artifacts.through.filter(
-                    label_ref_is_name=True, **{f"{registry.lower()}_id": record.pk}
-                )
-                .exclude(feature_id=None)  # must have a feature
-                .distinct()
-            )
-            artifact_ids = linked_records.list("artifact__uid")
-            n = len(artifact_ids)
-            if n > 0:
-                s = "s" if n > 1 else ""
-                logger.error(
-                    f"You are trying to {colors.red('rename label')} from '{old_name}' to '{new_name}'!\n"
-                    f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {artifact_ids}\n\n"
-                    f"{colors.bold('To rename this label')}, make it external:\n"
-                    f" → run `artifact.labels.make_external(label)`\n\n"
-                    f"After renaming, consider re-curating the above artifact{s}:\n"
-                    f' → in each dataset, manually modify label "{old_name}" to "{new_name}"\n'
-                    f" → run `ln.Curator`\n"
-                )
-                raise RecordNameChangeIntegrityError
-
-        # when a feature is renamed
-        elif isinstance(record, Feature):
-            # only internal features are associated with schemas
-            linked_artifacts = Artifact.filter(feature_sets__features=record).list(
-                "uid"
-            )
-            n = len(linked_artifacts)
-            if n > 0:
-                s = "s" if n > 1 else ""
-                logger.error(
-                    f"You are trying to {colors.red('rename feature')} from '{old_name}' to '{new_name}'!\n"
-                    f" → The following {n} artifact{s} {colors.red('will no longer be validated')}: {linked_artifacts}\n\n"
-                    f"{colors.bold('To rename this feature')}, make it external:\n"
-                    " → run `artifact.features.make_external(feature)`\n\n"
-                    f"After renaming, consider re-curating the above artifact{s}:\n"
-                    f" → in each dataset, manually modify feature '{old_name}' to '{new_name}'\n"
-                    f" → run `ln.Curator`\n"
-                )
-                raise RecordNameChangeIntegrityError
-
-
-def check_key_change(record: Union[Artifact, Transform]):
-    """Errors if a record's key has falsely changed."""
-    from .artifact import Artifact
-
-    if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
-        return
-    if record._old_suffix != record.suffix:
-        raise InvalidArgument(
-            f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'."
-        )
-
-    old_key = record._old_key
-    new_key = record.key
-
-    if old_key != new_key:
-        if not record._key_is_virtual:
-            raise InvalidArgument(
-                f"Changing a non-virtual key of an artifact is not allowed! You tried to change it from '{old_key}' to '{new_key}'."
-            )
-        if old_key is not None:
-            old_key_suffix = extract_suffix_from_path(
-                PurePosixPath(old_key), arg_name="key"
-            )
-            assert old_key_suffix == record.suffix, (  # noqa: S101
-                old_key_suffix,
-                record.suffix,
-            )
-        else:
-            old_key_suffix = record.suffix
-        new_key_suffix = extract_suffix_from_path(
-            PurePosixPath(new_key), arg_name="key"
-        )
-        if old_key_suffix != new_key_suffix:
-            raise InvalidArgument(
-                f"The suffix '{new_key_suffix}' of the provided key is incorrect, it should be '{old_key_suffix}'."
-            )
+class RecordJson(BaseSQLRecord, IsLink):
+    id: int = models.BigAutoField(primary_key=True)
+    record: Record = ForeignKey(Record, CASCADE, related_name="values_json")
+    feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordjson")
+    value: Any = JSONField(default=None, db_default=None)

+    class Meta:
+        unique_together = ("record", "feature")

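The `RecordJson` link model added above (together with the other `Record*` link models later in this diff) stores values in an entity-attribute-value layout: each row attaches one value to a (`record`, `feature`) pair, and `unique_together = ("record", "feature")` permits at most one JSON value per pair. A minimal usage sketch in plain Django-ORM style; the import path follows this module, but `my_record` and `metadata_feature` are illustrative objects, not part of the diff:

# Hypothetical sketch: attach one JSON value to a record for a given feature.
from lamindb.models.record import RecordJson  # class added in this diff

link = RecordJson(
    record=my_record,          # a saved Record (illustrative)
    feature=metadata_feature,  # a saved Feature (illustrative)
    value={"replicate": 2},    # arbitrary JSON payload
)
link.save()
# A second row for the same (record, feature) pair would violate the
# unique_together constraint and raise django.db.IntegrityError.
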
@@ -1387,239 +182,25 @@
-def format_field_value(value: datetime | str | Any) -> Any:
-    from datetime import datetime
-
-    if isinstance(value, datetime):
-        return value.strftime("%Y-%m-%d %H:%M:%S %Z")
-
-    if isinstance(value, str):
-        try:
-            value = datetime.fromisoformat(value)
-            value = value.strftime("%Y-%m-%d %H:%M:%S %Z")
-        except ValueError:
-            pass
-        return f"'{value}'"
-    else:
-        return value
-
-
-class RecordInfo:
-    def __init__(self, registry: Registry):
-        self.registry = registry
-
-    def _get_type_for_field(self, field_name: str) -> str:
-        field = self.registry._meta.get_field(field_name)
-        related_model_name = (
-            field.related_model.__name__
-            if hasattr(field, "related_model") and field.related_model
-            else None
-        )
-        return related_model_name if related_model_name else field.get_internal_type()
-
-    def _get_base_class_fields(self) -> list[str]:
-        return [
-            field.name
-            for base in self.registry.__bases__
-            if hasattr(base, "_meta")
-            for field in base._meta.get_fields()
-        ]
-
-    def _reorder_fields_by_class(self, fields_to_order: list[Field]) -> list[Field]:
-        """Reorders the fields so that base class fields come last."""
-        non_base_class_fields = [
-            field
-            for field in fields_to_order
-            if field.name not in self._get_base_class_fields()
-        ]
-        found_base_class_fields = [
-            field
-            for field in fields_to_order
-            if field.name in self._get_base_class_fields()
-        ]
-        return non_base_class_fields + found_base_class_fields
-
-    def get_simple_fields(self, return_str: bool = False) -> Any:
-        simple_fields = [
-            field
-            for field in self.registry._meta.get_fields()
-            if not (
-                isinstance(field, ManyToOneRel)
-                or isinstance(field, ManyToManyRel)
-                or isinstance(field, ManyToManyField)
-                or isinstance(field, ForeignKey)
-                or field.name.startswith("_")
-                or field.name == "id"
-            )
-        ]
-        simple_fields = self._reorder_fields_by_class(simple_fields)
-        if not return_str:
-            return simple_fields
-        else:
-            repr_str = f" {colors.italic('Simple fields')}\n"
-            if simple_fields:
-                repr_str += "".join(
-                    [
-                        f" .{field_name.name}: {self._get_type_for_field(field_name.name)}\n"
-                        for field_name in simple_fields
-                    ]
-                )
-            return repr_str
-
-    def get_relational_fields(self, return_str: bool = False):
-        # we ignore ManyToOneRel because it leads to so much clutter in the API
-        # also note that our general guideline is to have related_name="+"
-        # for ForeignKey fields
-        relational_fields = (ManyToOneRel, ManyToManyRel, ManyToManyField, ForeignKey)
-
-        class_specific_relational_fields = [
-            field
-            for field in self.registry._meta.fields + self.registry._meta.many_to_many
-            if isinstance(field, relational_fields)
-            and not field.name.startswith(("links_", "_"))
-        ]
-
-        non_class_specific_relational_fields = [
-            field
-            for field in self.registry._meta.get_fields()
-            if isinstance(field, relational_fields)
-            and not field.name.startswith(("links_", "_"))
-        ]
-        non_class_specific_relational_fields = self._reorder_fields_by_class(
-            non_class_specific_relational_fields
-        )
-
-        # Ensure that class specific fields (e.g. Artifact) come before non-class specific fields (e.g. collection)
-        filtered_non_class_specific = [
-            field
-            for field in non_class_specific_relational_fields
-            if field not in class_specific_relational_fields
-        ]
-        ordered_relational_fields = (
-            class_specific_relational_fields + filtered_non_class_specific
-        )
-
-        core_module_fields = []
-        external_modules_fields = []
-        for field in ordered_relational_fields:
-            field_name = repr(field).split(": ")[1][:-1]
-            if field_name.count(".") == 1 and "lamindb" not in field_name:
-                external_modules_fields.append(field)
-            else:
-                core_module_fields.append(field)
-
-        def _get_related_field_type(field) -> str:
-            field_type = (
-                field.related_model.__get_name_with_module__()
-                .replace(
-                    "Artifact", ""
-                )  # some fields have an unnecessary 'Artifact' in their name
-                .replace(
-                    "Collection", ""
-                )  # some fields have an unnecessary 'Collection' in their name
-            )
-            return (
-                self._get_type_for_field(field.name)
-                if not field_type.strip()
-                else field_type
-            )
-
-        core_module_fields_formatted = [
-            f" .{field.name}: {_get_related_field_type(field)}\n"
-            for field in core_module_fields
-        ]
-        external_modules_fields_formatted = [
-            f" .{field.name}: {_get_related_field_type(field)}\n"
-            for field in external_modules_fields
-        ]
-
-        if not return_str:
-            external_modules_fields_by_modules = defaultdict(list)
-            for field_str, field in zip(
-                external_modules_fields_formatted, external_modules_fields
-            ):
-                field_type = field_str.split(":")[1].split()[0]
-                module_name = field_type.split(".")[0]
-                external_modules_fields_by_modules[module_name].append(field)
-            return core_module_fields, external_modules_fields_by_modules
-        else:
-            repr_str = ""
-
-            # Non-external relational fields
-            if core_module_fields:
-                repr_str += f" {colors.italic('Relational fields')}\n"
-                repr_str += "".join(core_module_fields_formatted)
-
-            # External relational fields
-            external_modules = set()
-            for field in external_modules_fields_formatted:
-                field_type = field.split(":")[1].split()[0]
-                external_modules.add(field_type.split(".")[0])
-
-            if external_modules:
-                # We want Bionty to show up before other modules
-                external_modules = (
-                    ["bionty"] + sorted(external_modules - {"bionty"})  # type: ignore
-                    if "bionty" in external_modules
-                    else sorted(external_modules)
-                )
-                for ext_module in external_modules:
-                    ext_module_fields = [
-                        field
-                        for field in external_modules_fields_formatted
-                        if ext_module in field
-                    ]
-
-                    if ext_module_fields:
-                        repr_str += (
-                            f" {colors.italic(f'{ext_module.capitalize()} fields')}\n"
-                        )
-                        repr_str += "".join(ext_module_fields)
-
-        return repr_str

+class RecordRecord(SQLRecord, IsLink):
+    id: int = models.BigAutoField(primary_key=True)
+    record: Record = ForeignKey(
+        Record, CASCADE, related_name="values_record"
+    )  # composite
+    feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordrecord")
+    value: Record = ForeignKey(
+        Record, PROTECT, related_name="links_record"
+    )  # component

-
-
-    repr_str = f"{colors.green(cls.__name__)}\n"
-    info = RecordInfo(cls)
-    repr_str += info.get_simple_fields(return_str=True)
-    repr_str += info.get_relational_fields(return_str=True)
-    repr_str = repr_str.rstrip("\n")
-    return repr_str
+    class Meta:
+        unique_together = ("record", "feature")


-
-
-
-
-
-    field_names = [
-        field.name
-        for field in self._meta.fields
-        if (not isinstance(field, ForeignKey) and field.name not in exclude_field_names)
-    ]
-    if include_foreign_keys:
-        field_names += [
-            f"{field.name}_id"
-            for field in self._meta.fields
-            if isinstance(field, ForeignKey)
-        ]
-    if "created_at" in field_names:
-        field_names.remove("created_at")
-        field_names.append("created_at")
-    if field_names[0] != "uid" and "uid" in field_names:
-        field_names.remove("uid")
-        field_names.insert(0, "uid")
-    fields_str = {}
-    for k in field_names:
-        if not k.startswith("_") and hasattr(self, k):
-            value = getattr(self, k)
-            # Force strip the time component of the version
-            if k == "version" and value:
-                fields_str[k] = f"'{str(value).split()[0]}'"
-            else:
-                fields_str[k] = format_field_value(value)
-    fields_joined_str = ", ".join(
-        [f"{k}={fields_str[k]}" for k in fields_str if fields_str[k] is not None]
-    )
-    return f"{self.__class__.__name__}({fields_joined_str})"
+class RecordULabel(BaseSQLRecord, IsLink):
+    id: int = models.BigAutoField(primary_key=True)
+    record: Record = ForeignKey(Record, CASCADE, related_name="values_ulabel")
+    feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordulabel")
+    value: ULabel = ForeignKey(ULabel, PROTECT, related_name="links_record")

+    class Meta:
+        # allows linking exactly one record to one ulabel per feature, because we likely don't want to have Many
+        unique_together = ("record", "feature")

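For `RecordULabel`, the `unique_together = ("record", "feature")` constraint means a record carries at most one `ULabel` per feature, as the in-code comment above notes; replacing the value therefore means updating the existing link row rather than inserting a second one. A hedged sketch of that update path using the standard Django `update_or_create` manager method (all object names are illustrative):

# Hypothetical sketch: set or overwrite the single ULabel linked to (record, feature).
from lamindb.models.record import RecordULabel

RecordULabel.objects.update_or_create(
    record=my_record,                # illustrative Record instance
    feature=perturbation_feature,    # illustrative Feature instance
    defaults={"value": dmso_label},  # illustrative ULabel instance
)
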
@@ -1626,27 +207,21 @@
-# below is code to further format the repr of a record
-#
-# def format_repr(
-#     record: Record, exclude_field_names: str | list[str] | None = None
-# ) -> str:
-#     if isinstance(exclude_field_names, str):
-#         exclude_field_names = [exclude_field_names]
-#     exclude_field_names_init = ["id", "created_at", "updated_at"]
-#     if exclude_field_names is not None:
-#         exclude_field_names_init += exclude_field_names
-#     return record.__repr__(
-#         include_foreign_keys=False, exclude_field_names=exclude_field_names_init
-#     )

+class RecordRun(BaseSQLRecord, IsLink):
+    id: int = models.BigAutoField(primary_key=True)
+    record: Record = ForeignKey(Record, CASCADE, related_name="values_run")
+    feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordrun")
+    value: Run = ForeignKey(Run, PROTECT, related_name="links_record")

-
-
+    class Meta:
+        # allows linking several records to a single run for the same feature because we'll likely need this
+        unique_together = ("record", "feature")


-class
-
-
-
+class RecordArtifact(BaseSQLRecord, IsLink):
+    id: int = models.BigAutoField(primary_key=True)
+    record: Record = ForeignKey(Record, CASCADE, related_name="values_artifact")
+    feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordartifact")
+    value: Artifact = ForeignKey(Artifact, PROTECT, related_name="links_record")

     class Meta:
-
-
+        # allows linking several records to a single artifact for the same feature because we'll likely need this
+        unique_together = ("record", "feature", "value")