lamindb 1.2a2__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +3 -1
- lamindb/_view.py +2 -2
- lamindb/base/types.py +50 -11
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +15 -12
- lamindb/core/datasets/__init__.py +1 -0
- lamindb/core/datasets/_core.py +23 -0
- lamindb/core/datasets/_small.py +16 -2
- lamindb/core/loaders.py +22 -12
- lamindb/core/storage/_tiledbsoma.py +2 -2
- lamindb/core/storage/_zarr.py +84 -26
- lamindb/core/storage/objects.py +45 -44
- lamindb/core/types.py +11 -1
- lamindb/curators/__init__.py +1430 -1665
- lamindb/curators/_cellxgene_schemas/__init__.py +190 -18
- lamindb/curators/_cellxgene_schemas/schema_versions.csv +43 -0
- lamindb/models/_feature_manager.py +86 -42
- lamindb/models/_from_values.py +110 -119
- lamindb/models/_label_manager.py +17 -10
- lamindb/models/artifact.py +170 -102
- lamindb/models/can_curate.py +200 -231
- lamindb/models/feature.py +76 -47
- lamindb/models/project.py +69 -7
- lamindb/models/query_set.py +12 -2
- lamindb/models/record.py +77 -50
- lamindb/models/run.py +20 -7
- lamindb/models/schema.py +7 -15
- {lamindb-1.2a2.dist-info → lamindb-1.3.1.dist-info}/METADATA +8 -7
- {lamindb-1.2a2.dist-info → lamindb-1.3.1.dist-info}/RECORD +31 -30
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +0 -104
- {lamindb-1.2a2.dist-info → lamindb-1.3.1.dist-info}/LICENSE +0 -0
- {lamindb-1.2a2.dist-info → lamindb-1.3.1.dist-info}/WHEEL +0 -0
lamindb/models/feature.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3
3
|
import importlib
|
4
4
|
from typing import TYPE_CHECKING, Any, get_args, overload
|
5
5
|
|
6
|
+
import numpy as np
|
6
7
|
import pandas as pd
|
7
8
|
from django.db import models
|
8
9
|
from django.db.models import CASCADE, PROTECT, Q
|
@@ -12,6 +13,7 @@ from lamin_utils import logger
|
|
12
13
|
from lamindb_setup._init_instance import get_schema_module_name
|
13
14
|
from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict
|
14
15
|
from pandas.api.types import CategoricalDtype, is_string_dtype
|
16
|
+
from pandas.core.dtypes.base import ExtensionDtype
|
15
17
|
|
16
18
|
from lamindb.base.fields import (
|
17
19
|
BooleanField,
|
@@ -20,7 +22,7 @@ from lamindb.base.fields import (
|
|
20
22
|
JSONField,
|
21
23
|
TextField,
|
22
24
|
)
|
23
|
-
from lamindb.base.types import
|
25
|
+
from lamindb.base.types import Dtype, FieldAttr
|
24
26
|
from lamindb.errors import FieldValidationError, ValidationError
|
25
27
|
|
26
28
|
from ..base.ids import base62_12
|
@@ -36,18 +38,43 @@ from .run import (
|
|
36
38
|
if TYPE_CHECKING:
|
37
39
|
from collections.abc import Iterable
|
38
40
|
|
39
|
-
from pandas.core.dtypes.base import ExtensionDtype
|
40
|
-
|
41
41
|
from .schema import Schema
|
42
42
|
|
43
|
-
FEATURE_DTYPES = set(get_args(
|
43
|
+
FEATURE_DTYPES = set(get_args(Dtype))
|
44
|
+
|
44
45
|
|
46
|
+
def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, str]]:
|
47
|
+
"""Parses feature data type string into a structured list of components."""
|
48
|
+
from .artifact import Artifact
|
45
49
|
|
46
|
-
|
50
|
+
allowed_dtypes = FEATURE_DTYPES
|
51
|
+
if is_param:
|
52
|
+
allowed_dtypes.add("dict")
|
53
|
+
is_composed_cat = dtype_str.startswith("cat[") and dtype_str.endswith("]")
|
54
|
+
result = []
|
55
|
+
if is_composed_cat:
|
56
|
+
related_registries = dict_module_name_to_model_name(Artifact)
|
57
|
+
registries_str = dtype_str.replace("cat[", "")[:-1] # strip last ]
|
58
|
+
if registries_str != "":
|
59
|
+
registry_str_list = registries_str.split("|")
|
60
|
+
for cat_single_dtype_str in registry_str_list:
|
61
|
+
single_result = parse_cat_dtype(
|
62
|
+
cat_single_dtype_str, related_registries
|
63
|
+
)
|
64
|
+
result.append(single_result)
|
65
|
+
elif dtype_str not in allowed_dtypes:
|
66
|
+
raise ValueError(
|
67
|
+
f"dtype is '{dtype_str}' but has to be one of {FEATURE_DTYPES}!"
|
68
|
+
)
|
69
|
+
return result
|
70
|
+
|
71
|
+
|
72
|
+
def parse_cat_dtype(
|
47
73
|
dtype_str: str,
|
48
74
|
related_registries: dict[str, Record] | None = None,
|
49
75
|
is_itype: bool = False,
|
50
|
-
) -> dict:
|
76
|
+
) -> dict[str, Any]:
|
77
|
+
"""Parses a categorical dtype string into its components (registry, field, subtypes)."""
|
51
78
|
from .artifact import Artifact
|
52
79
|
|
53
80
|
assert isinstance(dtype_str, str) # noqa: S101
|
@@ -115,38 +142,18 @@ def parse_dtype_single_cat(
|
|
115
142
|
}
|
116
143
|
|
117
144
|
|
118
|
-
def
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
if is_param:
|
123
|
-
allowed_dtypes.add("dict")
|
124
|
-
is_composed_cat = dtype_str.startswith("cat[") and dtype_str.endswith("]")
|
125
|
-
result = []
|
126
|
-
if is_composed_cat:
|
127
|
-
related_registries = dict_module_name_to_model_name(Artifact)
|
128
|
-
registries_str = dtype_str.replace("cat[", "")[:-1] # strip last ]
|
129
|
-
if registries_str != "":
|
130
|
-
registry_str_list = registries_str.split("|")
|
131
|
-
for cat_single_dtype_str in registry_str_list:
|
132
|
-
single_result = parse_dtype_single_cat(
|
133
|
-
cat_single_dtype_str, related_registries
|
134
|
-
)
|
135
|
-
result.append(single_result)
|
136
|
-
elif dtype_str not in allowed_dtypes:
|
137
|
-
raise ValueError(
|
138
|
-
f"dtype is '{dtype_str}' but has to be one of {FEATURE_DTYPES}!"
|
139
|
-
)
|
140
|
-
return result
|
141
|
-
|
142
|
-
|
143
|
-
def get_dtype_str_from_dtype(dtype: Any, is_itype: bool = False) -> str:
|
145
|
+
def serialize_dtype(
|
146
|
+
dtype: Record | FieldAttr | list[Record], is_itype: bool = False
|
147
|
+
) -> str:
|
148
|
+
"""Converts a data type object into its string representation."""
|
144
149
|
if (
|
145
150
|
not isinstance(dtype, list)
|
146
151
|
and hasattr(dtype, "__name__")
|
147
152
|
and dtype.__name__ in FEATURE_DTYPES
|
148
153
|
):
|
149
154
|
dtype_str = dtype.__name__
|
155
|
+
elif isinstance(dtype, (ExtensionDtype, np.dtype)):
|
156
|
+
dtype_str = serialize_pandas_dtype(dtype)
|
150
157
|
else:
|
151
158
|
error_message = (
|
152
159
|
"dtype has to be a record, a record field, or a list of records, not {}"
|
@@ -177,7 +184,7 @@ def get_dtype_str_from_dtype(dtype: Any, is_itype: bool = False) -> str:
|
|
177
184
|
return dtype_str
|
178
185
|
|
179
186
|
|
180
|
-
def
|
187
|
+
def serialize_pandas_dtype(pandas_dtype: ExtensionDtype) -> str:
|
181
188
|
if is_string_dtype(pandas_dtype):
|
182
189
|
if not isinstance(pandas_dtype, CategoricalDtype):
|
183
190
|
dtype = "str"
|
@@ -189,6 +196,8 @@ def convert_pandas_dtype_to_lamin_dtype(pandas_dtype: ExtensionDtype) -> str:
|
|
189
196
|
else:
|
190
197
|
# strip precision qualifiers
|
191
198
|
dtype = "".join(dt for dt in pandas_dtype.name if not dt.isdigit())
|
199
|
+
if dtype == "uint":
|
200
|
+
dtype = "int"
|
192
201
|
if dtype.startswith("datetime"):
|
193
202
|
dtype = dtype.split("[")[0]
|
194
203
|
assert dtype in FEATURE_DTYPES # noqa: S101
|
@@ -220,7 +229,7 @@ def process_init_feature_param(args, kwargs, is_param: bool = False):
|
|
220
229
|
dtype_str = None
|
221
230
|
if dtype is not None:
|
222
231
|
if not isinstance(dtype, str):
|
223
|
-
dtype_str =
|
232
|
+
dtype_str = serialize_dtype(dtype)
|
224
233
|
else:
|
225
234
|
dtype_str = dtype
|
226
235
|
parse_dtype(dtype_str, is_param=is_param)
|
@@ -247,14 +256,16 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
247
256
|
|
248
257
|
Args:
|
249
258
|
name: `str` Name of the feature, typically. column name.
|
250
|
-
dtype: `
|
251
|
-
For categorical types, can define
|
252
|
-
|
259
|
+
dtype: `Dtype | Registry | list[Registry] | FieldAttr` See :class:`~lamindb.base.types.Dtype`.
|
260
|
+
For categorical types, you can define to which registry values are
|
261
|
+
restricted, e.g., `ULabel` or `[ULabel, bionty.CellType]`.
|
253
262
|
unit: `str | None = None` Unit of measure, ideally SI (`"m"`, `"s"`, `"kg"`, etc.) or `"normalized"` etc.
|
254
263
|
description: `str | None = None` A description.
|
255
264
|
synonyms: `str | None = None` Bar-separated synonyms.
|
256
265
|
nullable: `bool = True` Whether the feature can have null-like values (`None`, `pd.NA`, `NaN`, etc.), see :attr:`~lamindb.Feature.nullable`.
|
257
266
|
default_value: `Any | None = None` Default value for the feature.
|
267
|
+
coerce_dtype: `bool = False` When True, attempts to coerce values to the specified dtype
|
268
|
+
during validation, see :attr:`~lamindb.Feature.coerce_dtype`.
|
258
269
|
cat_filters: `dict[str, str] | None = None` Subset a registry by additional filters to define valid categories.
|
259
270
|
|
260
271
|
Note:
|
@@ -323,6 +334,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
323
334
|
_aux_fields: dict[str, tuple[str, type]] = {
|
324
335
|
"0": ("default_value", bool),
|
325
336
|
"1": ("nullable", bool),
|
337
|
+
"2": ("coerce_dtype", bool),
|
326
338
|
}
|
327
339
|
|
328
340
|
id: int = models.AutoField(primary_key=True)
|
@@ -333,13 +345,8 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
333
345
|
"""Universal id, valid across DB instances."""
|
334
346
|
name: str = CharField(max_length=150, db_index=True, unique=True)
|
335
347
|
"""Name of feature (hard unique constraint `unique=True`)."""
|
336
|
-
dtype:
|
337
|
-
"""Data type (:class:`~lamindb.base.types.
|
338
|
-
|
339
|
-
For categorical types, can define from which registry values are
|
340
|
-
sampled, e.g., `'cat[ULabel]'` or `'cat[bionty.CellType]'`. Unions are also
|
341
|
-
allowed if the feature samples from two registries, e.g., `'cat[ULabel|bionty.CellType]'`
|
342
|
-
"""
|
348
|
+
dtype: Dtype | None = CharField(db_index=True, null=True)
|
349
|
+
"""Data type (:class:`~lamindb.base.types.Dtype`)."""
|
343
350
|
type: Feature | None = ForeignKey(
|
344
351
|
"self", PROTECT, null=True, related_name="records"
|
345
352
|
)
|
@@ -381,7 +388,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
381
388
|
|
382
389
|
Is stored as a list rather than a tuple because it's serialized as JSON.
|
383
390
|
"""
|
384
|
-
proxy_dtype:
|
391
|
+
proxy_dtype: Dtype | None = CharField(default=None, null=True)
|
385
392
|
"""Proxy data type.
|
386
393
|
|
387
394
|
If the feature is an image it's often stored via a path to the image file. Hence, while the dtype might be
|
@@ -411,7 +418,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
411
418
|
def __init__(
|
412
419
|
self,
|
413
420
|
name: str,
|
414
|
-
dtype:
|
421
|
+
dtype: Dtype | Registry | list[Registry] | FieldAttr,
|
415
422
|
type: Feature | None = None,
|
416
423
|
is_type: bool = False,
|
417
424
|
unit: str | None = None,
|
@@ -419,6 +426,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
419
426
|
synonyms: str | None = None,
|
420
427
|
nullable: bool = True,
|
421
428
|
default_value: str | None = None,
|
429
|
+
coerce_dtype: bool = False,
|
422
430
|
cat_filters: dict[str, str] | None = None,
|
423
431
|
): ...
|
424
432
|
|
@@ -440,10 +448,12 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
440
448
|
default_value = kwargs.pop("default_value", None)
|
441
449
|
nullable = kwargs.pop("nullable", True) # default value of nullable
|
442
450
|
cat_filters = kwargs.pop("cat_filters", None)
|
451
|
+
coerce_dtype = kwargs.pop("coerce_dtype", False)
|
443
452
|
kwargs = process_init_feature_param(args, kwargs)
|
444
453
|
super().__init__(*args, **kwargs)
|
445
454
|
self.default_value = default_value
|
446
455
|
self.nullable = nullable
|
456
|
+
self.coerce_dtype = coerce_dtype
|
447
457
|
dtype_str = kwargs.pop("dtype", None)
|
448
458
|
if cat_filters:
|
449
459
|
assert "|" not in dtype_str # noqa: S101
|
@@ -476,7 +486,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
476
486
|
if name in categoricals:
|
477
487
|
dtypes[name] = "cat"
|
478
488
|
else:
|
479
|
-
dtypes[name] =
|
489
|
+
dtypes[name] = serialize_pandas_dtype(col.dtype)
|
480
490
|
with logger.mute(): # silence the warning "loaded record with exact same name "
|
481
491
|
features = [
|
482
492
|
Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()
|
@@ -489,6 +499,25 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
489
499
|
super().save(*args, **kwargs)
|
490
500
|
return self
|
491
501
|
|
502
|
+
@property
|
503
|
+
def coerce_dtype(self) -> bool:
|
504
|
+
"""Whether dtypes should be coerced during validation.
|
505
|
+
|
506
|
+
For example, a `objects`-dtyped pandas column can be coerced to `categorical` and would pass validation if this is true.
|
507
|
+
"""
|
508
|
+
if self._aux is not None and "af" in self._aux and "2" in self._aux["af"]: # type: ignore
|
509
|
+
return self._aux["af"]["2"] # type: ignore
|
510
|
+
else:
|
511
|
+
return False
|
512
|
+
|
513
|
+
@coerce_dtype.setter
|
514
|
+
def coerce_dtype(self, value: bool) -> None:
|
515
|
+
if self._aux is None: # type: ignore
|
516
|
+
self._aux = {} # type: ignore
|
517
|
+
if "af" not in self._aux:
|
518
|
+
self._aux["af"] = {}
|
519
|
+
self._aux["af"]["2"] = value
|
520
|
+
|
492
521
|
@property
|
493
522
|
def default_value(self) -> Any:
|
494
523
|
"""A default value that overwrites missing values (default `None`).
|
lamindb/models/project.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING
|
3
|
+
from typing import TYPE_CHECKING, overload
|
4
4
|
|
5
5
|
from django.core.validators import RegexValidator
|
6
6
|
from django.db import models
|
@@ -66,6 +66,23 @@ class Person(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
66
66
|
external: bool = BooleanField(default=True, db_index=True)
|
67
67
|
"""Whether the person is external to the organization."""
|
68
68
|
|
69
|
+
@overload
|
70
|
+
def __init__(
|
71
|
+
self,
|
72
|
+
name: str,
|
73
|
+
email: str | None = None,
|
74
|
+
external: bool = True,
|
75
|
+
): ...
|
76
|
+
|
77
|
+
@overload
|
78
|
+
def __init__(
|
79
|
+
self,
|
80
|
+
*db_args,
|
81
|
+
): ...
|
82
|
+
|
83
|
+
def __init__(self, *args, **kwargs):
|
84
|
+
super().__init__(*args, **kwargs)
|
85
|
+
|
69
86
|
|
70
87
|
class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
71
88
|
"""References such as internal studies, papers, documents, or URLs.
|
@@ -94,12 +111,6 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
94
111
|
"""Universal id, valid across DB instances."""
|
95
112
|
name: str = CharField(db_index=True)
|
96
113
|
"""Title or name of the reference document."""
|
97
|
-
abbr: str | None = CharField(
|
98
|
-
max_length=32,
|
99
|
-
db_index=True,
|
100
|
-
null=True,
|
101
|
-
)
|
102
|
-
"""An abbreviation for the reference."""
|
103
114
|
type: Reference | None = ForeignKey(
|
104
115
|
"self", PROTECT, null=True, related_name="records"
|
105
116
|
)
|
@@ -111,6 +122,12 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
111
122
|
"""Records of this type."""
|
112
123
|
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
113
124
|
"""Distinguish types from instances of the type."""
|
125
|
+
abbr: str | None = CharField(
|
126
|
+
max_length=32,
|
127
|
+
db_index=True,
|
128
|
+
null=True,
|
129
|
+
)
|
130
|
+
"""An abbreviation for the reference."""
|
114
131
|
url: str | None = URLField(null=True)
|
115
132
|
"""URL linking to the reference."""
|
116
133
|
pubmed_id: int | None = BigIntegerField(null=True, db_index=True)
|
@@ -147,6 +164,30 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
147
164
|
)
|
148
165
|
"""Collections associated with this reference."""
|
149
166
|
|
167
|
+
@overload
|
168
|
+
def __init__(
|
169
|
+
self,
|
170
|
+
name: str,
|
171
|
+
type: Reference | None = None,
|
172
|
+
is_type: bool = False,
|
173
|
+
abbr: str | None = None,
|
174
|
+
url: str | None = None,
|
175
|
+
pubmed_id: int | None = None,
|
176
|
+
doi: str | None = None,
|
177
|
+
description: str | None = None,
|
178
|
+
text: str | None = None,
|
179
|
+
date: DateType | None = None,
|
180
|
+
): ...
|
181
|
+
|
182
|
+
@overload
|
183
|
+
def __init__(
|
184
|
+
self,
|
185
|
+
*db_args,
|
186
|
+
): ...
|
187
|
+
|
188
|
+
def __init__(self, *args, **kwargs):
|
189
|
+
super().__init__(*args, **kwargs)
|
190
|
+
|
150
191
|
|
151
192
|
class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
152
193
|
"""Projects.
|
@@ -241,6 +282,27 @@ class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
241
282
|
_status_code: int = models.SmallIntegerField(default=0, db_index=True)
|
242
283
|
"""Status code."""
|
243
284
|
|
285
|
+
@overload
|
286
|
+
def __init__(
|
287
|
+
self,
|
288
|
+
name: str,
|
289
|
+
type: Project | None = None,
|
290
|
+
is_type: bool = False,
|
291
|
+
abbr: str | None = None,
|
292
|
+
url: str | None = None,
|
293
|
+
start_date: DateType | None = None,
|
294
|
+
end_date: DateType | None = None,
|
295
|
+
): ...
|
296
|
+
|
297
|
+
@overload
|
298
|
+
def __init__(
|
299
|
+
self,
|
300
|
+
*db_args,
|
301
|
+
): ...
|
302
|
+
|
303
|
+
def __init__(self, *args, **kwargs):
|
304
|
+
super().__init__(*args, **kwargs)
|
305
|
+
|
244
306
|
|
245
307
|
class ArtifactProject(BasicRecord, LinkORM, TracksRun):
|
246
308
|
id: int = models.BigAutoField(primary_key=True)
|
lamindb/models/query_set.py
CHANGED
@@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar, Union
|
|
10
10
|
import pandas as pd
|
11
11
|
from django.core.exceptions import FieldError
|
12
12
|
from django.db import models
|
13
|
-
from django.db.models import F, ForeignKey, ManyToManyField
|
13
|
+
from django.db.models import F, ForeignKey, ManyToManyField, Subquery
|
14
14
|
from django.db.models.fields.related import ForeignObjectRel
|
15
15
|
from lamin_utils import logger
|
16
16
|
from lamindb_setup.core._docs import doc_args
|
@@ -567,7 +567,17 @@ class QuerySet(models.QuerySet):
|
|
567
567
|
include_kwargs = {s: F(s) for s in include if s not in field_names}
|
568
568
|
annotate_kwargs.update(include_kwargs)
|
569
569
|
if annotate_kwargs:
|
570
|
-
|
570
|
+
id_subquery = self.values("id")
|
571
|
+
# for annotate, we want the queryset without filters so that joins don't affect the annotations
|
572
|
+
query_set_without_filters = self.model.objects.filter(
|
573
|
+
id__in=Subquery(id_subquery)
|
574
|
+
)
|
575
|
+
if self.query.order_by:
|
576
|
+
# Apply the same ordering to the new queryset
|
577
|
+
query_set_without_filters = query_set_without_filters.order_by(
|
578
|
+
*self.query.order_by
|
579
|
+
)
|
580
|
+
queryset = query_set_without_filters.annotate(**annotate_kwargs)
|
571
581
|
else:
|
572
582
|
queryset = self
|
573
583
|
|
lamindb/models/record.py
CHANGED
@@ -13,7 +13,9 @@ from typing import (
|
|
13
13
|
Any,
|
14
14
|
Literal,
|
15
15
|
NamedTuple,
|
16
|
+
TypeVar,
|
16
17
|
Union,
|
18
|
+
overload,
|
17
19
|
)
|
18
20
|
|
19
21
|
import dj_database_url
|
@@ -49,6 +51,7 @@ from django.db.models.lookups import (
|
|
49
51
|
)
|
50
52
|
from lamin_utils import colors, logger
|
51
53
|
from lamin_utils._lookup import Lookup
|
54
|
+
from lamindb_setup import settings as setup_settings
|
52
55
|
from lamindb_setup._connect_instance import (
|
53
56
|
get_owner_name_from_identifier,
|
54
57
|
load_instance_settings,
|
@@ -87,6 +90,7 @@ if TYPE_CHECKING:
|
|
87
90
|
from .transform import Transform
|
88
91
|
|
89
92
|
|
93
|
+
T = TypeVar("T", bound="Record")
|
90
94
|
IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
91
95
|
|
92
96
|
|
@@ -202,12 +206,12 @@ def validate_literal_fields(record: Record, kwargs) -> None:
|
|
202
206
|
return None
|
203
207
|
if record.__class__.__name__ in "Feature":
|
204
208
|
return None
|
205
|
-
from lamindb.base.types import
|
209
|
+
from lamindb.base.types import Dtype, TransformType
|
206
210
|
|
207
211
|
types = {
|
208
212
|
"TransformType": TransformType,
|
209
|
-
"ArtifactKind":
|
210
|
-
"
|
213
|
+
"ArtifactKind": Dtype,
|
214
|
+
"Dtype": Dtype,
|
211
215
|
}
|
212
216
|
errors = {}
|
213
217
|
annotations = getattr(record.__class__, "__annotations__", {})
|
@@ -466,19 +470,16 @@ class Registry(ModelBase):
|
|
466
470
|
return QuerySet(model=cls, using=_using_key).filter(*queries, **expressions)
|
467
471
|
|
468
472
|
def get(
|
469
|
-
cls,
|
473
|
+
cls: type[T],
|
470
474
|
idlike: int | str | None = None,
|
471
475
|
**expressions,
|
472
|
-
) ->
|
476
|
+
) -> T:
|
473
477
|
"""Get a single record.
|
474
478
|
|
475
479
|
Args:
|
476
480
|
idlike: Either a uid stub, uid or an integer id.
|
477
481
|
expressions: Fields and values passed as Django query expressions.
|
478
482
|
|
479
|
-
Returns:
|
480
|
-
A record.
|
481
|
-
|
482
483
|
Raises:
|
483
484
|
:exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
|
484
485
|
|
@@ -486,9 +487,10 @@ class Registry(ModelBase):
|
|
486
487
|
- Guide: :doc:`docs:registries`
|
487
488
|
- Django documentation: `Queries <https://docs.djangoproject.com/en/stable/topics/db/queries/>`__
|
488
489
|
|
489
|
-
Examples
|
490
|
-
|
491
|
-
|
490
|
+
Examples::
|
491
|
+
|
492
|
+
ulabel = ln.ULabel.get("FvtpPJLJ")
|
493
|
+
ulabel = ln.ULabel.get(name="my-label")
|
492
494
|
"""
|
493
495
|
from .query_set import QuerySet
|
494
496
|
|
@@ -594,7 +596,11 @@ class Registry(ModelBase):
|
|
594
596
|
|
595
597
|
if instance is None:
|
596
598
|
return QuerySet(model=cls, using=None)
|
599
|
+
|
597
600
|
owner, name = get_owner_name_from_identifier(instance)
|
601
|
+
if f"{owner}/{name}" == setup_settings.instance.slug:
|
602
|
+
return QuerySet(model=cls, using=None)
|
603
|
+
|
598
604
|
settings_file = instance_settings_file(name, owner)
|
599
605
|
cache_filepath = (
|
600
606
|
ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
@@ -737,8 +743,7 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
737
743
|
)
|
738
744
|
else:
|
739
745
|
super().__init__(*args)
|
740
|
-
|
741
|
-
_store_record_old_key(self)
|
746
|
+
track_current_key_and_name_values(self)
|
742
747
|
|
743
748
|
def save(self, *args, **kwargs) -> Record:
|
744
749
|
"""Save.
|
@@ -806,8 +811,8 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
806
811
|
init_self_from_db(self, pre_existing_record)
|
807
812
|
else:
|
808
813
|
raise
|
809
|
-
|
810
|
-
|
814
|
+
# call the below in case a user makes more updates to the record
|
815
|
+
track_current_key_and_name_values(self)
|
811
816
|
# perform transfer of many-to-many fields
|
812
817
|
# only supported for Artifact and Collection records
|
813
818
|
if db is not None and db != "default" and using_key is None:
|
@@ -875,7 +880,13 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
875
880
|
|
876
881
|
|
877
882
|
class Space(BasicRecord):
|
878
|
-
"""Spaces.
|
883
|
+
"""Spaces.
|
884
|
+
|
885
|
+
You can use spaces to restrict access to records within an instance.
|
886
|
+
|
887
|
+
All data in this registry is synced from `lamin.ai` to enable re-using spaces across instances.
|
888
|
+
There is no need to manually create records.
|
889
|
+
"""
|
879
890
|
|
880
891
|
id: int = models.SmallAutoField(primary_key=True)
|
881
892
|
"""Internal id, valid only in one DB instance."""
|
@@ -901,6 +912,26 @@ class Space(BasicRecord):
|
|
901
912
|
)
|
902
913
|
"""Creator of run."""
|
903
914
|
|
915
|
+
@overload
|
916
|
+
def __init__(
|
917
|
+
self,
|
918
|
+
name: str,
|
919
|
+
description: str | None = None,
|
920
|
+
): ...
|
921
|
+
|
922
|
+
@overload
|
923
|
+
def __init__(
|
924
|
+
self,
|
925
|
+
*db_args,
|
926
|
+
): ...
|
927
|
+
|
928
|
+
def __init__(
|
929
|
+
self,
|
930
|
+
*args,
|
931
|
+
**kwargs,
|
932
|
+
):
|
933
|
+
super().__init__(*args, **kwargs)
|
934
|
+
|
904
935
|
|
905
936
|
@doc_args(RECORD_REGISTRY_EXAMPLE)
|
906
937
|
class Record(BasicRecord, metaclass=Registry):
|
@@ -989,8 +1020,8 @@ def _get_record_kwargs(record_class) -> list[tuple[str, str]]:
|
|
989
1020
|
pattern = r"@overload\s+def __init__\s*\(([\s\S]*?)\):\s*\.{3}"
|
990
1021
|
overloads = re.finditer(pattern, source)
|
991
1022
|
|
992
|
-
for
|
993
|
-
params_block =
|
1023
|
+
for single_overload in overloads:
|
1024
|
+
params_block = single_overload.group(1)
|
994
1025
|
# This is an additional safety measure if the overloaded signature that we're
|
995
1026
|
# looking for is not at the top but a "db_args" constructor
|
996
1027
|
if "*db_args" in params_block:
|
@@ -1037,13 +1068,14 @@ def _search(
|
|
1037
1068
|
field: StrField | list[StrField] | None = None,
|
1038
1069
|
limit: int | None = 20,
|
1039
1070
|
case_sensitive: bool = False,
|
1040
|
-
using_key: str | None = None,
|
1041
1071
|
truncate_string: bool = False,
|
1042
1072
|
) -> QuerySet:
|
1043
1073
|
if string is None:
|
1044
1074
|
raise ValueError("Cannot search for None value! Please pass a valid string.")
|
1045
1075
|
|
1046
|
-
input_queryset =
|
1076
|
+
input_queryset = (
|
1077
|
+
cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
|
1078
|
+
)
|
1047
1079
|
registry = input_queryset.model
|
1048
1080
|
name_field = getattr(registry, "_name_field", "name")
|
1049
1081
|
if field is None:
|
@@ -1152,7 +1184,7 @@ def _lookup(
|
|
1152
1184
|
using_key: str | None = None,
|
1153
1185
|
) -> NamedTuple:
|
1154
1186
|
"""{}""" # noqa: D415
|
1155
|
-
queryset =
|
1187
|
+
queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
|
1156
1188
|
field = get_name_field(registry=queryset.model, field=field)
|
1157
1189
|
|
1158
1190
|
return Lookup(
|
@@ -1172,7 +1204,7 @@ def _lookup(
|
|
1172
1204
|
def get_name_field(
|
1173
1205
|
registry: type[Record] | QuerySet | Manager,
|
1174
1206
|
*,
|
1175
|
-
field:
|
1207
|
+
field: StrField | None = None,
|
1176
1208
|
) -> str:
|
1177
1209
|
"""Get the 1st char or text field from the registry."""
|
1178
1210
|
if isinstance(registry, (QuerySet, Manager)):
|
@@ -1212,16 +1244,6 @@ def get_name_field(
|
|
1212
1244
|
return field
|
1213
1245
|
|
1214
1246
|
|
1215
|
-
def _queryset(cls: Record | QuerySet | Manager, using_key: str) -> QuerySet:
|
1216
|
-
if isinstance(cls, (QuerySet, Manager)):
|
1217
|
-
return cls.all()
|
1218
|
-
elif using_key is None or using_key == "default":
|
1219
|
-
return cls.objects.all()
|
1220
|
-
else:
|
1221
|
-
# using must be called on cls, otherwise the connection isn't found
|
1222
|
-
return cls.using(using_key).all()
|
1223
|
-
|
1224
|
-
|
1225
1247
|
def add_db_connection(db: str, using: str):
|
1226
1248
|
db_config = dj_database_url.config(
|
1227
1249
|
default=db, conn_max_age=600, conn_health_checks=True
|
@@ -1374,18 +1396,14 @@ def transfer_to_default_db(
|
|
1374
1396
|
return None
|
1375
1397
|
|
1376
1398
|
|
1377
|
-
def
|
1378
|
-
|
1379
|
-
if hasattr(record, "_name_field"):
|
1380
|
-
record._old_name = getattr(record, record._name_field)
|
1381
|
-
|
1382
|
-
|
1383
|
-
def _store_record_old_key(record: Record):
|
1384
|
-
from lamindb.models import Artifact, Transform
|
1399
|
+
def track_current_key_and_name_values(record: Record):
|
1400
|
+
from lamindb.models import Artifact
|
1385
1401
|
|
1386
|
-
|
1387
|
-
if isinstance(record, (Artifact, Transform)):
|
1402
|
+
if isinstance(record, Artifact):
|
1388
1403
|
record._old_key = record.key
|
1404
|
+
record._old_suffix = record.suffix
|
1405
|
+
elif hasattr(record, "_name_field"):
|
1406
|
+
record._old_name = getattr(record, record._name_field)
|
1389
1407
|
|
1390
1408
|
|
1391
1409
|
def check_name_change(record: Record):
|
@@ -1466,20 +1484,29 @@ def check_key_change(record: Union[Artifact, Transform]):
|
|
1466
1484
|
|
1467
1485
|
if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
|
1468
1486
|
return
|
1487
|
+
if record._old_suffix != record.suffix:
|
1488
|
+
raise InvalidArgument(
|
1489
|
+
f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'."
|
1490
|
+
)
|
1469
1491
|
|
1470
|
-
old_key = record._old_key
|
1471
|
-
new_key = record.key
|
1492
|
+
old_key = record._old_key
|
1493
|
+
new_key = record.key
|
1472
1494
|
|
1473
1495
|
if old_key != new_key:
|
1474
1496
|
if not record._key_is_virtual:
|
1475
1497
|
raise InvalidArgument(
|
1476
|
-
f"Changing a non-virtual key of an artifact is not allowed!
|
1498
|
+
f"Changing a non-virtual key of an artifact is not allowed! You tried to change it from '{old_key}' to '{new_key}'."
|
1477
1499
|
)
|
1478
|
-
|
1479
|
-
|
1480
|
-
|
1481
|
-
|
1482
|
-
|
1500
|
+
if old_key is not None:
|
1501
|
+
old_key_suffix = extract_suffix_from_path(
|
1502
|
+
PurePosixPath(old_key), arg_name="key"
|
1503
|
+
)
|
1504
|
+
assert old_key_suffix == record.suffix, ( # noqa: S101
|
1505
|
+
old_key_suffix,
|
1506
|
+
record.suffix,
|
1507
|
+
)
|
1508
|
+
else:
|
1509
|
+
old_key_suffix = record.suffix
|
1483
1510
|
new_key_suffix = extract_suffix_from_path(
|
1484
1511
|
PurePosixPath(new_key), arg_name="key"
|
1485
1512
|
)
|