lamindb 1.3.0__py3-none-any.whl → 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_view.py +2 -2
- lamindb/base/types.py +50 -11
- lamindb/core/types.py +1 -1
- lamindb/curators/__init__.py +232 -222
- lamindb/curators/_cellxgene_schemas/__init__.py +1 -1
- lamindb/models/_feature_manager.py +21 -28
- lamindb/models/_from_values.py +53 -97
- lamindb/models/_label_manager.py +17 -10
- lamindb/models/artifact.py +30 -6
- lamindb/models/can_curate.py +20 -20
- lamindb/models/feature.py +47 -48
- lamindb/models/record.py +34 -28
- lamindb/models/run.py +4 -8
- lamindb/models/schema.py +7 -7
- {lamindb-1.3.0.dist-info → lamindb-1.3.2.dist-info}/METADATA +3 -3
- {lamindb-1.3.0.dist-info → lamindb-1.3.2.dist-info}/RECORD +19 -19
- {lamindb-1.3.0.dist-info → lamindb-1.3.2.dist-info}/LICENSE +0 -0
- {lamindb-1.3.0.dist-info → lamindb-1.3.2.dist-info}/WHEEL +0 -0
lamindb/models/can_curate.py
CHANGED
@@ -12,7 +12,7 @@ from ..errors import ValidationError
|
|
12
12
|
from ._from_values import (
|
13
13
|
_format_values,
|
14
14
|
_from_values,
|
15
|
-
|
15
|
+
get_organism_record_from_field,
|
16
16
|
)
|
17
17
|
from .record import Record, get_name_field
|
18
18
|
|
@@ -74,7 +74,7 @@ def _inspect(
|
|
74
74
|
# otherwise, inspect across records present in the DB from all ontology sources and no-source
|
75
75
|
if strict_source:
|
76
76
|
queryset = queryset.filter(source=source)
|
77
|
-
organism_record =
|
77
|
+
organism_record = get_organism_record_from_field(
|
78
78
|
getattr(registry, field_str), organism, values, queryset.db
|
79
79
|
)
|
80
80
|
_check_if_record_in_db(organism_record, queryset.db)
|
@@ -96,7 +96,7 @@ def _inspect(
|
|
96
96
|
|
97
97
|
if len(nonval) > 0 and hasattr(registry, "source_id"):
|
98
98
|
try:
|
99
|
-
|
99
|
+
public_result = registry.public(
|
100
100
|
organism=organism_record, source=source
|
101
101
|
).inspect(
|
102
102
|
values=nonval,
|
@@ -104,23 +104,23 @@ def _inspect(
|
|
104
104
|
mute=True,
|
105
105
|
inspect_synonyms=inspect_synonyms,
|
106
106
|
)
|
107
|
-
|
108
|
-
|
107
|
+
public_validated = public_result.validated
|
108
|
+
public_mapper = public_result.synonyms_mapper
|
109
109
|
hint = False
|
110
|
-
if len(
|
111
|
-
print_values = _format_values(
|
112
|
-
s = "" if len(
|
113
|
-
labels = colors.yellow(f"{len(
|
110
|
+
if len(public_validated) > 0 and not mute:
|
111
|
+
print_values = _format_values(public_validated)
|
112
|
+
s = "" if len(public_validated) == 1 else "s"
|
113
|
+
labels = colors.yellow(f"{len(public_validated)} {model_name} term{s}")
|
114
114
|
logger.print(
|
115
115
|
f" detected {labels} in public source for"
|
116
116
|
f" {colors.italic(field_str)}: {colors.yellow(print_values)}"
|
117
117
|
)
|
118
118
|
hint = True
|
119
119
|
|
120
|
-
if len(
|
121
|
-
print_values = _format_values(list(
|
122
|
-
s = "" if len(
|
123
|
-
labels = colors.yellow(f"{len(
|
120
|
+
if len(public_mapper) > 0 and not mute:
|
121
|
+
print_values = _format_values(list(public_mapper.keys()))
|
122
|
+
s = "" if len(public_mapper) == 1 else "s"
|
123
|
+
labels = colors.yellow(f"{len(public_mapper)} {model_name} term{s}")
|
124
124
|
logger.print(
|
125
125
|
f" detected {labels} in public source as {colors.italic(f'synonym{s}')}:"
|
126
126
|
f" {colors.yellow(print_values)}"
|
@@ -133,8 +133,8 @@ def _inspect(
|
|
133
133
|
f" {colors.italic('.from_values()')}"
|
134
134
|
)
|
135
135
|
|
136
|
-
nonval = [i for i in
|
137
|
-
# no
|
136
|
+
nonval = [i for i in public_result.non_validated if i not in public_mapper] # type: ignore
|
137
|
+
# no public source is found
|
138
138
|
except ValueError:
|
139
139
|
logger.warning("no public source found, skipping source validation")
|
140
140
|
|
@@ -176,7 +176,7 @@ def _validate(
|
|
176
176
|
if strict_source:
|
177
177
|
queryset = queryset.filter(source=source)
|
178
178
|
|
179
|
-
organism_record =
|
179
|
+
organism_record = get_organism_record_from_field(
|
180
180
|
getattr(registry, field_str), organism, values, queryset.db
|
181
181
|
)
|
182
182
|
_check_if_record_in_db(organism_record, queryset.db)
|
@@ -243,7 +243,7 @@ def _standardize(
|
|
243
243
|
_check_if_record_in_db(source, queryset.db)
|
244
244
|
if strict_source:
|
245
245
|
queryset = queryset.filter(source=source)
|
246
|
-
organism_record =
|
246
|
+
organism_record = get_organism_record_from_field(
|
247
247
|
getattr(registry, field_str), organism, values, queryset.db
|
248
248
|
)
|
249
249
|
_check_if_record_in_db(organism_record, queryset.db)
|
@@ -480,7 +480,7 @@ class CanCurate:
|
|
480
480
|
strict_source: Determines the validation behavior against records in the registry.
|
481
481
|
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
482
482
|
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
483
|
-
Note: this parameter won't affect validation against
|
483
|
+
Note: this parameter won't affect validation against public sources.
|
484
484
|
|
485
485
|
See Also:
|
486
486
|
:meth:`~lamindb.models.CanCurate.validate`
|
@@ -534,7 +534,7 @@ class CanCurate:
|
|
534
534
|
strict_source: Determines the validation behavior against records in the registry.
|
535
535
|
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
536
536
|
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
537
|
-
Note: this parameter won't affect validation against
|
537
|
+
Note: this parameter won't affect validation against public sources.
|
538
538
|
|
539
539
|
Returns:
|
540
540
|
A vector of booleans indicating if an element is validated.
|
@@ -654,7 +654,7 @@ class CanCurate:
|
|
654
654
|
strict_source: Determines the validation behavior against records in the registry.
|
655
655
|
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
656
656
|
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
657
|
-
Note: this parameter won't affect validation against
|
657
|
+
Note: this parameter won't affect validation against public sources.
|
658
658
|
|
659
659
|
Returns:
|
660
660
|
If `return_mapper` is `False`: a list of standardized names. Otherwise,
|
lamindb/models/feature.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3
3
|
import importlib
|
4
4
|
from typing import TYPE_CHECKING, Any, get_args, overload
|
5
5
|
|
6
|
+
import numpy as np
|
6
7
|
import pandas as pd
|
7
8
|
from django.db import models
|
8
9
|
from django.db.models import CASCADE, PROTECT, Q
|
@@ -12,6 +13,7 @@ from lamin_utils import logger
|
|
12
13
|
from lamindb_setup._init_instance import get_schema_module_name
|
13
14
|
from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict
|
14
15
|
from pandas.api.types import CategoricalDtype, is_string_dtype
|
16
|
+
from pandas.core.dtypes.base import ExtensionDtype
|
15
17
|
|
16
18
|
from lamindb.base.fields import (
|
17
19
|
BooleanField,
|
@@ -20,7 +22,7 @@ from lamindb.base.fields import (
|
|
20
22
|
JSONField,
|
21
23
|
TextField,
|
22
24
|
)
|
23
|
-
from lamindb.base.types import
|
25
|
+
from lamindb.base.types import Dtype, FieldAttr
|
24
26
|
from lamindb.errors import FieldValidationError, ValidationError
|
25
27
|
|
26
28
|
from ..base.ids import base62_12
|
@@ -36,19 +38,43 @@ from .run import (
|
|
36
38
|
if TYPE_CHECKING:
|
37
39
|
from collections.abc import Iterable
|
38
40
|
|
39
|
-
from pandas.core.dtypes.base import ExtensionDtype
|
40
|
-
|
41
41
|
from .schema import Schema
|
42
42
|
|
43
|
-
FEATURE_DTYPES = set(get_args(
|
43
|
+
FEATURE_DTYPES = set(get_args(Dtype))
|
44
44
|
|
45
45
|
|
46
|
-
def
|
46
|
+
def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, str]]:
|
47
|
+
"""Parses feature data type string into a structured list of components."""
|
48
|
+
from .artifact import Artifact
|
49
|
+
|
50
|
+
allowed_dtypes = FEATURE_DTYPES
|
51
|
+
if is_param:
|
52
|
+
allowed_dtypes.add("dict")
|
53
|
+
is_composed_cat = dtype_str.startswith("cat[") and dtype_str.endswith("]")
|
54
|
+
result = []
|
55
|
+
if is_composed_cat:
|
56
|
+
related_registries = dict_module_name_to_model_name(Artifact)
|
57
|
+
registries_str = dtype_str.replace("cat[", "")[:-1] # strip last ]
|
58
|
+
if registries_str != "":
|
59
|
+
registry_str_list = registries_str.split("|")
|
60
|
+
for cat_single_dtype_str in registry_str_list:
|
61
|
+
single_result = parse_cat_dtype(
|
62
|
+
cat_single_dtype_str, related_registries
|
63
|
+
)
|
64
|
+
result.append(single_result)
|
65
|
+
elif dtype_str not in allowed_dtypes:
|
66
|
+
raise ValueError(
|
67
|
+
f"dtype is '{dtype_str}' but has to be one of {FEATURE_DTYPES}!"
|
68
|
+
)
|
69
|
+
return result
|
70
|
+
|
71
|
+
|
72
|
+
def parse_cat_dtype(
|
47
73
|
dtype_str: str,
|
48
74
|
related_registries: dict[str, Record] | None = None,
|
49
75
|
is_itype: bool = False,
|
50
76
|
) -> dict[str, Any]:
|
51
|
-
"""Parses a categorical
|
77
|
+
"""Parses a categorical dtype string into its components (registry, field, subtypes)."""
|
52
78
|
from .artifact import Artifact
|
53
79
|
|
54
80
|
assert isinstance(dtype_str, str) # noqa: S101
|
@@ -116,33 +142,7 @@ def parse_dtype_single_cat(
|
|
116
142
|
}
|
117
143
|
|
118
144
|
|
119
|
-
def
|
120
|
-
"""Parses feature data type string into a structured list of components."""
|
121
|
-
from .artifact import Artifact
|
122
|
-
|
123
|
-
allowed_dtypes = FEATURE_DTYPES
|
124
|
-
if is_param:
|
125
|
-
allowed_dtypes.add("dict")
|
126
|
-
is_composed_cat = dtype_str.startswith("cat[") and dtype_str.endswith("]")
|
127
|
-
result = []
|
128
|
-
if is_composed_cat:
|
129
|
-
related_registries = dict_module_name_to_model_name(Artifact)
|
130
|
-
registries_str = dtype_str.replace("cat[", "")[:-1] # strip last ]
|
131
|
-
if registries_str != "":
|
132
|
-
registry_str_list = registries_str.split("|")
|
133
|
-
for cat_single_dtype_str in registry_str_list:
|
134
|
-
single_result = parse_dtype_single_cat(
|
135
|
-
cat_single_dtype_str, related_registries
|
136
|
-
)
|
137
|
-
result.append(single_result)
|
138
|
-
elif dtype_str not in allowed_dtypes:
|
139
|
-
raise ValueError(
|
140
|
-
f"dtype is '{dtype_str}' but has to be one of {FEATURE_DTYPES}!"
|
141
|
-
)
|
142
|
-
return result
|
143
|
-
|
144
|
-
|
145
|
-
def get_dtype_str_from_dtype(
|
145
|
+
def serialize_dtype(
|
146
146
|
dtype: Record | FieldAttr | list[Record], is_itype: bool = False
|
147
147
|
) -> str:
|
148
148
|
"""Converts a data type object into its string representation."""
|
@@ -152,6 +152,8 @@ def get_dtype_str_from_dtype(
|
|
152
152
|
and dtype.__name__ in FEATURE_DTYPES
|
153
153
|
):
|
154
154
|
dtype_str = dtype.__name__
|
155
|
+
elif isinstance(dtype, (ExtensionDtype, np.dtype)):
|
156
|
+
dtype_str = serialize_pandas_dtype(dtype)
|
155
157
|
else:
|
156
158
|
error_message = (
|
157
159
|
"dtype has to be a record, a record field, or a list of records, not {}"
|
@@ -182,7 +184,7 @@ def get_dtype_str_from_dtype(
|
|
182
184
|
return dtype_str
|
183
185
|
|
184
186
|
|
185
|
-
def
|
187
|
+
def serialize_pandas_dtype(pandas_dtype: ExtensionDtype) -> str:
|
186
188
|
if is_string_dtype(pandas_dtype):
|
187
189
|
if not isinstance(pandas_dtype, CategoricalDtype):
|
188
190
|
dtype = "str"
|
@@ -194,6 +196,8 @@ def convert_pandas_dtype_to_lamin_dtype(pandas_dtype: ExtensionDtype) -> str:
|
|
194
196
|
else:
|
195
197
|
# strip precision qualifiers
|
196
198
|
dtype = "".join(dt for dt in pandas_dtype.name if not dt.isdigit())
|
199
|
+
if dtype == "uint":
|
200
|
+
dtype = "int"
|
197
201
|
if dtype.startswith("datetime"):
|
198
202
|
dtype = dtype.split("[")[0]
|
199
203
|
assert dtype in FEATURE_DTYPES # noqa: S101
|
@@ -225,7 +229,7 @@ def process_init_feature_param(args, kwargs, is_param: bool = False):
|
|
225
229
|
dtype_str = None
|
226
230
|
if dtype is not None:
|
227
231
|
if not isinstance(dtype, str):
|
228
|
-
dtype_str =
|
232
|
+
dtype_str = serialize_dtype(dtype)
|
229
233
|
else:
|
230
234
|
dtype_str = dtype
|
231
235
|
parse_dtype(dtype_str, is_param=is_param)
|
@@ -252,9 +256,9 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
252
256
|
|
253
257
|
Args:
|
254
258
|
name: `str` Name of the feature, typically. column name.
|
255
|
-
dtype: `
|
256
|
-
For categorical types, can define
|
257
|
-
|
259
|
+
dtype: `Dtype | Registry | list[Registry] | FieldAttr` See :class:`~lamindb.base.types.Dtype`.
|
260
|
+
For categorical types, you can define to which registry values are
|
261
|
+
restricted, e.g., `ULabel` or `[ULabel, bionty.CellType]`.
|
258
262
|
unit: `str | None = None` Unit of measure, ideally SI (`"m"`, `"s"`, `"kg"`, etc.) or `"normalized"` etc.
|
259
263
|
description: `str | None = None` A description.
|
260
264
|
synonyms: `str | None = None` Bar-separated synonyms.
|
@@ -341,13 +345,8 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
341
345
|
"""Universal id, valid across DB instances."""
|
342
346
|
name: str = CharField(max_length=150, db_index=True, unique=True)
|
343
347
|
"""Name of feature (hard unique constraint `unique=True`)."""
|
344
|
-
dtype:
|
345
|
-
"""Data type (:class:`~lamindb.base.types.
|
346
|
-
|
347
|
-
For categorical types, can define from which registry values are
|
348
|
-
sampled, e.g., `'cat[ULabel]'` or `'cat[bionty.CellType]'`. Unions are also
|
349
|
-
allowed if the feature samples from two registries, e.g., `'cat[ULabel|bionty.CellType]'`
|
350
|
-
"""
|
348
|
+
dtype: Dtype | None = CharField(db_index=True, null=True)
|
349
|
+
"""Data type (:class:`~lamindb.base.types.Dtype`)."""
|
351
350
|
type: Feature | None = ForeignKey(
|
352
351
|
"self", PROTECT, null=True, related_name="records"
|
353
352
|
)
|
@@ -389,7 +388,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
389
388
|
|
390
389
|
Is stored as a list rather than a tuple because it's serialized as JSON.
|
391
390
|
"""
|
392
|
-
proxy_dtype:
|
391
|
+
proxy_dtype: Dtype | None = CharField(default=None, null=True)
|
393
392
|
"""Proxy data type.
|
394
393
|
|
395
394
|
If the feature is an image it's often stored via a path to the image file. Hence, while the dtype might be
|
@@ -419,7 +418,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
419
418
|
def __init__(
|
420
419
|
self,
|
421
420
|
name: str,
|
422
|
-
dtype:
|
421
|
+
dtype: Dtype | Registry | list[Registry] | FieldAttr,
|
423
422
|
type: Feature | None = None,
|
424
423
|
is_type: bool = False,
|
425
424
|
unit: str | None = None,
|
@@ -487,7 +486,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
487
486
|
if name in categoricals:
|
488
487
|
dtypes[name] = "cat"
|
489
488
|
else:
|
490
|
-
dtypes[name] =
|
489
|
+
dtypes[name] = serialize_pandas_dtype(col.dtype)
|
491
490
|
with logger.mute(): # silence the warning "loaded record with exact same name "
|
492
491
|
features = [
|
493
492
|
Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()
|
lamindb/models/record.py
CHANGED
@@ -206,12 +206,12 @@ def validate_literal_fields(record: Record, kwargs) -> None:
|
|
206
206
|
return None
|
207
207
|
if record.__class__.__name__ in "Feature":
|
208
208
|
return None
|
209
|
-
from lamindb.base.types import
|
209
|
+
from lamindb.base.types import Dtype, TransformType
|
210
210
|
|
211
211
|
types = {
|
212
212
|
"TransformType": TransformType,
|
213
|
-
"ArtifactKind":
|
214
|
-
"
|
213
|
+
"ArtifactKind": Dtype,
|
214
|
+
"Dtype": Dtype,
|
215
215
|
}
|
216
216
|
errors = {}
|
217
217
|
annotations = getattr(record.__class__, "__annotations__", {})
|
@@ -612,9 +612,11 @@ class Registry(ModelBase):
|
|
612
612
|
f"Failed to load instance {instance}, please check your permissions!"
|
613
613
|
)
|
614
614
|
iresult, _ = result
|
615
|
-
|
616
|
-
|
617
|
-
|
615
|
+
# do not use {} syntax below, it gives rise to a dict if the schema modules
|
616
|
+
# are empty and then triggers a TypeError in missing_members = source_module - target_module
|
617
|
+
source_module = set( # noqa
|
618
|
+
[mod for mod in iresult["schema_str"].split(",") if mod != ""]
|
619
|
+
)
|
618
620
|
target_module = ln_setup.settings.instance.modules
|
619
621
|
if not source_module.issubset(target_module):
|
620
622
|
missing_members = source_module - target_module
|
@@ -743,8 +745,7 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
743
745
|
)
|
744
746
|
else:
|
745
747
|
super().__init__(*args)
|
746
|
-
|
747
|
-
_store_record_old_key(self)
|
748
|
+
track_current_key_and_name_values(self)
|
748
749
|
|
749
750
|
def save(self, *args, **kwargs) -> Record:
|
750
751
|
"""Save.
|
@@ -812,8 +813,8 @@ class BasicRecord(models.Model, metaclass=Registry):
|
|
812
813
|
init_self_from_db(self, pre_existing_record)
|
813
814
|
else:
|
814
815
|
raise
|
815
|
-
|
816
|
-
|
816
|
+
# call the below in case a user makes more updates to the record
|
817
|
+
track_current_key_and_name_values(self)
|
817
818
|
# perform transfer of many-to-many fields
|
818
819
|
# only supported for Artifact and Collection records
|
819
820
|
if db is not None and db != "default" and using_key is None:
|
@@ -1397,18 +1398,14 @@ def transfer_to_default_db(
|
|
1397
1398
|
return None
|
1398
1399
|
|
1399
1400
|
|
1400
|
-
def
|
1401
|
-
|
1402
|
-
if hasattr(record, "_name_field"):
|
1403
|
-
record._old_name = getattr(record, record._name_field)
|
1404
|
-
|
1401
|
+
def track_current_key_and_name_values(record: Record):
|
1402
|
+
from lamindb.models import Artifact
|
1405
1403
|
|
1406
|
-
|
1407
|
-
from lamindb.models import Artifact, Transform
|
1408
|
-
|
1409
|
-
# writes the key to the _old_key attribute, so we can detect key changes upon save
|
1410
|
-
if isinstance(record, (Artifact, Transform)):
|
1404
|
+
if isinstance(record, Artifact):
|
1411
1405
|
record._old_key = record.key
|
1406
|
+
record._old_suffix = record.suffix
|
1407
|
+
elif hasattr(record, "_name_field"):
|
1408
|
+
record._old_name = getattr(record, record._name_field)
|
1412
1409
|
|
1413
1410
|
|
1414
1411
|
def check_name_change(record: Record):
|
@@ -1489,20 +1486,29 @@ def check_key_change(record: Union[Artifact, Transform]):
|
|
1489
1486
|
|
1490
1487
|
if not isinstance(record, Artifact) or not hasattr(record, "_old_key"):
|
1491
1488
|
return
|
1489
|
+
if record._old_suffix != record.suffix:
|
1490
|
+
raise InvalidArgument(
|
1491
|
+
f"Changing the `.suffix` of an artifact is not allowed! You tried to change it from '{record._old_suffix}' to '{record.suffix}'."
|
1492
|
+
)
|
1492
1493
|
|
1493
|
-
old_key = record._old_key
|
1494
|
-
new_key = record.key
|
1494
|
+
old_key = record._old_key
|
1495
|
+
new_key = record.key
|
1495
1496
|
|
1496
1497
|
if old_key != new_key:
|
1497
1498
|
if not record._key_is_virtual:
|
1498
1499
|
raise InvalidArgument(
|
1499
|
-
f"Changing a non-virtual key of an artifact is not allowed!
|
1500
|
+
f"Changing a non-virtual key of an artifact is not allowed! You tried to change it from '{old_key}' to '{new_key}'."
|
1500
1501
|
)
|
1501
|
-
|
1502
|
-
|
1503
|
-
|
1504
|
-
|
1505
|
-
|
1502
|
+
if old_key is not None:
|
1503
|
+
old_key_suffix = extract_suffix_from_path(
|
1504
|
+
PurePosixPath(old_key), arg_name="key"
|
1505
|
+
)
|
1506
|
+
assert old_key_suffix == record.suffix, ( # noqa: S101
|
1507
|
+
old_key_suffix,
|
1508
|
+
record.suffix,
|
1509
|
+
)
|
1510
|
+
else:
|
1511
|
+
old_key_suffix = record.suffix
|
1506
1512
|
new_key_suffix = extract_suffix_from_path(
|
1507
1513
|
PurePosixPath(new_key), arg_name="key"
|
1508
1514
|
)
|
lamindb/models/run.py
CHANGED
@@ -28,7 +28,7 @@ from .record import BasicRecord, LinkORM, Record, Registry
|
|
28
28
|
if TYPE_CHECKING:
|
29
29
|
from datetime import datetime
|
30
30
|
|
31
|
-
from lamindb.base.types import
|
31
|
+
from lamindb.base.types import Dtype, FieldAttr
|
32
32
|
|
33
33
|
from .artifact import Artifact
|
34
34
|
from .collection import Collection
|
@@ -208,12 +208,8 @@ class Param(Record, CanCurate, TracksRun, TracksUpdates):
|
|
208
208
|
_name_field: str = "name"
|
209
209
|
|
210
210
|
name: str = CharField(max_length=100, db_index=True)
|
211
|
-
dtype:
|
212
|
-
"""Data type ("
|
213
|
-
|
214
|
-
For categorical types, can define from which registry values are
|
215
|
-
sampled, e.g., `cat[ULabel]` or `cat[bionty.CellType]`.
|
216
|
-
"""
|
211
|
+
dtype: Dtype | None = CharField(db_index=True, null=True)
|
212
|
+
"""Data type (:class:`~lamindb.base.types.Dtype`)."""
|
217
213
|
type: Param | None = ForeignKey("self", PROTECT, null=True, related_name="records")
|
218
214
|
"""Type of param (e.g., 'Pipeline', 'ModelTraining', 'PostProcessing').
|
219
215
|
|
@@ -241,7 +237,7 @@ class Param(Record, CanCurate, TracksRun, TracksUpdates):
|
|
241
237
|
def __init__(
|
242
238
|
self,
|
243
239
|
name: str,
|
244
|
-
dtype:
|
240
|
+
dtype: Dtype | Registry | list[Registry] | FieldAttr,
|
245
241
|
type: Param | None = None,
|
246
242
|
is_type: bool = False,
|
247
243
|
): ...
|
lamindb/models/schema.py
CHANGED
@@ -28,8 +28,8 @@ from ._relations import (
|
|
28
28
|
from .can_curate import CanCurate
|
29
29
|
from .feature import (
|
30
30
|
Feature,
|
31
|
-
|
32
|
-
|
31
|
+
serialize_dtype,
|
32
|
+
serialize_pandas_dtype,
|
33
33
|
)
|
34
34
|
from .record import (
|
35
35
|
BasicRecord,
|
@@ -352,7 +352,7 @@ class Schema(Record, CanCurate, TracksRun):
|
|
352
352
|
if otype is None:
|
353
353
|
raise InvalidArgument("Please pass otype != None for composite schemas")
|
354
354
|
if itype is not None and not isinstance(itype, str):
|
355
|
-
itype_str =
|
355
|
+
itype_str = serialize_dtype(itype, is_itype=True)
|
356
356
|
else:
|
357
357
|
itype_str = itype
|
358
358
|
validated_kwargs = {
|
@@ -482,14 +482,14 @@ class Schema(Record, CanCurate, TracksRun):
|
|
482
482
|
organism: Record | str | None = None,
|
483
483
|
source: Record | None = None,
|
484
484
|
) -> Schema | None:
|
485
|
-
"""Create
|
485
|
+
"""Create schema for valid columns."""
|
486
486
|
registry = field.field.model
|
487
487
|
validated = registry.validate(
|
488
488
|
df.columns, field=field, mute=mute, organism=organism
|
489
489
|
)
|
490
490
|
if validated.sum() == 0:
|
491
|
-
if mute
|
492
|
-
logger.warning("no validated features, skip creating
|
491
|
+
if not mute:
|
492
|
+
logger.warning("no validated features, skip creating schema")
|
493
493
|
return None
|
494
494
|
if registry == Feature:
|
495
495
|
validated_features = Feature.from_values( # type: ignore
|
@@ -502,7 +502,7 @@ class Schema(Record, CanCurate, TracksRun):
|
|
502
502
|
dtypes = [col.dtype for (_, col) in df.loc[:, validated].items()]
|
503
503
|
if len(set(dtypes)) != 1:
|
504
504
|
raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
|
505
|
-
dtype =
|
505
|
+
dtype = serialize_pandas_dtype(dtypes[0])
|
506
506
|
validated_features = registry.from_values(
|
507
507
|
df.columns[validated],
|
508
508
|
field=field,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lamindb
|
3
|
-
Version: 1.3.
|
3
|
+
Version: 1.3.2
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.10,<3.14
|
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.13
|
12
12
|
Requires-Dist: lamin_utils==0.13.11
|
13
13
|
Requires-Dist: lamin_cli==1.2.0
|
14
|
-
Requires-Dist: lamindb_setup[aws]==1.
|
14
|
+
Requires-Dist: lamindb_setup[aws]==1.4.1
|
15
15
|
Requires-Dist: pyyaml
|
16
16
|
Requires-Dist: pyarrow
|
17
17
|
Requires-Dist: pandera
|
@@ -23,7 +23,7 @@ Requires-Dist: anndata>=0.8.0,<=0.11.3
|
|
23
23
|
Requires-Dist: fsspec
|
24
24
|
Requires-Dist: graphviz
|
25
25
|
Requires-Dist: psycopg2-binary
|
26
|
-
Requires-Dist: bionty ; extra == "bionty"
|
26
|
+
Requires-Dist: bionty>=1.2.1 ; extra == "bionty"
|
27
27
|
Requires-Dist: cellregistry ; extra == "cellregistry"
|
28
28
|
Requires-Dist: clinicore ; extra == "clinicore"
|
29
29
|
Requires-Dist: tomlkit ; extra == "dev"
|
@@ -1,12 +1,12 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=PEWc1Xf6XBM-jY-0n8kaf2pm3k3D2Fem4H7LNUw5Nt8,2468
|
2
2
|
lamindb/_finish.py,sha256=UK9XW1qZCd32Nqz0cdKYmpX9ilFU0nGyNb6Urwfx_Nw,19612
|
3
3
|
lamindb/_tracked.py,sha256=JKzYEpqVojklTms0VpP-tU34AHVZG8a13dSl3CfIzwQ,4472
|
4
|
-
lamindb/_view.py,sha256=
|
4
|
+
lamindb/_view.py,sha256=kSmG8X4ULQZEKxY7ESnthQqsUf1DEzoYGeTLYRU1I7s,4938
|
5
5
|
lamindb/errors.py,sha256=F6einUIStsTgWcBfSlG8eGf2Q6yWUaqMlSULqmkV8GA,1734
|
6
6
|
lamindb/base/__init__.py,sha256=qS7BM1YVHWridJp2CsiH5Rb38z6kkuDYCjerNHvI2qQ,263
|
7
7
|
lamindb/base/fields.py,sha256=RdwYHQmB7B-jopD_K2QNL5vjhOelu7DWGgqQItXr3pg,8024
|
8
8
|
lamindb/base/ids.py,sha256=OOgD5vxry6s2vSslb8-E9zEykDMpyhnungfT844DhSU,1547
|
9
|
-
lamindb/base/types.py,sha256=
|
9
|
+
lamindb/base/types.py,sha256=w_dQ2t9Htk8030OA_blksY9FG4NNcx3p7qJumFkBWkg,2714
|
10
10
|
lamindb/base/users.py,sha256=8MSmAvCKoUF15YsDE6BGLBXsFWpfoEEg8iDTKZ7kD48,848
|
11
11
|
lamindb/core/__init__.py,sha256=aaBq0UVjNolMynbT1V5hB6UrJm1tK0M6WHu_r6em9_4,604
|
12
12
|
lamindb/core/_compat.py,sha256=NLnKk1qk4xdgMV-QwFDnBnbio02ujjlF86icvhpdv4c,2029
|
@@ -17,7 +17,7 @@ lamindb/core/_sync_git.py,sha256=Z7keuyS5X7CAj285sEbZIFExZF9mtjGH8DzKwz3xhHw,588
|
|
17
17
|
lamindb/core/_track_environment.py,sha256=gKmXiL2meqJT65X-66p_GlonoxzBZXNwNm-G9gk0fS4,847
|
18
18
|
lamindb/core/exceptions.py,sha256=FMEoSvT3FvtLkxQAt2oDXPeaPem8V5x5UBbTsPFYU5w,53
|
19
19
|
lamindb/core/loaders.py,sha256=1JHLr4e-gbh8QXiy5duOPsiKo7TKjo74vmvolqhkhgs,5458
|
20
|
-
lamindb/core/types.py,sha256=
|
20
|
+
lamindb/core/types.py,sha256=yHr2Vn_p1Hepz_mBooXmsKudqu8Tco7lXZmVS_ORQIw,383
|
21
21
|
lamindb/core/datasets/__init__.py,sha256=g6cSgJmlkLuI6CoxB-Lbg70cpkVZWDuPv-2kcFb0uYs,1745
|
22
22
|
lamindb/core/datasets/_core.py,sha256=_PrZSr_rRpfScdzU216YMUR6TxihqA2hffRXmjD5Azw,20344
|
23
23
|
lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
|
@@ -34,8 +34,8 @@ lamindb/core/storage/objects.py,sha256=l2JTHI7oLMH7JJqxwpyIgMXZJ3gaGv7xl_Jr21wWF
|
|
34
34
|
lamindb/core/storage/paths.py,sha256=wJTD7qza87Xx7ZMo9HFHKgZWaVnst6qc4F2SzqvBMrE,7118
|
35
35
|
lamindb/core/subsettings/__init__.py,sha256=j6G9WAJLK-x9FzPSFw-HJUmOseZKGTbK-oLTKI_X_zs,126
|
36
36
|
lamindb/core/subsettings/_creation_settings.py,sha256=NGHWKqCFSzVNBxAr2VnmdYguiFdW29XUK7T9wRsVshg,906
|
37
|
-
lamindb/curators/__init__.py,sha256=
|
38
|
-
lamindb/curators/_cellxgene_schemas/__init__.py,sha256=
|
37
|
+
lamindb/curators/__init__.py,sha256=0yZXh_VNkkU_A6jkhzrYiKa1TUmPqfmFn6wUIv82B_Y,131303
|
38
|
+
lamindb/curators/_cellxgene_schemas/__init__.py,sha256=zqlFzMNMDGEBe6DV0gBsBMpfc9UHvNv1EpBsz_ktMoA,7502
|
39
39
|
lamindb/curators/_cellxgene_schemas/schema_versions.csv,sha256=X9rmO88TW1Fht1f5mJs0JdW-VPvyKSajpf8lHNeECj4,1680
|
40
40
|
lamindb/integrations/__init__.py,sha256=RWGMYYIzr8zvmNPyVB4m-p4gMDhxdRbjES2Ed23OItw,215
|
41
41
|
lamindb/integrations/_vitessce.py,sha256=VgO9zAlTSIKDo1wEef_Q4BudTAVtRSZmuzRdCGwBvJk,4016
|
@@ -66,30 +66,30 @@ lamindb/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
|
|
66
66
|
lamindb/models/__init__.py,sha256=NlnrPiBSv93PttHDrAYnF3RTfiIhgC7QOE_mP0M9Ddc,1934
|
67
67
|
lamindb/models/_describe.py,sha256=B-lmzc8AYaeuKwwRRsF0q8qT6P1i93sEjYkfl0NuyWQ,4926
|
68
68
|
lamindb/models/_django.py,sha256=2LFaTvIPtxIV8_T6Kx0cvquTetj7C3OcnKukUyC9msY,7705
|
69
|
-
lamindb/models/_feature_manager.py,sha256=
|
70
|
-
lamindb/models/_from_values.py,sha256=
|
69
|
+
lamindb/models/_feature_manager.py,sha256=f81DJElY1XXX-ps9tfnk3ddJJBzDO-QPzIP-Dn4rxe8,50058
|
70
|
+
lamindb/models/_from_values.py,sha256=3IkJTA3KTJHyuVZ1Hki1MFa4PKbHrK6mhPsshYygku8,13327
|
71
71
|
lamindb/models/_is_versioned.py,sha256=ivtC0t96YI6eaMFqg0ctWY3ert96I_2R-DI5O0Zx7kU,8011
|
72
|
-
lamindb/models/_label_manager.py,sha256=
|
72
|
+
lamindb/models/_label_manager.py,sha256=rdTAjN0PNxIuHv289_JPoxvdPVVOTgBWhjWqBpKEnm4,11935
|
73
73
|
lamindb/models/_relations.py,sha256=ONjHPiWIa_Ur7zMNTa_9Uw7K-366GORyPvGoVjf4EQs,3681
|
74
|
-
lamindb/models/artifact.py,sha256=
|
75
|
-
lamindb/models/can_curate.py,sha256
|
74
|
+
lamindb/models/artifact.py,sha256=nWTUIkV8RyK_mBJ-aTEGOiF-Q1HG4Bi8wgTkQZ5MxE8,103678
|
75
|
+
lamindb/models/can_curate.py,sha256=_XraPciWg75gnT1j5HgXhsaUFOVtO27wrVageQgsxqM,29071
|
76
76
|
lamindb/models/collection.py,sha256=P1E4olaqaPsVYdcQe8AgH_yUUdeQBa6QcyD1Y6Gedjo,26311
|
77
77
|
lamindb/models/core.py,sha256=cjQGk5r0Rzf3zTeC0gn_GB29UfKq34l4hThsNNVhi3o,3965
|
78
|
-
lamindb/models/feature.py,sha256
|
78
|
+
lamindb/models/feature.py,sha256=OTmh0zIESnMDVgIIJYlL8018qKSAKYLuoG1cYlhTk-g,26086
|
79
79
|
lamindb/models/flextable.py,sha256=ET9j0fTFYQIdXOZfwCnosXOag7nYD1DUV6_wZNqhvOs,5400
|
80
80
|
lamindb/models/has_parents.py,sha256=PEGDiNTK7ikHBHAGsiHK4e6TA9jqUFRom1HSQuyReyE,17942
|
81
81
|
lamindb/models/project.py,sha256=WSOtM6-hKPeDNOCR6Frq1bJxc27j0HJWhCmFh5L3CiM,15174
|
82
82
|
lamindb/models/query_manager.py,sha256=RqF842cqloAv5z4zLDlWAZfVkLQbhCPry6WQW3CaznI,3713
|
83
83
|
lamindb/models/query_set.py,sha256=buJ-zuua5MTqeEE8WD3lOBZXs19k_r4DuRWPB8Bai5Y,27060
|
84
|
-
lamindb/models/record.py,sha256=
|
85
|
-
lamindb/models/run.py,sha256=
|
84
|
+
lamindb/models/record.py,sha256=RYx1PeJkfMu6BQuROIG-sVtj3lV0Ck7PwVfQ51nVlyQ,65278
|
85
|
+
lamindb/models/run.py,sha256=_qCFjeGcK1-MEpRJCdnl6NScCO7Rye3lYl56wEbq9mM,18716
|
86
86
|
lamindb/models/save.py,sha256=VEq4kmDyDiw9zTQY6meA9c5yT_YU5ldFzRDgKqCX59M,13031
|
87
|
-
lamindb/models/schema.py,sha256=
|
87
|
+
lamindb/models/schema.py,sha256=zbzzQ8UOLqhJh-PC0CoRu-sN5-NnIFXoDtj1KSPhj_s,28672
|
88
88
|
lamindb/models/transform.py,sha256=PbtjakPWmw0iuCG0HPEbysISVX_CoIE2KAPF7L18Vak,13064
|
89
89
|
lamindb/models/ulabel.py,sha256=A8zJcRiGNmq24njLJv7_FuVZJmdtSkN-MSKw5c1QJMo,8605
|
90
90
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
91
91
|
lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
|
92
|
-
lamindb-1.3.
|
93
|
-
lamindb-1.3.
|
94
|
-
lamindb-1.3.
|
95
|
-
lamindb-1.3.
|
92
|
+
lamindb-1.3.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
93
|
+
lamindb-1.3.2.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
|
94
|
+
lamindb-1.3.2.dist-info/METADATA,sha256=JwVHPE26gd4jVRHnFY0-3n7AELypOIGZoJh0c1_uhm8,2733
|
95
|
+
lamindb-1.3.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|