lamindb 0.71.3__py3-none-any.whl → 0.72.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +2 -2
- lamindb/_annotate.py +6 -10
- lamindb/_artifact.py +6 -2
- lamindb/_collection.py +4 -4
- lamindb/_feature.py +53 -45
- lamindb/_feature_set.py +37 -74
- lamindb/_from_values.py +0 -3
- lamindb/_query_manager.py +6 -1
- lamindb/_registry.py +8 -6
- lamindb/_run.py +0 -2
- lamindb/core/__init__.py +4 -0
- lamindb/core/_data.py +30 -27
- lamindb/core/_feature_manager.py +96 -24
- lamindb/core/_label_manager.py +13 -10
- lamindb/core/_run_context.py +24 -1
- lamindb/core/datasets/_core.py +10 -18
- lamindb/core/schema.py +53 -0
- {lamindb-0.71.3.dist-info → lamindb-0.72.0.dist-info}/METADATA +4 -4
- {lamindb-0.71.3.dist-info → lamindb-0.72.0.dist-info}/RECORD +21 -20
- {lamindb-0.71.3.dist-info → lamindb-0.72.0.dist-info}/LICENSE +0 -0
- {lamindb-0.71.3.dist-info → lamindb-0.72.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -41,7 +41,7 @@ Modules & settings:
|
|
41
41
|
"""
|
42
42
|
|
43
43
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
44
|
-
__version__ = "0.71.3"
|
44
|
+
__version__ = "0.72.0"
|
45
45
|
|
46
46
|
import os as _os
|
47
47
|
|
@@ -72,6 +72,7 @@ if _check_instance_setup(from_lamindb=True):
|
|
72
72
|
User,
|
73
73
|
)
|
74
74
|
|
75
|
+
from . import core # isort: split
|
75
76
|
from . import (
|
76
77
|
_annotate,
|
77
78
|
_artifact,
|
@@ -86,7 +87,6 @@ if _check_instance_setup(from_lamindb=True):
|
|
86
87
|
_storage,
|
87
88
|
_transform,
|
88
89
|
_ulabel,
|
89
|
-
core,
|
90
90
|
)
|
91
91
|
|
92
92
|
dev = core # backward compat
|
lamindb/_annotate.py
CHANGED
@@ -9,18 +9,14 @@ from lamin_utils import colors, logger
|
|
9
9
|
from lamindb_setup.core._docs import doc_args
|
10
10
|
from lnschema_core import Artifact, Collection, Feature, Registry, Run, ULabel
|
11
11
|
|
12
|
+
from .core.exceptions import ValidationError
|
13
|
+
|
12
14
|
if TYPE_CHECKING:
|
13
15
|
from lamindb_setup.core.types import UPathStr
|
14
16
|
from lnschema_core.types import FieldAttr
|
15
17
|
from mudata import MuData
|
16
18
|
|
17
19
|
|
18
|
-
class ValidationError(ValueError):
|
19
|
-
"""Validation error."""
|
20
|
-
|
21
|
-
pass
|
22
|
-
|
23
|
-
|
24
20
|
class AnnotateLookup:
|
25
21
|
"""Lookup categories from the reference instance."""
|
26
22
|
|
@@ -566,7 +562,7 @@ class MuDataAnnotator:
|
|
566
562
|
save_function="add_new_from_var_index",
|
567
563
|
using=self._using,
|
568
564
|
validated_only=validated_only,
|
569
|
-
|
565
|
+
dtype="number",
|
570
566
|
**kwargs,
|
571
567
|
)
|
572
568
|
|
@@ -1034,7 +1030,7 @@ def update_registry(
|
|
1034
1030
|
validated_only: bool = True,
|
1035
1031
|
df: pd.DataFrame | None = None,
|
1036
1032
|
organism: str | None = None,
|
1037
|
-
|
1033
|
+
dtype: str | None = None,
|
1038
1034
|
**kwargs,
|
1039
1035
|
) -> None:
|
1040
1036
|
"""Save features or labels records in the default instance from the using instance.
|
@@ -1048,7 +1044,7 @@ def update_registry(
|
|
1048
1044
|
validated_only: If True, only save validated labels.
|
1049
1045
|
df: A DataFrame to save labels from.
|
1050
1046
|
organism: The organism name.
|
1051
|
-
|
1047
|
+
dtype: The type of the feature.
|
1052
1048
|
kwargs: Additional keyword arguments to pass to the registry model to create new records.
|
1053
1049
|
"""
|
1054
1050
|
from lamindb._save import save as ln_save
|
@@ -1102,7 +1098,7 @@ def update_registry(
|
|
1102
1098
|
for value in labels_saved["without reference"]:
|
1103
1099
|
filter_kwargs[field.field.name] = value
|
1104
1100
|
if registry == Feature:
|
1105
|
-
filter_kwargs["
|
1101
|
+
filter_kwargs["dtype"] = "cat" if dtype is None else dtype
|
1106
1102
|
non_validated_records.append(registry(**filter_kwargs, **kwargs))
|
1107
1103
|
ln_save(non_validated_records)
|
1108
1104
|
|
lamindb/_artifact.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import os
|
3
4
|
import shutil
|
4
5
|
from concurrent.futures import ThreadPoolExecutor
|
5
6
|
from pathlib import Path, PurePath, PurePosixPath
|
@@ -212,7 +213,10 @@ def get_stat_or_artifact(
|
|
212
213
|
file_size = file.stat().st_size
|
213
214
|
return hash_file(file, file_size)[0], file_size
|
214
215
|
|
215
|
-
|
216
|
+
try:
|
217
|
+
n_workers = len(psutil.Process().cpu_affinity())
|
218
|
+
except AttributeError:
|
219
|
+
n_workers = psutil.cpu_count()
|
216
220
|
if n_workers > 1:
|
217
221
|
with ThreadPoolExecutor(n_workers) as pool:
|
218
222
|
hashes_sizes = pool.map(hash_size, files)
|
@@ -345,7 +349,7 @@ def get_artifact_kwargs_from_data(
|
|
345
349
|
# save the information that this artifact was previously
|
346
350
|
# produced by another run
|
347
351
|
if artifact.run is not None:
|
348
|
-
artifact.run.
|
352
|
+
artifact.run.output_artifacts_with_later_updates.add(artifact)
|
349
353
|
# update the run of the artifact with the latest run
|
350
354
|
stat_or_artifact.run = run
|
351
355
|
stat_or_artifact.transform = run.transform
|
lamindb/_collection.py
CHANGED
@@ -121,7 +121,7 @@ def __init__(
|
|
121
121
|
# save the information that this artifact was previously
|
122
122
|
# produced by another run
|
123
123
|
if existing_collection.run is not None:
|
124
|
-
existing_collection.run.
|
124
|
+
existing_collection.run.output_collections_with_later_updates.add(
|
125
125
|
existing_collection
|
126
126
|
)
|
127
127
|
# update the run of the artifact with the latest run
|
@@ -177,7 +177,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
|
|
177
177
|
feature_sets_by_slots = defaultdict(list)
|
178
178
|
logger.debug("slots")
|
179
179
|
for link in feature_set_artifact_links:
|
180
|
-
feature_sets_by_slots[link.slot].append(link.
|
180
|
+
feature_sets_by_slots[link.slot].append(link.featureset_id)
|
181
181
|
feature_sets_union = {}
|
182
182
|
logger.debug("union")
|
183
183
|
for slot, feature_set_ids_slot in feature_sets_by_slots.items():
|
@@ -197,7 +197,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
|
|
197
197
|
)
|
198
198
|
start_time = logger.debug("done, start evaluate", time=start_time)
|
199
199
|
features = features_registry.filter(id__in=feature_ids)
|
200
|
-
feature_sets_union[slot] = FeatureSet(features,
|
200
|
+
feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
|
201
201
|
start_time = logger.debug("done", time=start_time)
|
202
202
|
# validate consistency of hashes
|
203
203
|
# we do not allow duplicate hashes
|
@@ -361,7 +361,7 @@ def restore(self) -> None:
|
|
361
361
|
@doc_args(Collection.artifacts.__doc__)
|
362
362
|
def artifacts(self) -> QuerySet:
|
363
363
|
"""{}."""
|
364
|
-
return self.unordered_artifacts.order_by("
|
364
|
+
return self.unordered_artifacts.order_by("collection_links__id")
|
365
365
|
|
366
366
|
|
367
367
|
METHOD_NAMES = [
|
lamindb/_feature.py
CHANGED
@@ -1,26 +1,29 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING
|
3
|
+
from typing import TYPE_CHECKING
|
4
4
|
|
5
5
|
import lamindb_setup as ln_setup
|
6
6
|
import pandas as pd
|
7
7
|
from lamindb_setup.core._docs import doc_args
|
8
|
-
from lnschema_core.models import
|
8
|
+
from lnschema_core.models import Artifact, Feature
|
9
9
|
from pandas.api.types import CategoricalDtype, is_string_dtype
|
10
10
|
|
11
11
|
from lamindb._utils import attach_func_to_class_method
|
12
12
|
from lamindb.core._settings import settings
|
13
13
|
|
14
14
|
from ._query_set import RecordsList
|
15
|
+
from .core.schema import dict_schema_name_to_model_name
|
15
16
|
|
16
17
|
if TYPE_CHECKING:
|
17
18
|
from lnschema_core.types import FieldAttr
|
18
19
|
|
19
20
|
FEATURE_TYPES = {
|
20
|
-
"
|
21
|
-
"
|
22
|
-
"
|
23
|
-
"
|
21
|
+
"number": "number",
|
22
|
+
"int": "int",
|
23
|
+
"float": "float",
|
24
|
+
"bool": "bool",
|
25
|
+
"str": "cat",
|
26
|
+
"object": "cat",
|
24
27
|
}
|
25
28
|
|
26
29
|
|
@@ -28,10 +31,8 @@ def convert_numpy_dtype_to_lamin_feature_type(dtype) -> str:
|
|
28
31
|
orig_type = dtype.name
|
29
32
|
# strip precision qualifiers
|
30
33
|
type = "".join(i for i in orig_type if not i.isdigit())
|
31
|
-
if type == "
|
32
|
-
type = "
|
33
|
-
elif type == "object" or type == "str":
|
34
|
-
type = "category"
|
34
|
+
if type == "object" or type == "str":
|
35
|
+
type = "cat"
|
35
36
|
return type
|
36
37
|
|
37
38
|
|
@@ -42,38 +43,44 @@ def __init__(self, *args, **kwargs):
|
|
42
43
|
# now we proceed with the user-facing constructor
|
43
44
|
if len(args) != 0:
|
44
45
|
raise ValueError("Only non-keyword args allowed")
|
45
|
-
|
46
|
-
kwargs.pop("type") if "type" in kwargs else None
|
47
|
-
)
|
48
|
-
registries: list[Registry] | None = (
|
49
|
-
kwargs.pop("registries") if "registries" in kwargs else None
|
50
|
-
)
|
46
|
+
dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
|
51
47
|
# cast type
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
48
|
+
if dtype is None:
|
49
|
+
raise ValueError("Please pass a type!")
|
50
|
+
elif dtype is not None:
|
51
|
+
if not isinstance(dtype, str):
|
52
|
+
if not isinstance(dtype, list) and dtype.__name__ in FEATURE_TYPES:
|
53
|
+
dtype_str = FEATURE_TYPES[dtype.__name__]
|
54
|
+
else:
|
55
|
+
if not isinstance(dtype, list):
|
56
|
+
raise ValueError("dtype has to be a list of Registry types")
|
57
|
+
registries_str = ""
|
58
|
+
for cls in dtype:
|
59
|
+
if not hasattr(cls, "__get_name_with_schema__"):
|
60
|
+
raise ValueError(
|
61
|
+
"each element of the list has to be a Registry"
|
62
|
+
)
|
63
|
+
registries_str += cls.__get_name_with_schema__() + "|"
|
64
|
+
dtype_str = f'cat[{registries_str.rstrip("|")}]'
|
67
65
|
else:
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
66
|
+
dtype_str = dtype
|
67
|
+
# add validation that a registry actually exists
|
68
|
+
if dtype_str not in FEATURE_TYPES.values() and not dtype_str.startswith(
|
69
|
+
"cat"
|
70
|
+
):
|
71
|
+
raise ValueError(
|
72
|
+
f"dtype is {dtype_str} but has to be one of 'number', 'int', 'float', 'cat', 'bool', 'cat[...]'!"
|
73
|
+
)
|
74
|
+
if dtype_str != "cat" and dtype_str.startswith("cat"):
|
75
|
+
registries_str = dtype_str.replace("cat[", "").rstrip("]")
|
76
|
+
if registries_str != "":
|
77
|
+
registry_str_list = registries_str.split("|")
|
78
|
+
for registry_str in registry_str_list:
|
79
|
+
if registry_str not in dict_schema_name_to_model_name(Artifact):
|
80
|
+
raise ValueError(
|
81
|
+
f"'{registry_str}' is an invalid dtype, pass, e.g. `[ln.ULabel, bt.CellType]` or similar"
|
82
|
+
)
|
83
|
+
kwargs["dtype"] = dtype_str
|
77
84
|
super(Feature, self).__init__(*args, **kwargs)
|
78
85
|
|
79
86
|
|
@@ -99,11 +106,11 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
99
106
|
field = Feature.name if field is None else field
|
100
107
|
categoricals = categoricals_from_df(df)
|
101
108
|
|
102
|
-
|
109
|
+
dtypes = {}
|
103
110
|
# categoricals_with_unmapped_categories = {} # type: ignore
|
104
111
|
for name, col in df.items():
|
105
112
|
if name in categoricals:
|
106
|
-
|
113
|
+
dtypes[name] = "cat"
|
107
114
|
# below is a harder feature to write, now, because it requires to
|
108
115
|
# query the link tables between the label Registry and file or collection
|
109
116
|
# the original implementation fell short
|
@@ -117,7 +124,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
117
124
|
# feature=name
|
118
125
|
# ).inspect(categories, "name", logging=False)["not_mapped"]
|
119
126
|
else:
|
120
|
-
|
127
|
+
dtypes[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
|
121
128
|
|
122
129
|
# silence the warning "loaded record with exact same name "
|
123
130
|
verbosity = settings.verbosity
|
@@ -128,7 +135,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
128
135
|
if registry != Feature:
|
129
136
|
raise ValueError("field must be a Feature FieldAttr!")
|
130
137
|
# create records for all features including non-validated
|
131
|
-
features = [Feature(name=name,
|
138
|
+
features = [Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()]
|
132
139
|
finally:
|
133
140
|
settings.verbosity = verbosity
|
134
141
|
|
@@ -174,9 +181,10 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
|
|
174
181
|
|
175
182
|
|
176
183
|
@doc_args(Feature.save.__doc__)
|
177
|
-
def save(self, *args, **kwargs) ->
|
184
|
+
def save(self, *args, **kwargs) -> Feature:
|
178
185
|
"""{}."""
|
179
186
|
super(Feature, self).save(*args, **kwargs)
|
187
|
+
return self
|
180
188
|
|
181
189
|
|
182
190
|
METHOD_NAMES = [
|
lamindb/_feature_set.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from typing import TYPE_CHECKING, Iterable
|
3
|
+
from typing import TYPE_CHECKING, Iterable, Type
|
4
4
|
|
5
5
|
import lamindb_setup as ln_setup
|
6
6
|
import numpy as np
|
@@ -14,6 +14,11 @@ from lamindb._utils import attach_func_to_class_method
|
|
14
14
|
|
15
15
|
from ._feature import convert_numpy_dtype_to_lamin_feature_type
|
16
16
|
from ._registry import init_self_from_db
|
17
|
+
from .core.exceptions import ValidationError
|
18
|
+
from .core.schema import (
|
19
|
+
dict_related_model_to_related_name,
|
20
|
+
get_related_name,
|
21
|
+
)
|
17
22
|
|
18
23
|
if TYPE_CHECKING:
|
19
24
|
import pandas as pd
|
@@ -21,57 +26,7 @@ if TYPE_CHECKING:
|
|
21
26
|
from ._query_set import QuerySet
|
22
27
|
|
23
28
|
NUMBER_TYPE = "number"
|
24
|
-
|
25
|
-
|
26
|
-
def dict_related_model_to_related_name(orm):
|
27
|
-
d: dict = {
|
28
|
-
i.related_model.__get_name_with_schema__(): i.related_name
|
29
|
-
for i in orm._meta.related_objects
|
30
|
-
if i.related_name is not None
|
31
|
-
}
|
32
|
-
d.update(
|
33
|
-
{
|
34
|
-
i.related_model.__get_name_with_schema__(): i.name
|
35
|
-
for i in orm._meta.many_to_many
|
36
|
-
if i.name is not None
|
37
|
-
}
|
38
|
-
)
|
39
|
-
|
40
|
-
return d
|
41
|
-
|
42
|
-
|
43
|
-
def dict_schema_name_to_model_name(orm):
|
44
|
-
d: dict = {
|
45
|
-
i.related_model.__get_name_with_schema__(): i.related_model
|
46
|
-
for i in orm._meta.related_objects
|
47
|
-
if i.related_name is not None
|
48
|
-
}
|
49
|
-
d.update(
|
50
|
-
{
|
51
|
-
i.related_model.__get_name_with_schema__(): i.related_model
|
52
|
-
for i in orm._meta.many_to_many
|
53
|
-
if i.name is not None
|
54
|
-
}
|
55
|
-
)
|
56
|
-
|
57
|
-
return d
|
58
|
-
|
59
|
-
|
60
|
-
def get_related_name(features_type: Registry):
|
61
|
-
candidates = [
|
62
|
-
field.related_name
|
63
|
-
for field in FeatureSet._meta.related_objects
|
64
|
-
if field.related_model == features_type
|
65
|
-
]
|
66
|
-
if not candidates:
|
67
|
-
raise ValueError(
|
68
|
-
f"Can't create feature sets from {features_type.__name__} because it's not"
|
69
|
-
" related to it!\nYou need to create a link model between FeatureSet and"
|
70
|
-
" your Registry in your custom schema.\nTo do so, add a"
|
71
|
-
" line:\nfeature_sets = models.ManyToMany(FeatureSet,"
|
72
|
-
" related_name='mythings')\n"
|
73
|
-
)
|
74
|
-
return candidates[0]
|
29
|
+
DICT_KEYS_TYPE = type({}.keys()) # type: ignore
|
75
30
|
|
76
31
|
|
77
32
|
def validate_features(features: list[Registry]) -> Registry:
|
@@ -106,14 +61,14 @@ def __init__(self, *args, **kwargs):
|
|
106
61
|
if len(args) > 1:
|
107
62
|
raise ValueError("Only one non-keyword arg allowed: features")
|
108
63
|
features: Iterable[Registry] = kwargs.pop("features") if len(args) == 0 else args[0]
|
109
|
-
|
64
|
+
dtype: str | None = kwargs.pop("dtype") if "dtype" in kwargs else None
|
110
65
|
name: str | None = kwargs.pop("name") if "name" in kwargs else None
|
111
66
|
if len(kwargs) > 0:
|
112
67
|
raise ValueError("Only features, type, name are valid keyword arguments")
|
113
68
|
# now code
|
114
69
|
features_registry = validate_features(features)
|
115
|
-
if
|
116
|
-
|
70
|
+
if dtype is None:
|
71
|
+
dtype = None if features_registry == Feature else NUMBER_TYPE
|
117
72
|
n_features = len(features)
|
118
73
|
features_hash = hash_set({feature.uid for feature in features})
|
119
74
|
feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
|
@@ -128,7 +83,7 @@ def __init__(self, *args, **kwargs):
|
|
128
83
|
super(FeatureSet, self).__init__(
|
129
84
|
uid=ids.base62_20(),
|
130
85
|
name=name,
|
131
|
-
|
86
|
+
dtype=get_type_str(dtype),
|
132
87
|
n=n_features,
|
133
88
|
registry=features_registry.__get_name_with_schema__(),
|
134
89
|
hash=hash,
|
@@ -144,13 +99,11 @@ def save(self, *args, **kwargs) -> None:
|
|
144
99
|
getattr(self, related_name).set(records)
|
145
100
|
|
146
101
|
|
147
|
-
def get_type_str(
|
148
|
-
if
|
149
|
-
type_str =
|
102
|
+
def get_type_str(dtype: str | None) -> str | None:
|
103
|
+
if dtype is not None:
|
104
|
+
type_str = dtype.__name__ if not isinstance(dtype, str) else dtype # type: ignore
|
150
105
|
else:
|
151
106
|
type_str = None
|
152
|
-
if type == "int" or type == "float":
|
153
|
-
type_str = NUMBER_TYPE
|
154
107
|
return type_str
|
155
108
|
|
156
109
|
|
@@ -165,7 +118,8 @@ def from_values(
|
|
165
118
|
mute: bool = False,
|
166
119
|
organism: Registry | str | None = None,
|
167
120
|
public_source: Registry | None = None,
|
168
|
-
|
121
|
+
raise_validation_error: bool = True,
|
122
|
+
) -> FeatureSet:
|
169
123
|
"""{}."""
|
170
124
|
if not isinstance(field, FieldAttr):
|
171
125
|
raise TypeError(
|
@@ -173,16 +127,25 @@ def from_values(
|
|
173
127
|
)
|
174
128
|
if len(values) == 0:
|
175
129
|
raise ValueError("Provide a list of at least one value")
|
130
|
+
if isinstance(values, DICT_KEYS_TYPE):
|
131
|
+
values = list(values)
|
176
132
|
registry = field.field.model
|
177
133
|
if registry != Feature and type is None:
|
178
134
|
type = NUMBER_TYPE
|
179
135
|
logger.debug("setting feature set to 'number'")
|
180
136
|
validated = registry.validate(values, field=field, mute=mute, organism=organism)
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
137
|
+
values_array = np.array(values)
|
138
|
+
validated_values = values_array[validated]
|
139
|
+
if validated.sum() != len(values):
|
140
|
+
not_validated_values = values_array[~validated]
|
141
|
+
msg = (
|
142
|
+
f"These values could not be validated: {not_validated_values.tolist()}\n"
|
143
|
+
f"If there are no typos, add them to their registry: {registry}"
|
144
|
+
)
|
145
|
+
if raise_validation_error:
|
146
|
+
raise ValidationError(msg)
|
147
|
+
elif len(validated_values) == 0:
|
148
|
+
return None # temporarily return None here
|
186
149
|
validated_features = registry.from_values(
|
187
150
|
validated_values,
|
188
151
|
field=field,
|
@@ -192,7 +155,7 @@ def from_values(
|
|
192
155
|
feature_set = FeatureSet(
|
193
156
|
features=validated_features,
|
194
157
|
name=name,
|
195
|
-
|
158
|
+
dtype=get_type_str(type),
|
196
159
|
)
|
197
160
|
return feature_set
|
198
161
|
|
@@ -217,12 +180,12 @@ def from_df(
|
|
217
180
|
return None
|
218
181
|
if registry == Feature:
|
219
182
|
validated_features = Feature.from_df(df.loc[:, validated])
|
220
|
-
feature_set = FeatureSet(validated_features, name=name,
|
183
|
+
feature_set = FeatureSet(validated_features, name=name, dtype=None)
|
221
184
|
else:
|
222
185
|
dtypes = [col.dtype for (_, col) in df.loc[:, validated].items()]
|
223
186
|
if len(set(dtypes)) != 1:
|
224
187
|
raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
|
225
|
-
|
188
|
+
dtype = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
|
226
189
|
validated_features = registry.from_values(
|
227
190
|
df.columns[validated],
|
228
191
|
field=field,
|
@@ -232,7 +195,7 @@ def from_df(
|
|
232
195
|
feature_set = FeatureSet(
|
233
196
|
features=validated_features,
|
234
197
|
name=name,
|
235
|
-
|
198
|
+
dtype=get_type_str(dtype),
|
236
199
|
)
|
237
200
|
return feature_set
|
238
201
|
|
@@ -246,14 +209,14 @@ def members(self) -> QuerySet:
|
|
246
209
|
# need to fix this
|
247
210
|
return self._features[1]
|
248
211
|
related_name = self._get_related_name()
|
212
|
+
if related_name is None:
|
213
|
+
related_name = "features"
|
249
214
|
return self.__getattribute__(related_name).all()
|
250
215
|
|
251
216
|
|
252
217
|
def _get_related_name(self: FeatureSet) -> str:
|
253
|
-
key_split = self.registry.split(".")
|
254
|
-
orm_name_with_schema = f"{key_split[0]}.{key_split[1]}"
|
255
218
|
feature_sets_related_models = dict_related_model_to_related_name(self)
|
256
|
-
related_name = feature_sets_related_models.get(
|
219
|
+
related_name = feature_sets_related_models.get(self.registry)
|
257
220
|
return related_name
|
258
221
|
|
259
222
|
|
lamindb/_from_values.py
CHANGED
@@ -102,9 +102,6 @@ def get_existing_records(
|
|
102
102
|
|
103
103
|
# standardize based on the DB reference
|
104
104
|
# log synonyms mapped terms
|
105
|
-
print("field", field)
|
106
|
-
print("organism", kwargs.get("organism"))
|
107
|
-
print("public_source", kwargs.get("public_source"))
|
108
105
|
result = model.inspect(
|
109
106
|
iterable_idx,
|
110
107
|
field=field,
|
lamindb/_query_manager.py
CHANGED
@@ -7,6 +7,8 @@ from lamin_utils import logger
|
|
7
7
|
from lamindb_setup.core._docs import doc_args
|
8
8
|
from lnschema_core.models import Registry
|
9
9
|
|
10
|
+
from lamindb.core._settings import settings
|
11
|
+
|
10
12
|
from .core._feature_manager import get_feature_set_by_slot
|
11
13
|
|
12
14
|
if TYPE_CHECKING:
|
@@ -41,7 +43,10 @@ class QueryManager(models.Manager):
|
|
41
43
|
from lamindb.core._data import WARNING_RUN_TRANSFORM, _track_run_input
|
42
44
|
from lamindb.core._run_context import run_context
|
43
45
|
|
44
|
-
if
|
46
|
+
if (
|
47
|
+
run_context.run is None
|
48
|
+
and not settings.silence_file_run_transform_warning
|
49
|
+
):
|
45
50
|
logger.warning(WARNING_RUN_TRANSFORM)
|
46
51
|
_track_run_input(self.instance)
|
47
52
|
|
lamindb/_registry.py
CHANGED
@@ -20,6 +20,7 @@ from lnschema_core import Registry
|
|
20
20
|
|
21
21
|
from lamindb._utils import attach_func_to_class_method
|
22
22
|
from lamindb.core._settings import settings
|
23
|
+
from lamindb.core.exceptions import ValidationError
|
23
24
|
|
24
25
|
from ._from_values import get_or_create_records
|
25
26
|
|
@@ -30,10 +31,6 @@ if TYPE_CHECKING:
|
|
30
31
|
IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
31
32
|
|
32
33
|
|
33
|
-
class ValidationError(Exception):
|
34
|
-
pass
|
35
|
-
|
36
|
-
|
37
34
|
def init_self_from_db(self: Registry, existing_record: Registry):
|
38
35
|
new_args = [
|
39
36
|
getattr(existing_record, field.attname) for field in self._meta.concrete_fields
|
@@ -422,7 +419,8 @@ def transfer_to_default_db(
|
|
422
419
|
if run_context.run is not None:
|
423
420
|
record.run_id = run_context.run.id
|
424
421
|
else:
|
425
|
-
|
422
|
+
if not settings.silence_file_run_transform_warning:
|
423
|
+
logger.warning(WARNING_RUN_TRANSFORM)
|
426
424
|
record.run_id = None
|
427
425
|
if hasattr(record, "transform_id") and record._meta.model_name != "run":
|
428
426
|
record.transform = None
|
@@ -533,7 +531,11 @@ def __get_schema_name__(cls) -> str:
|
|
533
531
|
@classmethod # type: ignore
|
534
532
|
def __get_name_with_schema__(cls) -> str:
|
535
533
|
schema_name = cls.__get_schema_name__()
|
536
|
-
|
534
|
+
if schema_name == "core":
|
535
|
+
schema_prefix = ""
|
536
|
+
else:
|
537
|
+
schema_prefix = f"{schema_name}."
|
538
|
+
return f"{schema_prefix}{cls.__name__}"
|
537
539
|
|
538
540
|
|
539
541
|
Registry.__get_schema_name__ = __get_schema_name__
|
lamindb/_run.py
CHANGED
@@ -13,7 +13,6 @@ def __init__(run: Run, *args, **kwargs):
|
|
13
13
|
transform: Transform = None
|
14
14
|
if "transform" in kwargs or len(args) == 1:
|
15
15
|
transform = kwargs.pop("transform") if len(args) == 0 else args[0]
|
16
|
-
params: str | None = kwargs.pop("params") if "params" in kwargs else None
|
17
16
|
reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
|
18
17
|
reference_type: str | None = (
|
19
18
|
kwargs.pop("reference_type") if "reference_type" in kwargs else None
|
@@ -26,7 +25,6 @@ def __init__(run: Run, *args, **kwargs):
|
|
26
25
|
transform=transform,
|
27
26
|
reference=reference,
|
28
27
|
reference_type=reference_type,
|
29
|
-
json=params,
|
30
28
|
)
|
31
29
|
|
32
30
|
|
lamindb/core/__init__.py
CHANGED
@@ -15,6 +15,8 @@ Registries:
|
|
15
15
|
IsVersioned
|
16
16
|
CanValidate
|
17
17
|
HasParents
|
18
|
+
TracksRun
|
19
|
+
TracksUpdates
|
18
20
|
InspectResult
|
19
21
|
fields
|
20
22
|
|
@@ -56,6 +58,8 @@ from lnschema_core.models import (
|
|
56
58
|
HasParents,
|
57
59
|
IsVersioned,
|
58
60
|
Registry,
|
61
|
+
TracksRun,
|
62
|
+
TracksUpdates,
|
59
63
|
)
|
60
64
|
|
61
65
|
from lamindb._annotate import (
|