lamindb 0.49.3__py3-none-any.whl → 0.50.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +55 -15
- lamindb/_context.py +25 -25
- lamindb/_delete.py +8 -8
- lamindb/_feature.py +15 -11
- lamindb/_feature_set.py +70 -39
- lamindb/_file.py +80 -56
- lamindb/_filter.py +5 -5
- lamindb/_from_values.py +55 -92
- lamindb/{_manager.py → _query_manager.py} +8 -5
- lamindb/{_queryset.py → _query_set.py} +31 -28
- lamindb/{_orm.py → _registry.py} +53 -294
- lamindb/_save.py +14 -13
- lamindb/_synonym.py +203 -0
- lamindb/_validate.py +134 -0
- lamindb/_view.py +15 -9
- lamindb/dev/__init__.py +13 -6
- lamindb/dev/_data.py +195 -0
- lamindb/dev/_feature_manager.py +102 -0
- lamindb/dev/_settings.py +10 -9
- lamindb/dev/_view_parents.py +36 -17
- lamindb/dev/datasets/__init__.py +5 -3
- lamindb/dev/datasets/_core.py +35 -17
- lamindb/dev/exc.py +4 -0
- lamindb/dev/storage/_backed_access.py +53 -17
- lamindb/dev/storage/file.py +44 -15
- {lamindb-0.49.3.dist-info → lamindb-0.50.1.dist-info}/METADATA +34 -36
- lamindb-0.50.1.dist-info/RECORD +47 -0
- lamindb/_feature_manager.py +0 -237
- lamindb-0.49.3.dist-info/RECORD +0 -43
- {lamindb-0.49.3.dist-info → lamindb-0.50.1.dist-info}/LICENSE +0 -0
- {lamindb-0.49.3.dist-info → lamindb-0.50.1.dist-info}/WHEEL +0 -0
- {lamindb-0.49.3.dist-info → lamindb-0.50.1.dist-info}/entry_points.txt +0 -0
lamindb/_synonym.py
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
from typing import Dict, Iterable, List, Literal, Optional, Set, Union
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
from django.core.exceptions import FieldDoesNotExist
|
5
|
+
from django.db.models import QuerySet
|
6
|
+
from lamin_utils import logger
|
7
|
+
from lamindb_setup.dev._docs import doc_args
|
8
|
+
from lnschema_core import Registry, SynonymsAware
|
9
|
+
from lnschema_core.types import ListLike
|
10
|
+
|
11
|
+
from lamindb.dev.utils import attach_func_to_class_method
|
12
|
+
|
13
|
+
from . import _TESTING
|
14
|
+
from ._registry import get_default_str_field
|
15
|
+
from ._validate import _filter_query_based_on_species
|
16
|
+
|
17
|
+
|
18
|
+
@classmethod  # type: ignore
@doc_args(SynonymsAware.map_synonyms.__doc__)
def map_synonyms(
    cls,
    synonyms: Iterable,
    *,
    return_mapper: bool = False,
    case_sensitive: bool = False,
    keep: Literal["first", "last", False] = "first",
    synonyms_field: str = "synonyms",
    field: Optional[str] = None,
    **kwargs,
) -> Union[List[str], Dict[str, str]]:
    """{}"""
    # Public entry point: the ``{}`` docstring placeholder is handed to
    # ``doc_args`` together with the ``SynonymsAware.map_synonyms`` docstring,
    # and all work is delegated to the module-private ``_map_synonyms``.
    options = dict(
        return_mapper=return_mapper,
        case_sensitive=case_sensitive,
        keep=keep,
        synonyms_field=synonyms_field,
        field=field,
    )
    return _map_synonyms(cls, synonyms, **options, **kwargs)
|
42
|
+
|
43
|
+
|
44
|
+
def set_abbr(self, value: str):
    """Set ``abbr`` on the record and register the value as a synonym.

    If ``add_synonym`` raises ``NotImplementedError`` the synonym step is
    skipped and only ``abbr`` is set. The record is saved when
    ``self._state.adding`` is falsy, i.e. when it is already in the DB.

    Args:
        value: The abbreviation to store.
    """
    try:
        # save=False: persistence is handled once, at the end of this method
        self.add_synonym(value, save=False)
    except NotImplementedError:
        # synonyms are not supported here; setting the abbreviation still is
        pass
    self.abbr = value
    already_in_db = not self._state.adding
    if already_in_db:
        self.save()
|
52
|
+
|
53
|
+
|
54
|
+
def add_synonym(
    self,
    synonym: Union[str, ListLike],
    force: bool = False,
    save: Optional[bool] = None,
):
    """Add one or several synonyms to this record.

    Args:
        synonym: A single synonym or a list-like of synonyms.
        force: Forwarded to ``_add_or_remove_synonyms``; when true, the check
            against synonyms of other records is skipped.
        save: Forwarded to ``_add_or_remove_synonyms``; ``None`` lets it
            decide based on whether the record is already in the DB.

    Raises:
        NotImplementedError: If the record has no ``synonyms`` attribute.
    """
    # fail early on registries without a synonyms field
    _check_synonyms_field_exist(self)
    _add_or_remove_synonyms(synonym, self, "add", force=force, save=save)
|
64
|
+
|
65
|
+
|
66
|
+
def remove_synonym(self, synonym: Union[str, ListLike]):
    """Remove one or several synonyms from this record.

    Args:
        synonym: A single synonym or a list-like of synonyms to remove.

    Raises:
        NotImplementedError: If the record has no ``synonyms`` attribute.
    """
    # fail early on registries without a synonyms field
    _check_synonyms_field_exist(self)
    _add_or_remove_synonyms(synonym, self, "remove")
|
69
|
+
|
70
|
+
|
71
|
+
def _add_or_remove_synonyms(
|
72
|
+
synonym: Union[str, Iterable],
|
73
|
+
record: Registry,
|
74
|
+
action: Literal["add", "remove"],
|
75
|
+
force: bool = False,
|
76
|
+
save: Optional[bool] = None,
|
77
|
+
):
|
78
|
+
"""Add or remove synonyms."""
|
79
|
+
|
80
|
+
def check_synonyms_in_all_records(synonyms: Set[str], record: Registry):
|
81
|
+
"""Errors if input synonym is associated with other records in the DB."""
|
82
|
+
import pandas as pd
|
83
|
+
from IPython.display import display
|
84
|
+
|
85
|
+
syns_all = (
|
86
|
+
record.__class__.objects.exclude(synonyms="").exclude(synonyms=None).all()
|
87
|
+
)
|
88
|
+
if len(syns_all) == 0:
|
89
|
+
return
|
90
|
+
df = pd.DataFrame(syns_all.values())
|
91
|
+
df["synonyms"] = df["synonyms"].str.split("|")
|
92
|
+
df = df.explode("synonyms")
|
93
|
+
matches_df = df[(df["synonyms"].isin(synonyms)) & (df["id"] != record.id)]
|
94
|
+
if matches_df.shape[0] > 0:
|
95
|
+
records_df = pd.DataFrame(syns_all.filter(id__in=matches_df["id"]).values())
|
96
|
+
logger.error(
|
97
|
+
f"input synonyms {matches_df['synonyms'].unique()} already associated"
|
98
|
+
" with the following records:\n"
|
99
|
+
)
|
100
|
+
display(records_df)
|
101
|
+
raise SystemExit(AssertionError)
|
102
|
+
|
103
|
+
# passed synonyms
|
104
|
+
if isinstance(synonym, str):
|
105
|
+
syn_new_set = set([synonym])
|
106
|
+
else:
|
107
|
+
syn_new_set = set(synonym)
|
108
|
+
# nothing happens when passing an empty string or list
|
109
|
+
if len(syn_new_set) == 0:
|
110
|
+
return
|
111
|
+
# because we use | as the separator
|
112
|
+
if any(["|" in i for i in syn_new_set]):
|
113
|
+
raise AssertionError("a synonym can't contain '|'!")
|
114
|
+
|
115
|
+
# existing synonyms
|
116
|
+
syns_exist = record.synonyms
|
117
|
+
if syns_exist is None or len(syns_exist) == 0:
|
118
|
+
syns_exist_set = set()
|
119
|
+
else:
|
120
|
+
syns_exist_set = set(syns_exist.split("|"))
|
121
|
+
|
122
|
+
if action == "add":
|
123
|
+
if not force:
|
124
|
+
check_synonyms_in_all_records(syn_new_set, record)
|
125
|
+
syns_exist_set.update(syn_new_set)
|
126
|
+
elif action == "remove":
|
127
|
+
syns_exist_set = syns_exist_set.difference(syn_new_set)
|
128
|
+
|
129
|
+
if len(syns_exist_set) == 0:
|
130
|
+
syns_str = None
|
131
|
+
else:
|
132
|
+
syns_str = "|".join(syns_exist_set)
|
133
|
+
|
134
|
+
record.synonyms = syns_str
|
135
|
+
|
136
|
+
if save is None:
|
137
|
+
# if record is already in DB, save the changes to DB
|
138
|
+
save = not record._state.adding
|
139
|
+
if save:
|
140
|
+
record.save()
|
141
|
+
|
142
|
+
|
143
|
+
def _check_synonyms_field_exist(record: Registry):
|
144
|
+
try:
|
145
|
+
record.__getattribute__("synonyms")
|
146
|
+
except AttributeError:
|
147
|
+
raise NotImplementedError(
|
148
|
+
f"No synonyms field found in table {record.__class__.__name__}!"
|
149
|
+
)
|
150
|
+
|
151
|
+
|
152
|
+
def _map_synonyms(
    cls,
    synonyms: Iterable,
    *,
    return_mapper: bool = False,
    case_sensitive: bool = False,
    keep: Literal["first", "last", False] = "first",
    synonyms_field: str = "synonyms",
    field: Optional[str] = None,
    **kwargs,
) -> Union[List[str], Dict[str, str]]:
    """Implementation behind ``map_synonyms``.

    Normalizes the inputs, loads the records of the registry into a
    DataFrame and delegates to ``lamin_utils._map_synonyms.map_synonyms``.

    Args:
        cls: A registry class or a ``QuerySet`` (its ``model`` is used).
        synonyms: A single string or an iterable of identifiers.
        return_mapper, case_sensitive, keep, synonyms_field: Forwarded
            unchanged to ``lamin_utils``.
        field: Field name (or field object exposing ``.field.name``);
            ``None`` falls back to ``get_default_str_field(cls)``.
        **kwargs: Only ``species`` is read here.
    """
    from lamin_utils._map_synonyms import map_synonyms

    identifiers = [synonyms] if isinstance(synonyms, str) else synonyms
    if field is None:
        field = get_default_str_field(cls)
    if not isinstance(field, str):
        field = field.field.name

    model = cls.model if isinstance(cls, QuerySet) else cls

    try:
        # raises FieldDoesNotExist when the registry has no synonyms field
        model._meta.get_field(synonyms_field)
        df = _filter_query_based_on_species(orm=model, species=kwargs.get("species"))
    except FieldDoesNotExist:
        # map against an empty table instead
        df = pd.DataFrame()

    return map_synonyms(
        df=df,
        identifiers=identifiers,
        field=field,
        return_mapper=return_mapper,
        case_sensitive=case_sensitive,
        keep=keep,
        synonyms_field=synonyms_field,
    )
|
189
|
+
|
190
|
+
|
191
|
+
# Names of the module-level functions that get attached to ``SynonymsAware``.
METHOD_NAMES = ["map_synonyms", "add_synonym", "remove_synonym", "set_abbr"]

if _TESTING:  # type: ignore
    from inspect import signature

    # Signatures as found on ``SynonymsAware`` before the loop below runs.
    SIGS = {}
    for name in METHOD_NAMES:
        if name.startswith("__"):
            continue
        SIGS[name] = signature(getattr(SynonymsAware, name))

for name in METHOD_NAMES:
    attach_func_to_class_method(name, SynonymsAware, globals())
|
lamindb/_validate.py
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
from typing import Dict, List, Optional, Union
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
import pandas as pd
|
5
|
+
from django.db.models import QuerySet
|
6
|
+
from lamin_utils._inspect import InspectResult
|
7
|
+
from lamindb_setup.dev._docs import doc_args
|
8
|
+
from lnschema_core import Registry, ValidationAware
|
9
|
+
from lnschema_core.types import ListLike, StrField
|
10
|
+
|
11
|
+
from lamindb.dev.utils import attach_func_to_class_method
|
12
|
+
|
13
|
+
from . import _TESTING
|
14
|
+
from ._from_values import _has_species_field
|
15
|
+
|
16
|
+
|
17
|
+
@classmethod  # type: ignore
@doc_args(ValidationAware.inspect.__doc__)
def inspect(
    cls,
    values: ListLike,
    field: StrField,
    *,
    mute: bool = False,
    **kwargs,
) -> InspectResult:
    """{}"""
    # Public entry point: the ``{}`` docstring placeholder is handed to
    # ``doc_args`` together with the ``ValidationAware.inspect`` docstring;
    # the work happens in the module-private ``_inspect``.
    return _inspect(cls, values, field, mute=mute, **kwargs)
|
35
|
+
|
36
|
+
|
37
|
+
@classmethod  # type: ignore
@doc_args(ValidationAware.validate.__doc__)
def validate(cls, values: ListLike, field: StrField, **kwargs) -> np.ndarray:
    """{}"""
    # The return annotation is the plain ``np.ndarray``: ``np.ndarray[bool]``
    # is evaluated when the function is defined and is not a valid ndarray
    # parametrisation (ndarray is generic over shape and dtype).
    # The ``{}`` docstring placeholder is handed to ``doc_args``; the work
    # happens in the module-private ``_validate``.
    return _validate(cls=cls, values=values, field=field, **kwargs)
|
42
|
+
|
43
|
+
|
44
|
+
def _inspect(
    cls,
    values: ListLike,
    field: StrField,
    *,
    mute: bool = False,
    **kwargs,
) -> InspectResult:
    """Implementation behind ``inspect``.

    Loads the records of the registry into a DataFrame and delegates to
    ``lamin_utils._inspect.inspect`` with synonym inspection enabled.

    Args:
        cls: A registry class or a ``QuerySet`` (its ``model`` is used).
        values: A single string or a list-like of identifiers.
        field: Field name, or a field object exposing ``.field.name``.
        mute: Forwarded to ``lamin_utils``.
        **kwargs: ``species`` is read here; all keyword arguments are also
            forwarded to ``lamin_utils``.

    Returns:
        Whatever ``lamin_utils._inspect.inspect`` returns; annotated as
        ``InspectResult`` to agree with the public ``inspect`` wrapper.
    """
    from lamin_utils._inspect import inspect

    if isinstance(values, str):
        values = [values]
    if not isinstance(field, str):
        field = field.field.name

    orm = cls.model if isinstance(cls, QuerySet) else cls

    return inspect(
        df=_filter_query_based_on_species(orm=orm, species=kwargs.get("species")),
        identifiers=values,
        field=str(field),
        inspect_synonyms=True,
        mute=mute,
        **kwargs,
    )
|
70
|
+
|
71
|
+
|
72
|
+
def _validate(cls, values: ListLike, field: StrField, **kwargs) -> np.ndarray:
    """Implementation behind ``validate``.

    Collects the values of ``field`` for the registry and delegates to
    ``lamin_utils._inspect.validate`` with case-sensitive matching.

    Args:
        cls: A registry class or a ``QuerySet`` (its ``model`` is used).
        values: A single string or a list-like of identifiers.
        field: Field name, or a field object exposing ``.field.name``.
        **kwargs: ``species`` is read here; all keyword arguments are also
            forwarded to ``lamin_utils``.

    Returns:
        The result of ``lamin_utils._inspect.validate``. The annotation is the
        plain ``np.ndarray`` because ``np.ndarray[bool]`` is evaluated at
        definition time and is not a valid ndarray parametrisation.
    """
    from lamin_utils._inspect import validate

    if isinstance(values, str):
        values = [values]
    if not isinstance(field, str):
        field = field.field.name

    orm = cls.model if isinstance(cls, QuerySet) else cls
    field_values = pd.Series(
        _filter_query_based_on_species(
            orm=orm, species=kwargs.get("species"), values_list_field=field
        )
    )
    return validate(
        identifiers=values, field_values=field_values, case_sensitive=True, **kwargs
    )
|
90
|
+
|
91
|
+
|
92
|
+
def _filter_query_based_on_species(
    orm: Union[Registry, QuerySet],
    species: Optional[Union[str, Registry]] = None,
    values_list_field: Optional[str] = None,
):
    """Fetch records of a registry, optionally restricted to one species.

    Args:
        orm: A registry class or a ``QuerySet``.
        species: Passed to ``create_or_get_species_record`` when the registry
            has a species field; ignored otherwise.
        values_list_field: If given, only this field is fetched.

    Returns:
        A DataFrame built from ``records.values()`` when ``values_list_field``
        is ``None``; otherwise the flat ``values_list`` of that field.
    """
    import pandas as pd

    from_queryset = isinstance(orm, QuerySet)
    want_frame = values_list_field is None

    if want_frame:
        records = orm.all() if from_queryset else orm.objects.all()
    else:
        records = orm if from_queryset else orm.objects

    if _has_species_field(orm):
        # here, we can safely import lnschema_bionty
        from lnschema_bionty._bionty import create_or_get_species_record

        model = orm.model if from_queryset else orm
        species_record = create_or_get_species_record(species=species, orm=model)
        if species_record is not None:
            records = records.filter(species__name=species_record.name)

    if want_frame:
        return pd.DataFrame.from_records(records.values())
    return records.values_list(values_list_field, flat=True)
|
117
|
+
|
118
|
+
|
119
|
+
# Names of the module-level functions that get attached to ``ValidationAware``.
METHOD_NAMES = ["validate", "inspect"]

if _TESTING:  # type: ignore
    from inspect import signature

    # Signatures as found on ``ValidationAware`` before the loop below runs.
    SIGS = {}
    for name in METHOD_NAMES:
        if name.startswith("__"):
            continue
        SIGS[name] = signature(getattr(ValidationAware, name))

for name in METHOD_NAMES:
    attach_func_to_class_method(name, ValidationAware, globals())
|
lamindb/_view.py
CHANGED
@@ -6,17 +6,19 @@ from IPython.display import display
|
|
6
6
|
from lamin_utils import colors
|
7
7
|
from lamindb_setup import settings
|
8
8
|
from lamindb_setup.dev._setup_schema import get_schema_module_name
|
9
|
-
from lnschema_core import
|
9
|
+
from lnschema_core import Registry
|
10
10
|
|
11
11
|
|
12
|
-
def view(
|
12
|
+
def view(
|
13
|
+
n: int = 10, schema: Optional[str] = None, registries: Optional[List[str]] = None
|
14
|
+
):
|
13
15
|
"""View data.
|
14
16
|
|
15
17
|
Args:
|
16
18
|
n: ``int = 10`` Display the last `n` rows of a table.
|
17
19
|
schema: ``Optional[str] = None`` Schema module to view. Defaults to
|
18
20
|
`None` and displays all schema modules.
|
19
|
-
|
21
|
+
registries: ``Optional[List[str]] = None`` List of Registry names. Defaults to
|
20
22
|
`None` and lists all ORMs.
|
21
23
|
|
22
24
|
Examples:
|
@@ -30,22 +32,26 @@ def view(n: int = 10, schema: Optional[str] = None, orms: Optional[List[str]] =
|
|
30
32
|
for schema_name in schema_names:
|
31
33
|
schema_module = importlib.import_module(get_schema_module_name(schema_name))
|
32
34
|
|
33
|
-
|
35
|
+
all_registries = {
|
34
36
|
orm
|
35
37
|
for orm in schema_module.__dict__.values()
|
36
|
-
if inspect.isclass(orm)
|
38
|
+
if inspect.isclass(orm)
|
39
|
+
and issubclass(orm, Registry)
|
40
|
+
and orm.__name__ != "Registry"
|
37
41
|
}
|
38
|
-
if
|
39
|
-
|
42
|
+
if registries is not None:
|
43
|
+
filtered_registries = {
|
44
|
+
orm for orm in all_registries if orm.__name__ in registries
|
45
|
+
}
|
40
46
|
else:
|
41
|
-
|
47
|
+
filtered_registries = all_registries
|
42
48
|
if len(schema_names) > 1:
|
43
49
|
section = f"* module: {colors.green(colors.bold(schema_name))} *"
|
44
50
|
section_no_color = f"* module: {schema_name} *"
|
45
51
|
print("*" * len(section_no_color))
|
46
52
|
print(section)
|
47
53
|
print("*" * len(section_no_color))
|
48
|
-
for orm in sorted(
|
54
|
+
for orm in sorted(filtered_registries, key=lambda x: x.__name__):
|
49
55
|
if hasattr(orm, "updated_at"):
|
50
56
|
df = orm.filter().order_by("-updated_at")[:n].df()
|
51
57
|
else:
|
lamindb/dev/__init__.py
CHANGED
@@ -3,23 +3,30 @@
|
|
3
3
|
.. autosummary::
|
4
4
|
:toctree: .
|
5
5
|
|
6
|
-
|
6
|
+
Registry
|
7
|
+
Data
|
7
8
|
QuerySet
|
8
|
-
|
9
|
+
QueryManager
|
9
10
|
FeatureManager
|
11
|
+
ValidationAware
|
12
|
+
SynonymsAware
|
13
|
+
InspectResult
|
10
14
|
datasets
|
11
15
|
hashing
|
12
16
|
storage
|
13
17
|
Settings
|
14
18
|
run_context
|
19
|
+
exc.ValidationError
|
15
20
|
"""
|
16
21
|
|
17
|
-
from
|
22
|
+
from lamin_utils._inspect import InspectResult
|
23
|
+
from lnschema_core.models import Data, Registry, SynonymsAware, ValidationAware
|
18
24
|
|
19
|
-
from lamindb.
|
20
|
-
from lamindb.
|
21
|
-
from lamindb.
|
25
|
+
from lamindb._query_manager import QueryManager
|
26
|
+
from lamindb._query_set import QuerySet
|
27
|
+
from lamindb.dev._feature_manager import FeatureManager
|
22
28
|
|
23
29
|
from .._context import run_context
|
24
30
|
from . import datasets # noqa
|
31
|
+
from . import _data, exc
|
25
32
|
from ._settings import Settings
|
lamindb/dev/_data.py
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
from collections import defaultdict
|
2
|
+
from typing import Dict, List, Optional, Union
|
3
|
+
|
4
|
+
from lamin_utils import logger
|
5
|
+
from lamindb_setup.dev._docs import doc_args
|
6
|
+
from lnschema_core.models import Data, Feature, FeatureSet, Label, Registry
|
7
|
+
|
8
|
+
from .._query_set import QuerySet
|
9
|
+
from .._registry import get_default_str_field
|
10
|
+
from .._save import save
|
11
|
+
from ._feature_manager import FeatureManager
|
12
|
+
from .exc import ValidationError
|
13
|
+
|
14
|
+
|
15
|
+
def validate_and_cast_feature(
    feature: Union[str, Feature], records: List[Registry]
) -> Feature:
    """Resolve a feature given by name to its ``Feature`` record.

    Args:
        feature: A feature name or any non-string value; non-strings are
            returned unchanged.
        records: Records used only to build the suggestion in the error
            message.

    Raises:
        ValidationError: If ``feature`` is a string and no ``Feature`` with
            that name is found.
    """
    if not isinstance(feature, str):
        return feature

    feature_name = feature
    found = Feature.filter(name=feature_name).one_or_none()
    if found is not None:
        return found

    # suggest how to create the missing feature for the involved registries
    registries = {record.__class__.__get_name_with_schema__() for record in records}
    registries_str = "|".join(registries)
    msg = (
        f"ln.Feature(name='{feature_name}', type='category',"
        f" registries='{registries_str}').save()"
    )
    raise ValidationError(f"Feature not validated. If it looks correct: {msg}")
|
32
|
+
|
33
|
+
|
34
|
+
@doc_args(Data.get_labels.__doc__)
def get_labels(
    self,
    feature: Optional[Union[str, Registry]] = None,
    mute: bool = False,
    flat_names: bool = False,
) -> Union[QuerySet, Dict[str, QuerySet], List]:
    """{}"""
    # The ``{}`` docstring placeholder is handed to ``doc_args`` together with
    # the ``Data.get_labels`` docstring.
    if isinstance(feature, str):
        feature_name = feature
        feature = Feature.filter(name=feature_name).one_or_none()
        if feature is None:
            raise ValueError("feature doesn't exist")
    elif feature is None:
        # the default would otherwise fail with an AttributeError below
        raise ValueError("Please pass a feature: get_labels(feature='myfeature')")
    if feature.registries is None:
        raise ValueError("feature does not have linked labels")
    registries_to_check = feature.registries.split("|")
    if len(registries_to_check) > 1 and not mute:
        logger.warning("labels come from multiple registries!")
    # `self.features` builds a new manager on every access; look it up once
    accessor_by_orm = self.features._accessor_by_orm
    qs_by_registry = {}
    for registry in registries_to_check:
        related_manager = getattr(self, accessor_by_orm[registry])
        # currently need to distinguish between Label and non-Label, because
        # we only have the feature information for Label
        if registry == "core.Label":
            links_to_labels = related_manager.through.objects.filter(
                file_id=self.id, feature_id=feature.id
            )
            label_ids = [link.label_id for link in links_to_labels]
            qs_by_registry[registry] = Label.objects.filter(id__in=label_ids)
        else:
            qs_by_registry[registry] = related_manager.all()
    if flat_names:
        # returns a flat list of names
        values = []
        for v in qs_by_registry.values():
            values += v.list(get_default_str_field(v))
        return values
    if len(registries_to_check) == 1:
        # index explicitly instead of relying on the leaked loop variable
        return qs_by_registry[registries_to_check[0]]
    return qs_by_registry
|
78
|
+
|
79
|
+
|
80
|
+
@doc_args(Data.add_labels.__doc__)
def add_labels(
    self,
    records: Union[Registry, List[Registry], QuerySet],
    feature: Optional[Union[str, Registry]] = None,
) -> None:
    """{}"""
    # The ``{}`` docstring placeholder is handed to ``doc_args`` together with
    # the ``Data.add_labels`` docstring.
    if isinstance(records, (QuerySet, QuerySet.__base__)):  # need to have both
        records = records.list()
    if isinstance(records, str) or not isinstance(records, List):
        records = [records]
    if not records:
        # nothing to link; avoids an IndexError on records[0] below
        return
    if isinstance(records[0], str):  # type: ignore
        raise ValueError(
            "Please pass a record (a `Registry` object), not a string, e.g., via:"
            " label"
            f" = ln.Label(name='{records[0]}')"  # type: ignore
        )
    if self._state.adding:
        raise ValueError("Please save the file or dataset before adding a label!")
    for record in records:
        if record._state.adding:
            raise ValidationError(
                f"{record} not validated. If it looks correct: record.save()"
            )
    feature = validate_and_cast_feature(feature, records)
    orig_feature = feature

    # group the records by (feature, registry name)
    records_by_feature_orm = defaultdict(list)
    for record in records:
        if feature is None:
            error_msg = "Please pass feature: add_labels(labels, feature='myfeature')"
            # fall back to a feature attached to the record itself
            record_feature = feature
            if hasattr(record, "_feature"):
                record_feature = record._feature
            if record_feature is None:
                raise ValueError(error_msg)
            # TODO: refactor so that we don't call the following line
            # repeatedly for the same feature
            record_feature = validate_and_cast_feature(record_feature, [record])
        else:
            record_feature = feature
        records_by_feature_orm[
            (record_feature, record.__class__.__get_name_with_schema__())
        ].append(record)

    # ensure all labels are saved
    save(records)
    for (group_feature, orm_name), group_records in records_by_feature_orm.items():
        getattr(self, self.features._accessor_by_orm[orm_name]).add(
            *group_records, through_defaults={"feature_id": group_feature.id}
        )

    feature_set_links = self.feature_sets.through.objects.filter(file_id=self.id)
    feature_set_ids = [link.feature_set_id for link in feature_set_links.all()]
    # get all linked features of type Feature
    feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
    linked_features_by_slot = {
        feature_set_links.filter(feature_set_id=feature_set.id)
        .one()
        .slot: feature_set.features.all()
        for feature_set in feature_sets
        if "core.Feature" == feature_set.registry
    }

    for (group_feature, orm_name), group_records in records_by_feature_orm.items():
        group_feature = validate_and_cast_feature(group_feature, group_records)
        msg = ""
        if orig_feature is None:
            records_display = ", ".join(
                [
                    f"'{getattr(record, get_default_str_field(record))}'"
                    for record in group_records
                ]
            )
            msg += f"linked labels {records_display} to feature '{group_feature.name}'"
        # `registries` is a '|'-joined string: compare whole names, since a
        # substring test would also match prefixes of other registry names
        known_registries = (
            group_feature.registries.split("|") if group_feature.registries else []
        )
        if orm_name not in known_registries:
            if len(msg) > 0:
                msg += ", "
            msg += f"linked feature '{group_feature.name}' to registry '{orm_name}'"
            known_registries.append(orm_name)
            group_feature.registries = "|".join(known_registries)
        group_feature.save()
        if len(msg) > 0:
            logger.save(msg)

        # check whether we have to update the feature set that manages labels
        # (Feature) to account for a new feature
        found_feature = any(
            group_feature in linked_features
            for linked_features in linked_features_by_slot.values()
        )
        if not found_feature:
            # NOTE(review): `linked_features_by_slot` is computed once above, so
            # a second new feature in the same call takes this branch again —
            # confirm that is intended.
            if "external" not in linked_features_by_slot:
                feature_set = FeatureSet([group_feature], modality="meta")
                feature_set.save()
                self.features.add_feature_set(feature_set, slot="external")
                logger.save("created feature set for slot 'external'")
            else:
                feature_set = self.features._feature_set_by_slot["external"]
                feature_set.features.add(group_feature)
                feature_set.n += 1
                feature_set.save()
                logger.save(
                    f"linked feature {group_feature.name} to feature set {feature_set}"
                )
|
182
|
+
|
183
|
+
|
184
|
+
@property  # type: ignore
@doc_args(Data.features.__doc__)
def features(self) -> "FeatureManager":
    """{}"""
    # The ``{}`` docstring placeholder is handed to ``doc_args`` together with
    # the ``Data.features`` docstring. A new manager is built on every access.
    from lamindb.dev._feature_manager import FeatureManager

    manager = FeatureManager(self)
    return manager
|
191
|
+
|
192
|
+
|
193
|
+
# Attach the implementations defined in this module to ``Data``.
Data.features = features
Data.add_labels = add_labels
Data.get_labels = get_labels
|