lamindb 0.49.3__py3-none-any.whl → 0.50.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_synonym.py ADDED
@@ -0,0 +1,203 @@
1
+ from typing import Dict, Iterable, List, Literal, Optional, Set, Union
2
+
3
+ import pandas as pd
4
+ from django.core.exceptions import FieldDoesNotExist
5
+ from django.db.models import QuerySet
6
+ from lamin_utils import logger
7
+ from lamindb_setup.dev._docs import doc_args
8
+ from lnschema_core import Registry, SynonymsAware
9
+ from lnschema_core.types import ListLike
10
+
11
+ from lamindb.dev.utils import attach_func_to_class_method
12
+
13
+ from . import _TESTING
14
+ from ._registry import get_default_str_field
15
+ from ._validate import _filter_query_based_on_species
16
+
17
+
18
@classmethod  # type: ignore
@doc_args(SynonymsAware.map_synonyms.__doc__)
def map_synonyms(
    cls,
    synonyms: Iterable,
    *,
    return_mapper: bool = False,
    case_sensitive: bool = False,
    keep: Literal["first", "last", False] = "first",
    synonyms_field: str = "synonyms",
    field: Optional[str] = None,
    **kwargs,
) -> Union[List[str], Dict[str, str]]:
    """{}"""
    # The "{}" docstring above is a placeholder; `doc_args` receives the
    # docstring of `SynonymsAware.map_synonyms` (presumably to fill it in).
    #
    # Public entry point only: every argument is handed on unchanged to the
    # module-level helper `_map_synonyms`, which contains the actual logic.
    mapped = _map_synonyms(
        cls,
        synonyms,
        field=field,
        synonyms_field=synonyms_field,
        keep=keep,
        case_sensitive=case_sensitive,
        return_mapper=return_mapper,
        **kwargs,
    )
    return mapped
42
+
43
+
44
def set_abbr(self, value: str):
    """Store ``value`` as the record's abbreviation.

    The abbreviation is first offered to ``add_synonym`` (without saving);
    a ``NotImplementedError`` from that call is ignored. ``self.abbr`` is then
    set, and the record is saved when ``self._state.adding`` is false.
    """
    try:
        self.add_synonym(value, save=False)
    except NotImplementedError:
        # best effort: the abbreviation is still set below
        pass
    self.abbr = value
    still_unsaved = self._state.adding
    if still_unsaved:
        return
    self.save()
52
+
53
+
54
def add_synonym(
    self,
    synonym: Union[str, ListLike],
    force: bool = False,
    save: Optional[bool] = None,
):
    """Add one synonym (a string) or several (a list-like) to this record.

    ``force`` and ``save`` are passed through to ``_add_or_remove_synonyms``.
    Raises ``NotImplementedError`` (from ``_check_synonyms_field_exist``) when
    the record has no ``synonyms`` attribute.
    """
    # fail early for registries that cannot hold synonyms
    _check_synonyms_field_exist(self)
    _add_or_remove_synonyms(
        record=self,
        synonym=synonym,
        action="add",
        force=force,
        save=save,
    )
64
+
65
+
66
def remove_synonym(self, synonym: Union[str, ListLike]):
    """Remove one synonym (a string) or several (a list-like) from this record.

    Raises ``NotImplementedError`` (from ``_check_synonyms_field_exist``) when
    the record has no ``synonyms`` attribute.
    """
    # fail early for registries that cannot hold synonyms
    _check_synonyms_field_exist(self)
    _add_or_remove_synonyms(record=self, synonym=synonym, action="remove")
69
+
70
+
71
def _add_or_remove_synonyms(
    synonym: Union[str, Iterable],
    record: "Registry",
    action: Literal["add", "remove"],
    force: bool = False,
    save: Optional[bool] = None,
):
    """Add synonyms to, or remove synonyms from, ``record.synonyms``.

    Synonyms are stored on the record as a single ``"|"``-separated string, or
    ``None`` when there are none.

    Args:
        synonym: A single synonym or an iterable of synonyms. Empty strings
            are ignored.
        record: The record whose ``synonyms`` attribute is updated in place.
        action: ``"add"`` merges the passed synonyms into the existing ones,
            ``"remove"`` drops them from the existing ones.
        force: Only used for ``"add"``. If ``False``, the passed synonyms are
            first checked against the synonyms of all other records of the
            same class.
        save: Whether to call ``record.save()`` afterwards. ``None`` means
            "save only if ``record._state.adding`` is false".

    Raises:
        AssertionError: If a synonym contains the separator ``"|"``.
        SystemExit: If ``force`` is false and a synonym to add is already
            associated with a different record.
    """

    def check_synonyms_in_all_records(synonyms: Set[str], record: "Registry"):
        """Errors if input synonym is associated with other records in the DB."""
        import pandas as pd
        from IPython.display import display

        syns_all = (
            record.__class__.objects.exclude(synonyms="").exclude(synonyms=None).all()
        )
        if len(syns_all) == 0:
            return
        df = pd.DataFrame(syns_all.values())
        # one row per (record, synonym) pair
        df["synonyms"] = df["synonyms"].str.split("|")
        df = df.explode("synonyms")
        matches_df = df[(df["synonyms"].isin(synonyms)) & (df["id"] != record.id)]
        if matches_df.shape[0] > 0:
            records_df = pd.DataFrame(syns_all.filter(id__in=matches_df["id"]).values())
            logger.error(
                f"input synonyms {matches_df['synonyms'].unique()} already associated"
                " with the following records:\n"
            )
            display(records_df)
            raise SystemExit(AssertionError)

    # passed synonyms: a bare string is ONE synonym, not an iterable of chars
    if isinstance(synonym, str):
        syn_new_set = {synonym}
    else:
        syn_new_set = set(synonym)
    # nothing happens when passing an empty string or list; an empty string
    # must be dropped explicitly, otherwise "" would be stored as a synonym
    syn_new_set.discard("")
    if not syn_new_set:
        return
    # because we use | as the separator
    if any("|" in syn for syn in syn_new_set):
        raise AssertionError("a synonym can't contain '|'!")

    # existing synonyms (None and "" both mean "no synonyms yet")
    syns_exist = record.synonyms
    syns_exist_set = set(syns_exist.split("|")) if syns_exist else set()
    # ignore empty fragments left behind by values such as "a|" or "a||b"
    syns_exist_set.discard("")

    if action == "add":
        if not force:
            check_synonyms_in_all_records(syn_new_set, record)
        syns_exist_set |= syn_new_set
    elif action == "remove":
        syns_exist_set -= syn_new_set

    # sort so that the stored string does not depend on set iteration order
    record.synonyms = "|".join(sorted(syns_exist_set)) if syns_exist_set else None

    if save is None:
        # if record is already in DB, save the changes to DB
        save = not record._state.adding
    if save:
        record.save()
141
+
142
+
143
def _check_synonyms_field_exist(record: Registry):
    """Raise ``NotImplementedError`` if ``record`` has no ``synonyms`` attribute."""
    try:
        record.__getattribute__("synonyms")
    except AttributeError:
        table_name = record.__class__.__name__
        raise NotImplementedError(f"No synonyms field found in table {table_name}!")
150
+
151
+
152
def _map_synonyms(
    cls,
    synonyms: Iterable,
    *,
    return_mapper: bool = False,
    case_sensitive: bool = False,
    keep: Literal["first", "last", False] = "first",
    synonyms_field: str = "synonyms",
    field: Optional[str] = None,
    **kwargs,
) -> Union[List[str], Dict[str, str]]:
    """Implementation behind ``map_synonyms``.

    Normalizes the inputs, loads the registry content into a DataFrame and
    delegates to ``lamin_utils._map_synonyms.map_synonyms``. Of ``kwargs``,
    only ``species`` is read here (it is used to filter the registry).
    """
    from lamin_utils._map_synonyms import map_synonyms

    # a bare string is a single identifier
    identifiers = [synonyms] if isinstance(synonyms, str) else synonyms

    if field is None:
        field = get_default_str_field(cls)
    if not isinstance(field, str):
        # not a plain string: take the name from `.field.name`
        field = field.field.name

    # a QuerySet carries its model class on `.model`
    registry = cls.model if isinstance(cls, QuerySet) else cls

    try:
        registry._meta.get_field(synonyms_field)
        df = _filter_query_based_on_species(
            orm=registry, species=kwargs.get("species")
        )
    except FieldDoesNotExist:
        # no synonyms field: map against an empty frame
        df = pd.DataFrame()

    return map_synonyms(
        df=df,
        identifiers=identifiers,
        field=field,
        return_mapper=return_mapper,
        case_sensitive=case_sensitive,
        keep=keep,
        synonyms_field=synonyms_field,
    )
189
+
190
+
191
# Names of the functions defined in this module that are attached to
# `SynonymsAware` by the loop at the bottom.
METHOD_NAMES = ["map_synonyms", "add_synonym", "remove_synonym", "set_abbr"]

if _TESTING:  # type: ignore
    from inspect import signature

    # Capture the signatures currently declared on `SynonymsAware`; this runs
    # before the attach loop below (presumably so tests can compare them with
    # the implementations in this module -- confirm against the test suite).
    SIGS = {
        name: signature(getattr(SynonymsAware, name))
        for name in METHOD_NAMES
        if not name.startswith("__")
    }

# Hand each implementation to `attach_func_to_class_method`, together with
# the target class and this module's globals().
for name in METHOD_NAMES:
    attach_func_to_class_method(name, SynonymsAware, globals())
lamindb/_validate.py ADDED
@@ -0,0 +1,134 @@
1
+ from typing import Dict, List, Optional, Union
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from django.db.models import QuerySet
6
+ from lamin_utils._inspect import InspectResult
7
+ from lamindb_setup.dev._docs import doc_args
8
+ from lnschema_core import Registry, ValidationAware
9
+ from lnschema_core.types import ListLike, StrField
10
+
11
+ from lamindb.dev.utils import attach_func_to_class_method
12
+
13
+ from . import _TESTING
14
+ from ._from_values import _has_species_field
15
+
16
+
17
@classmethod  # type: ignore
@doc_args(ValidationAware.inspect.__doc__)
def inspect(
    cls,
    values: ListLike,
    field: StrField,
    *,
    mute: bool = False,
    **kwargs,
) -> InspectResult:
    """{}"""
    # The "{}" docstring above is a placeholder; `doc_args` receives the
    # docstring of `ValidationAware.inspect` (presumably to fill it in).
    #
    # Public entry point only: all arguments go unchanged to `_inspect`.
    result = _inspect(cls, values, field, mute=mute, **kwargs)
    return result
35
+
36
+
37
@classmethod  # type: ignore
@doc_args(ValidationAware.validate.__doc__)
def validate(cls, values: ListLike, field: StrField, **kwargs) -> np.ndarray[bool]:
    """{}"""
    # The "{}" docstring above is a placeholder; `doc_args` receives the
    # docstring of `ValidationAware.validate` (presumably to fill it in).
    #
    # Public entry point only: all arguments go unchanged to `_validate`.
    validated = _validate(cls, values, field, **kwargs)
    return validated
42
+
43
+
44
def _inspect(
    cls,
    values: ListLike,
    field: StrField,
    *,
    mute: bool = False,
    **kwargs,
) -> Union["pd.DataFrame", Dict[str, List[str]]]:
    """Implementation behind ``inspect``.

    Normalizes the inputs, loads the registry content into a DataFrame
    (filtered by ``kwargs["species"]`` if given) and delegates to
    ``lamin_utils._inspect.inspect`` with synonym inspection switched on.
    All of ``kwargs`` is forwarded to that function as well.
    """
    from lamin_utils._inspect import inspect

    # a bare string is a single identifier
    identifiers = [values] if isinstance(values, str) else values
    # not a plain string: take the name from `.field.name`
    field_name = field if isinstance(field, str) else field.field.name
    # a QuerySet carries its model class on `.model`
    orm = cls.model if isinstance(cls, QuerySet) else cls

    registry_df = _filter_query_based_on_species(
        orm=orm, species=kwargs.get("species")
    )
    return inspect(
        df=registry_df,
        identifiers=identifiers,
        field=str(field_name),
        inspect_synonyms=True,
        mute=mute,
        **kwargs,
    )
70
+
71
+
72
def _validate(cls, values: ListLike, field: StrField, **kwargs) -> np.ndarray[bool]:
    """Implementation behind ``validate``.

    Fetches the values of ``field`` from the registry (filtered by
    ``kwargs["species"]`` if given) and delegates to
    ``lamin_utils._inspect.validate`` with ``case_sensitive=True``.
    All of ``kwargs`` is forwarded to that function as well.
    """
    from lamin_utils._inspect import validate

    # a bare string is a single identifier
    identifiers = [values] if isinstance(values, str) else values
    # not a plain string: take the name from `.field.name`
    field_name = field if isinstance(field, str) else field.field.name
    # a QuerySet carries its model class on `.model`
    orm = cls.model if isinstance(cls, QuerySet) else cls

    known_values = _filter_query_based_on_species(
        orm=orm, species=kwargs.get("species"), values_list_field=field_name
    )
    field_values = pd.Series(known_values)
    return validate(
        identifiers=identifiers,
        field_values=field_values,
        case_sensitive=True,
        **kwargs,
    )
90
+
91
+
92
def _filter_query_based_on_species(
    orm: Union[Registry, QuerySet],
    species: Optional[Union[str, Registry]] = None,
    values_list_field: Optional[str] = None,
):
    """Query ``orm``, restricted to ``species`` where applicable.

    Args:
        orm: A registry class or a QuerySet.
        species: Passed to ``create_or_get_species_record``; only consulted
            when ``_has_species_field(orm)`` is true.
        values_list_field: If ``None``, all records are returned as a
            DataFrame. Otherwise the flat ``values_list`` of this field is
            returned.
    """
    import pandas as pd

    is_queryset = isinstance(orm, QuerySet)
    want_dataframe = values_list_field is None

    if want_dataframe:
        records = orm.all() if is_queryset else orm.objects.all()
    else:
        records = orm if is_queryset else orm.objects

    if _has_species_field(orm):
        # here, we can safely import lnschema_bionty
        from lnschema_bionty._bionty import create_or_get_species_record

        model = orm.model if is_queryset else orm
        species_record = create_or_get_species_record(species=species, orm=model)
        if species_record is not None:
            records = records.filter(species__name=species_record.name)

    if want_dataframe:
        return pd.DataFrame.from_records(records.values())
    return records.values_list(values_list_field, flat=True)
117
+
118
+
119
# Names of the functions defined in this module that are attached to
# `ValidationAware` by the loop at the bottom.
METHOD_NAMES = [
    "validate",
    "inspect",
]

if _TESTING:  # type: ignore
    from inspect import signature

    # Capture the signatures currently declared on `ValidationAware`; this
    # runs before the attach loop below (presumably so tests can compare them
    # with the implementations in this module -- confirm against the tests).
    SIGS = {
        name: signature(getattr(ValidationAware, name))
        for name in METHOD_NAMES
        if not name.startswith("__")
    }

# Hand each implementation to `attach_func_to_class_method`, together with
# the target class and this module's globals().
for name in METHOD_NAMES:
    attach_func_to_class_method(name, ValidationAware, globals())
lamindb/_view.py CHANGED
@@ -6,17 +6,19 @@ from IPython.display import display
6
6
  from lamin_utils import colors
7
7
  from lamindb_setup import settings
8
8
  from lamindb_setup.dev._setup_schema import get_schema_module_name
9
- from lnschema_core import ORM
9
+ from lnschema_core import Registry
10
10
 
11
11
 
12
- def view(n: int = 10, schema: Optional[str] = None, orms: Optional[List[str]] = None):
12
+ def view(
13
+ n: int = 10, schema: Optional[str] = None, registries: Optional[List[str]] = None
14
+ ):
13
15
  """View data.
14
16
 
15
17
  Args:
16
18
  n: ``int = 10`` Display the last `n` rows of a table.
17
19
  schema: ``Optional[str] = None`` Schema module to view. Defaults to
18
20
  `None` and displays all schema modules.
19
- orms: ``Optional[List[str]] = None`` List of ORM names. Defaults to
21
+ registries: ``Optional[List[str]] = None`` List of Registry names. Defaults to
20
22
  `None` and lists all ORMs.
21
23
 
22
24
  Examples:
@@ -30,22 +32,26 @@ def view(n: int = 10, schema: Optional[str] = None, orms: Optional[List[str]] =
30
32
  for schema_name in schema_names:
31
33
  schema_module = importlib.import_module(get_schema_module_name(schema_name))
32
34
 
33
- all_orms = {
35
+ all_registries = {
34
36
  orm
35
37
  for orm in schema_module.__dict__.values()
36
- if inspect.isclass(orm) and issubclass(orm, ORM) and orm.__name__ != "ORM"
38
+ if inspect.isclass(orm)
39
+ and issubclass(orm, Registry)
40
+ and orm.__name__ != "Registry"
37
41
  }
38
- if orms is not None:
39
- filtered_orms = {orm for orm in all_orms if orm.__name__ in orms}
42
+ if registries is not None:
43
+ filtered_registries = {
44
+ orm for orm in all_registries if orm.__name__ in registries
45
+ }
40
46
  else:
41
- filtered_orms = all_orms
47
+ filtered_registries = all_registries
42
48
  if len(schema_names) > 1:
43
49
  section = f"* module: {colors.green(colors.bold(schema_name))} *"
44
50
  section_no_color = f"* module: {schema_name} *"
45
51
  print("*" * len(section_no_color))
46
52
  print(section)
47
53
  print("*" * len(section_no_color))
48
- for orm in sorted(filtered_orms, key=lambda x: x.__name__):
54
+ for orm in sorted(filtered_registries, key=lambda x: x.__name__):
49
55
  if hasattr(orm, "updated_at"):
50
56
  df = orm.filter().order_by("-updated_at")[:n].df()
51
57
  else:
lamindb/dev/__init__.py CHANGED
@@ -3,23 +3,30 @@
3
3
  .. autosummary::
4
4
  :toctree: .
5
5
 
6
- ORM
6
+ Registry
7
+ Data
7
8
  QuerySet
8
- Manager
9
+ QueryManager
9
10
  FeatureManager
11
+ ValidationAware
12
+ SynonymsAware
13
+ InspectResult
10
14
  datasets
11
15
  hashing
12
16
  storage
13
17
  Settings
14
18
  run_context
19
+ exc.ValidationError
15
20
  """
16
21
 
17
- from lnschema_core.models import ORM
22
+ from lamin_utils._inspect import InspectResult
23
+ from lnschema_core.models import Data, Registry, SynonymsAware, ValidationAware
18
24
 
19
- from lamindb._feature_manager import FeatureManager
20
- from lamindb._manager import Manager
21
- from lamindb._queryset import QuerySet
25
+ from lamindb._query_manager import QueryManager
26
+ from lamindb._query_set import QuerySet
27
+ from lamindb.dev._feature_manager import FeatureManager
22
28
 
23
29
  from .._context import run_context
24
30
  from . import datasets # noqa
31
+ from . import _data, exc
25
32
  from ._settings import Settings
lamindb/dev/_data.py ADDED
@@ -0,0 +1,195 @@
1
+ from collections import defaultdict
2
+ from typing import Dict, List, Optional, Union
3
+
4
+ from lamin_utils import logger
5
+ from lamindb_setup.dev._docs import doc_args
6
+ from lnschema_core.models import Data, Feature, FeatureSet, Label, Registry
7
+
8
+ from .._query_set import QuerySet
9
+ from .._registry import get_default_str_field
10
+ from .._save import save
11
+ from ._feature_manager import FeatureManager
12
+ from .exc import ValidationError
13
+
14
+
15
def validate_and_cast_feature(
    feature: Union[str, Feature], records: List[Registry]
) -> Feature:
    """Resolve a feature name to its ``Feature`` record.

    Anything that is not a string is returned untouched. A string is looked
    up by name; if no ``Feature`` with that name exists, a ``ValidationError``
    is raised whose message suggests how to create one for the registries of
    ``records``.
    """
    if not isinstance(feature, str):
        return feature

    feature_name = feature
    feature = Feature.filter(name=feature_name).one_or_none()
    if feature is not None:
        return feature

    # unknown feature: build a copy-pasteable suggestion for the user
    registries = {record.__class__.__get_name_with_schema__() for record in records}
    registries_str = "|".join(registries)
    msg = (
        f"ln.Feature(name='{feature_name}', type='category',"
        f" registries='{registries_str}').save()"
    )
    raise ValidationError(f"Feature not validated. If it looks correct: {msg}")
32
+
33
+
34
@doc_args(Data.get_labels.__doc__)
def get_labels(
    self,
    feature: Optional[Union[str, Registry]] = None,
    mute: bool = False,
    flat_names: bool = False,
) -> Union[QuerySet, Dict[str, QuerySet], List]:
    """{}"""
    # The "{}" docstring above is a placeholder; `doc_args` receives the
    # docstring of `Data.get_labels` (presumably to fill it in).
    #
    # Raises ValueError if no feature is passed, if a feature name cannot be
    # resolved, or if the feature has no registries linked.
    if feature is None:
        # `feature` defaults to None, but everything below needs a feature;
        # fail with a clear message instead of an AttributeError on None
        raise ValueError("Please pass a feature: get_labels(feature='myfeature')")
    if isinstance(feature, str):
        feature_name = feature
        feature = Feature.filter(name=feature_name).one_or_none()
        if feature is None:
            raise ValueError("feature doesn't exist")
    if feature.registries is None:
        raise ValueError("feature does not have linked labels")
    # `registries` is a "|"-separated string of registry names
    registries_to_check = feature.registries.split("|")
    if len(registries_to_check) > 1 and not mute:
        logger.warning("labels come from multiple registries!")
    qs_by_registry = {}
    for registry in registries_to_check:
        accessor = getattr(self, self.features._accessor_by_orm[registry])
        # currently need to distinguish between Label and non-Label, because
        # we only have the feature information for Label
        if registry == "core.Label":
            links_to_labels = accessor.through.objects.filter(
                file_id=self.id, feature_id=feature.id
            )
            label_ids = [link.label_id for link in links_to_labels]
            qs_by_registry[registry] = Label.objects.filter(id__in=label_ids)
        else:
            qs_by_registry[registry] = accessor.all()
    if flat_names:
        # returns a flat list of names
        values = []
        for query_set in qs_by_registry.values():
            values += query_set.list(get_default_str_field(query_set))
        return values
    if len(registries_to_check) == 1:
        # single registry: return its query set directly instead of a dict
        return qs_by_registry[registries_to_check[0]]
    return qs_by_registry
78
+
79
+
80
@doc_args(Data.add_labels.__doc__)
def add_labels(
    self,
    records: Union[Registry, List[Registry], QuerySet],
    feature: Optional[Union[str, Registry]] = None,
) -> None:
    """{}"""
    # The "{}" docstring above is a placeholder; `doc_args` receives the
    # docstring of `Data.add_labels` (presumably to fill it in).

    # --- 1. normalize `records` to a list of records and validate them ---
    if isinstance(records, (QuerySet, QuerySet.__base__)):  # need to have both
        records = records.list()
    if isinstance(records, str) or not isinstance(records, List):
        records = [records]
    # NOTE(review): `records[0]` raises IndexError for an empty list -- confirm
    # whether callers can pass one.
    if isinstance(records[0], str):  # type: ignore
        raise ValueError(
            "Please pass a record (a `Registry` object), not a string, e.g., via:"
            " label"
            f" = ln.Label(name='{records[0]}')"  # type: ignore
        )
    if self._state.adding:
        raise ValueError("Please save the file or dataset before adding a label!")
    for record in records:
        if record._state.adding:
            raise ValidationError(
                f"{record} not validated. If it looks correct: record.save()"
            )
    # a feature name is resolved to a Feature record; None stays None
    feature = validate_and_cast_feature(feature, records)
    # remember what the caller passed: `feature` is rebound by the loops below
    orig_feature = feature

    # --- 2. group records by (feature, registry name) ---
    records_by_feature_orm = defaultdict(list)
    for record in records:
        if feature is None:
            # no feature passed: fall back to the record's own `_feature`
            error_msg = "Please pass feature: add_labels(labels, feature='myfeature')"
            record_feature = feature
            if hasattr(record, "_feature"):
                record_feature = record._feature
            if record_feature is None:
                raise ValueError(error_msg)
            # TODO: refactor so that we don't call the following line
            # repeatedly for the same feature
            record_feature = validate_and_cast_feature(record_feature, [record])
        else:
            record_feature = feature
        records_by_feature_orm[
            (record_feature, record.__class__.__get_name_with_schema__())
        ].append(record)

    # --- 3. link the records to `self` ---
    # ensure all labels are saved
    save(records)
    # NOTE: this loop rebinds both `feature` and `records`
    for (feature, orm_name), records in records_by_feature_orm.items():
        getattr(self, self.features._accessor_by_orm[orm_name]).add(
            *records, through_defaults={"feature_id": feature.id}
        )

    # --- 4. collect the features already linked via feature sets ---
    # NOTE(review): the link table is filtered on `file_id` although the error
    # message above mentions "file or dataset" -- confirm for non-file records.
    feature_set_links = self.feature_sets.through.objects.filter(file_id=self.id)
    feature_set_ids = [link.feature_set_id for link in feature_set_links.all()]
    # get all linked features of type Feature
    feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
    # maps slot name -> features, only for feature sets whose registry is
    # "core.Feature"
    linked_features_by_slot = {
        feature_set_links.filter(feature_set_id=feature_set.id)
        .one()
        .slot: feature_set.features.all()
        for feature_set in feature_sets
        if "core.Feature" == feature_set.registry
    }

    # --- 5. update feature registries and the "external" feature set ---
    for (feature, orm_name), records in records_by_feature_orm.items():
        feature = validate_and_cast_feature(feature, records)
        msg = ""
        if orig_feature is None:
            # the feature came from the records: report the links created
            records_display = ", ".join(
                [
                    f"'{getattr(record, get_default_str_field(record))}'"
                    for record in records
                ]
            )
            msg += f"linked labels {records_display} to feature '{feature.name}'"
        if feature.registries is None or orm_name not in feature.registries:
            if len(msg) > 0:
                msg += ", "
            msg += f"linked feature '{feature.name}' to registry '{orm_name}'"
        # `registries` is a "|"-separated string of registry names
        if feature.registries is None:
            feature.registries = orm_name
        elif orm_name not in feature.registries:
            feature.registries += f"|{orm_name}"
        feature.save()
        if len(msg) > 0:
            logger.save(msg)
        # check whether we have to update the feature set that manages labels
        # (Feature) to account for a new feature
        found_feature = False
        for _, linked_features in linked_features_by_slot.items():
            if feature in linked_features:
                found_feature = True
        if not found_feature:
            # NOTE(review): `linked_features_by_slot` was computed before this
            # loop and is not refreshed after a feature set is created here --
            # confirm the behavior when several new features are added at once.
            if "external" not in linked_features_by_slot:
                feature_set = FeatureSet([feature], modality="meta")
                feature_set.save()
                self.features.add_feature_set(feature_set, slot="external")
                logger.save("created feature set for slot 'external'")
            else:
                feature_set = self.features._feature_set_by_slot["external"]
                feature_set.features.add(feature)
                feature_set.n += 1
                feature_set.save()
                logger.save(
                    f"linked feature {feature.name} to feature set {feature_set}"
                )
182
+
183
+
184
@property  # type: ignore
@doc_args(Data.features.__doc__)
def features(self) -> "FeatureManager":
    """{}"""
    # The "{}" docstring above is a placeholder; `doc_args` receives the
    # docstring of `Data.features` (presumably to fill it in).
    from lamindb.dev._feature_manager import FeatureManager

    # a new manager wrapping this record is built on every access
    manager = FeatureManager(self)
    return manager
191
+
192
+
193
+ setattr(Data, "features", features)
194
+ setattr(Data, "add_labels", add_labels)
195
+ setattr(Data, "get_labels", get_labels)