lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +52 -36
- lamindb/_finish.py +17 -10
- lamindb/_tracked.py +1 -1
- lamindb/base/__init__.py +3 -1
- lamindb/base/fields.py +40 -22
- lamindb/base/ids.py +1 -94
- lamindb/base/types.py +2 -0
- lamindb/base/uids.py +117 -0
- lamindb/core/_context.py +203 -102
- lamindb/core/_settings.py +38 -25
- lamindb/core/datasets/__init__.py +11 -4
- lamindb/core/datasets/_core.py +5 -5
- lamindb/core/datasets/_small.py +0 -93
- lamindb/core/datasets/mini_immuno.py +172 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_backed_access.py +100 -6
- lamindb/core/storage/_polars_lazy_df.py +51 -0
- lamindb/core/storage/_pyarrow_dataset.py +15 -30
- lamindb/core/storage/_tiledbsoma.py +29 -13
- lamindb/core/storage/objects.py +6 -0
- lamindb/core/subsettings/__init__.py +2 -0
- lamindb/core/subsettings/_annotation_settings.py +11 -0
- lamindb/curators/__init__.py +7 -3349
- lamindb/curators/_legacy.py +2056 -0
- lamindb/curators/core.py +1534 -0
- lamindb/errors.py +11 -0
- lamindb/examples/__init__.py +27 -0
- lamindb/examples/schemas/__init__.py +12 -0
- lamindb/examples/schemas/_anndata.py +25 -0
- lamindb/examples/schemas/_simple.py +19 -0
- lamindb/integrations/_vitessce.py +8 -5
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
- lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
- lamindb/migrations/0093_alter_schemacomponent_unique_together.py +16 -0
- lamindb/models/__init__.py +4 -1
- lamindb/models/_describe.py +21 -4
- lamindb/models/_feature_manager.py +382 -287
- lamindb/models/_label_manager.py +8 -2
- lamindb/models/artifact.py +177 -106
- lamindb/models/artifact_set.py +122 -0
- lamindb/models/collection.py +73 -52
- lamindb/models/core.py +1 -1
- lamindb/models/feature.py +51 -17
- lamindb/models/has_parents.py +69 -14
- lamindb/models/project.py +1 -1
- lamindb/models/query_manager.py +221 -22
- lamindb/models/query_set.py +247 -172
- lamindb/models/record.py +65 -247
- lamindb/models/run.py +4 -4
- lamindb/models/save.py +8 -2
- lamindb/models/schema.py +456 -184
- lamindb/models/transform.py +2 -2
- lamindb/models/ulabel.py +8 -5
- {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/METADATA +6 -6
- {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/RECORD +57 -43
- {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/LICENSE +0 -0
- {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/WHEEL +0 -0
lamindb/models/query_manager.py
CHANGED
@@ -1,18 +1,225 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import re
|
4
|
+
from functools import reduce
|
3
5
|
from typing import TYPE_CHECKING, NamedTuple
|
4
6
|
|
5
|
-
from django.db import
|
7
|
+
from django.db.models import (
|
8
|
+
IntegerField,
|
9
|
+
Manager,
|
10
|
+
Q,
|
11
|
+
QuerySet,
|
12
|
+
TextField,
|
13
|
+
Value,
|
14
|
+
)
|
15
|
+
from django.db.models.functions import Cast, Coalesce
|
16
|
+
from django.db.models.lookups import (
|
17
|
+
Contains,
|
18
|
+
Exact,
|
19
|
+
IContains,
|
20
|
+
IExact,
|
21
|
+
IRegex,
|
22
|
+
IStartsWith,
|
23
|
+
Regex,
|
24
|
+
StartsWith,
|
25
|
+
)
|
6
26
|
from lamin_utils import logger
|
27
|
+
from lamin_utils._lookup import Lookup
|
7
28
|
from lamindb_setup.core._docs import doc_args
|
8
29
|
|
9
|
-
from lamindb.models import Record
|
10
|
-
|
11
30
|
if TYPE_CHECKING:
|
12
|
-
from
|
31
|
+
from ..base.types import StrField
|
32
|
+
|
33
|
+
|
34
|
+
def _search(
|
35
|
+
cls,
|
36
|
+
string: str,
|
37
|
+
*,
|
38
|
+
field: StrField | list[StrField] | None = None,
|
39
|
+
limit: int | None = 20,
|
40
|
+
case_sensitive: bool = False,
|
41
|
+
truncate_string: bool = False,
|
42
|
+
) -> QuerySet:
|
43
|
+
"""Search.
|
44
|
+
|
45
|
+
Args:
|
46
|
+
string: The input string to match against the field ontology values.
|
47
|
+
field: The field or fields to search. Search all string fields by default.
|
48
|
+
limit: Maximum amount of top results to return.
|
49
|
+
case_sensitive: Whether the match is case sensitive.
|
50
|
+
|
51
|
+
Returns:
|
52
|
+
A sorted `DataFrame` of search results with a score in column `score`.
|
53
|
+
If `return_queryset` is `True`. `QuerySet`.
|
54
|
+
|
55
|
+
See Also:
|
56
|
+
:meth:`~lamindb.models.Record.filter`
|
57
|
+
:meth:`~lamindb.models.Record.lookup`
|
58
|
+
|
59
|
+
Examples:
|
60
|
+
>>> ulabels = ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name")
|
61
|
+
>>> ln.save(ulabels)
|
62
|
+
>>> ln.ULabel.search("ULabel2")
|
63
|
+
"""
|
64
|
+
if string is None:
|
65
|
+
raise ValueError("Cannot search for None value! Please pass a valid string.")
|
66
|
+
|
67
|
+
input_queryset = (
|
68
|
+
cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
|
69
|
+
)
|
70
|
+
registry = input_queryset.model
|
71
|
+
name_field = getattr(registry, "_name_field", "name")
|
72
|
+
if field is None:
|
73
|
+
fields = [
|
74
|
+
field.name
|
75
|
+
for field in registry._meta.fields
|
76
|
+
if field.get_internal_type() in {"CharField", "TextField"}
|
77
|
+
]
|
78
|
+
else:
|
79
|
+
if not isinstance(field, list):
|
80
|
+
fields_input = [field]
|
81
|
+
else:
|
82
|
+
fields_input = field
|
83
|
+
fields = []
|
84
|
+
for field in fields_input:
|
85
|
+
if not isinstance(field, str):
|
86
|
+
try:
|
87
|
+
fields.append(field.field.name)
|
88
|
+
except AttributeError as error:
|
89
|
+
raise TypeError(
|
90
|
+
"Please pass a Record string field, e.g., `CellType.name`!"
|
91
|
+
) from error
|
92
|
+
else:
|
93
|
+
fields.append(field)
|
94
|
+
|
95
|
+
if truncate_string:
|
96
|
+
if (len_string := len(string)) > 5:
|
97
|
+
n_80_pct = int(len_string * 0.8)
|
98
|
+
string = string[:n_80_pct]
|
99
|
+
|
100
|
+
string = string.strip()
|
101
|
+
string_escape = re.escape(string)
|
102
|
+
|
103
|
+
exact_lookup = Exact if case_sensitive else IExact
|
104
|
+
regex_lookup = Regex if case_sensitive else IRegex
|
105
|
+
contains_lookup = Contains if case_sensitive else IContains
|
106
|
+
|
107
|
+
ranks = []
|
108
|
+
contains_filters = []
|
109
|
+
for field in fields:
|
110
|
+
field_expr = Coalesce(
|
111
|
+
Cast(field, output_field=TextField()),
|
112
|
+
Value(""),
|
113
|
+
output_field=TextField(),
|
114
|
+
)
|
115
|
+
# exact rank
|
116
|
+
exact_expr = exact_lookup(field_expr, string)
|
117
|
+
exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
|
118
|
+
ranks.append(exact_rank)
|
119
|
+
# exact synonym
|
120
|
+
synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
|
121
|
+
synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
|
122
|
+
ranks.append(synonym_rank)
|
123
|
+
# match as sub-phrase
|
124
|
+
sub_expr = regex_lookup(
|
125
|
+
field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
|
126
|
+
)
|
127
|
+
sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
|
128
|
+
ranks.append(sub_rank)
|
129
|
+
# startswith and avoid matching string with " " on the right
|
130
|
+
# mostly for truncated
|
131
|
+
startswith_expr = regex_lookup(
|
132
|
+
field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
|
133
|
+
)
|
134
|
+
startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
|
135
|
+
ranks.append(startswith_rank)
|
136
|
+
# match as sub-phrase from the left, mostly for truncated
|
137
|
+
right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
|
138
|
+
right_rank = Cast(right_expr, output_field=IntegerField()) * 2
|
139
|
+
ranks.append(right_rank)
|
140
|
+
# match as sub-phrase from the right
|
141
|
+
left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
|
142
|
+
left_rank = Cast(left_expr, output_field=IntegerField()) * 2
|
143
|
+
ranks.append(left_rank)
|
144
|
+
# simple contains filter
|
145
|
+
contains_expr = contains_lookup(field_expr, string)
|
146
|
+
contains_filter = Q(contains_expr)
|
147
|
+
contains_filters.append(contains_filter)
|
148
|
+
# also rank by contains
|
149
|
+
contains_rank = Cast(contains_expr, output_field=IntegerField())
|
150
|
+
ranks.append(contains_rank)
|
151
|
+
# additional rule for truncated strings
|
152
|
+
# weight matches from the beginning of the string higher
|
153
|
+
# sometimes whole words get truncated and startswith_expr is not enough
|
154
|
+
if truncate_string and field == name_field:
|
155
|
+
startswith_lookup = StartsWith if case_sensitive else IStartsWith
|
156
|
+
name_startswith_expr = startswith_lookup(field_expr, string)
|
157
|
+
name_startswith_rank = (
|
158
|
+
Cast(name_startswith_expr, output_field=IntegerField()) * 2
|
159
|
+
)
|
160
|
+
ranks.append(name_startswith_rank)
|
13
161
|
|
162
|
+
ranked_queryset = (
|
163
|
+
input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
|
164
|
+
.alias(rank=sum(ranks))
|
165
|
+
.order_by("-rank")
|
166
|
+
)
|
14
167
|
|
15
|
-
|
168
|
+
return ranked_queryset[:limit]
|
169
|
+
|
170
|
+
|
171
|
+
def _lookup(
    cls,
    field: StrField | None = None,
    return_field: StrField | None = None,
    using_key: str | None = None,
) -> NamedTuple:
    """Return an auto-complete object for a field.

    Args:
        field: The field to look up the values for. Defaults to first string field.
        return_field: The field to return. If `None`, returns the whole record.
        using_key: Not read by this implementation.

    Returns:
        A `NamedTuple` of lookup information of the field values with a
        dictionary converter.

    See Also:
        :meth:`~lamindb.models.Record.search`

    Examples:
        >>> import bionty as bt
        >>> bt.settings.organism = "human"
        >>> bt.Gene.from_source(symbol="ADGB-DT").save()
        >>> lookup = bt.Gene.lookup()
        >>> lookup.adgb_dt
        >>> lookup_dict = lookup.dict()
        >>> lookup_dict['ADGB-DT']
        >>> lookup_by_ensembl_id = bt.Gene.lookup(field="ensembl_gene_id")
        >>> lookup_by_ensembl_id.ensg00000002745
        >>> lookup_return_symbols = bt.Gene.lookup(field="ensembl_gene_id", return_field="symbol")
    """
    # imported here rather than at module level, as in the original
    from .record import get_name_field

    # accept a registry class as well as an already-built queryset/manager
    if isinstance(cls, (QuerySet, Manager)):
        queryset = cls.all()
    else:
        queryset = cls.objects.all()
    registry = queryset.model
    lookup_field = get_name_field(registry=registry, field=field)

    lookup_builder = Lookup(
        records=queryset,
        values=[row.get(lookup_field) for row in queryset.values()],
        tuple_name=cls.__class__.__name__,
        prefix="ln",
    )

    # only resolve the return field when the caller asked for one
    resolved_return_field = None
    if return_field is not None:
        resolved_return_field = get_name_field(registry=registry, field=return_field)
    return lookup_builder.lookup(return_field=resolved_return_field)
|
219
|
+
|
220
|
+
|
221
|
+
# this is the default (._default_manager and ._base_manager) for lamindb models
|
222
|
+
class QueryManager(Manager):
|
16
223
|
"""Manage queries through fields.
|
17
224
|
|
18
225
|
See Also:
|
@@ -64,10 +271,10 @@ class QueryManager(models.Manager):
|
|
64
271
|
>>> label.parents.list("name")
|
65
272
|
['ULabel1', 'ULabel2', 'ULabel3']
|
66
273
|
"""
|
67
|
-
self._track_run_input_manager()
|
68
274
|
if field is None:
|
69
275
|
return list(self.all())
|
70
276
|
else:
|
277
|
+
self._track_run_input_manager()
|
71
278
|
return list(self.values_list(field, flat=True))
|
72
279
|
|
73
280
|
def df(self, **kwargs):
|
@@ -83,29 +290,21 @@ class QueryManager(models.Manager):
|
|
83
290
|
For `**kwargs`, see :meth:`lamindb.models.QuerySet.df`.
|
84
291
|
"""
|
85
292
|
self._track_run_input_manager()
|
86
|
-
return
|
293
|
+
return super().all()
|
87
294
|
|
88
|
-
@doc_args(
|
295
|
+
@doc_args(_search.__doc__)
|
89
296
|
def search(self, string: str, **kwargs):
|
90
297
|
"""{}""" # noqa: D415
|
91
|
-
from .record import _search
|
92
|
-
|
93
298
|
return _search(cls=self.all(), string=string, **kwargs)
|
94
299
|
|
95
|
-
@doc_args(
|
300
|
+
@doc_args(_lookup.__doc__)
|
96
301
|
def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
|
97
302
|
"""{}""" # noqa: D415
|
98
|
-
from .record import _lookup
|
99
|
-
|
100
303
|
return _lookup(cls=self.all(), field=field, **kwargs)
|
101
304
|
|
305
|
+
def get_queryset(self):
|
306
|
+
from .query_set import BasicQuerySet
|
102
307
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
models.Manager.lookup = QueryManager.lookup
|
107
|
-
models.Manager._track_run_input_manager = QueryManager._track_run_input_manager
|
108
|
-
# the two lines below would be easy if we could actually inherit; like this,
|
109
|
-
# they're suboptimal
|
110
|
-
models.Manager._all_base_class = models.Manager.all
|
111
|
-
models.Manager.all = QueryManager.all
|
308
|
+
# QueryManager returns BasicQuerySet because it is problematic to redefine .filter and .get
|
309
|
+
# for a query set used by the default manager
|
310
|
+
return BasicQuerySet(model=self.model, using=self._db, hints=self._hints)
|