lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +203 -102
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/_tiledbsoma.py +29 -13
  20. lamindb/core/storage/objects.py +6 -0
  21. lamindb/core/subsettings/__init__.py +2 -0
  22. lamindb/core/subsettings/_annotation_settings.py +11 -0
  23. lamindb/curators/__init__.py +7 -3349
  24. lamindb/curators/_legacy.py +2056 -0
  25. lamindb/curators/core.py +1534 -0
  26. lamindb/errors.py +11 -0
  27. lamindb/examples/__init__.py +27 -0
  28. lamindb/examples/schemas/__init__.py +12 -0
  29. lamindb/examples/schemas/_anndata.py +25 -0
  30. lamindb/examples/schemas/_simple.py +19 -0
  31. lamindb/integrations/_vitessce.py +8 -5
  32. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  33. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  34. lamindb/migrations/0093_alter_schemacomponent_unique_together.py +16 -0
  35. lamindb/models/__init__.py +4 -1
  36. lamindb/models/_describe.py +21 -4
  37. lamindb/models/_feature_manager.py +382 -287
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +177 -106
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/collection.py +73 -52
  42. lamindb/models/core.py +1 -1
  43. lamindb/models/feature.py +51 -17
  44. lamindb/models/has_parents.py +69 -14
  45. lamindb/models/project.py +1 -1
  46. lamindb/models/query_manager.py +221 -22
  47. lamindb/models/query_set.py +247 -172
  48. lamindb/models/record.py +65 -247
  49. lamindb/models/run.py +4 -4
  50. lamindb/models/save.py +8 -2
  51. lamindb/models/schema.py +456 -184
  52. lamindb/models/transform.py +2 -2
  53. lamindb/models/ulabel.py +8 -5
  54. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/METADATA +6 -6
  55. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/RECORD +57 -43
  56. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/LICENSE +0 -0
  57. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/WHEEL +0 -0
@@ -1,18 +1,225 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import re
4
+ from functools import reduce
3
5
  from typing import TYPE_CHECKING, NamedTuple
4
6
 
5
- from django.db import models
7
+ from django.db.models import (
8
+ IntegerField,
9
+ Manager,
10
+ Q,
11
+ QuerySet,
12
+ TextField,
13
+ Value,
14
+ )
15
+ from django.db.models.functions import Cast, Coalesce
16
+ from django.db.models.lookups import (
17
+ Contains,
18
+ Exact,
19
+ IContains,
20
+ IExact,
21
+ IRegex,
22
+ IStartsWith,
23
+ Regex,
24
+ StartsWith,
25
+ )
6
26
  from lamin_utils import logger
27
+ from lamin_utils._lookup import Lookup
7
28
  from lamindb_setup.core._docs import doc_args
8
29
 
9
- from lamindb.models import Record
10
-
11
30
  if TYPE_CHECKING:
12
- from lamindb.base.types import StrField
31
+ from ..base.types import StrField
32
+
33
+
34
+ def _search(
35
+ cls,
36
+ string: str,
37
+ *,
38
+ field: StrField | list[StrField] | None = None,
39
+ limit: int | None = 20,
40
+ case_sensitive: bool = False,
41
+ truncate_string: bool = False,
42
+ ) -> QuerySet:
43
+ """Search.
44
+
45
+ Args:
46
+ string: The input string to match against the field ontology values.
47
+ field: The field or fields to search. Search all string fields by default.
48
+ limit: Maximum number of top results to return.
49
+ case_sensitive: Whether the match is case sensitive.
50
+
51
+ Returns:
52
+ A `QuerySet` of the matching records, ordered by descending match rank
53
+ and truncated to at most `limit` results.
54
+
55
+ See Also:
56
+ :meth:`~lamindb.models.Record.filter`
57
+ :meth:`~lamindb.models.Record.lookup`
58
+
59
+ Examples:
60
+ >>> ulabels = ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name")
61
+ >>> ln.save(ulabels)
62
+ >>> ln.ULabel.search("ULabel2")
63
+ """
64
+ if string is None:
65
+ raise ValueError("Cannot search for None value! Please pass a valid string.")
66
+
67
+ input_queryset = (
68
+ cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
69
+ )
70
+ registry = input_queryset.model
71
+ name_field = getattr(registry, "_name_field", "name")
72
+ if field is None:
73
+ fields = [
74
+ field.name
75
+ for field in registry._meta.fields
76
+ if field.get_internal_type() in {"CharField", "TextField"}
77
+ ]
78
+ else:
79
+ if not isinstance(field, list):
80
+ fields_input = [field]
81
+ else:
82
+ fields_input = field
83
+ fields = []
84
+ for field in fields_input:
85
+ if not isinstance(field, str):
86
+ try:
87
+ fields.append(field.field.name)
88
+ except AttributeError as error:
89
+ raise TypeError(
90
+ "Please pass a Record string field, e.g., `CellType.name`!"
91
+ ) from error
92
+ else:
93
+ fields.append(field)
94
+
95
+ if truncate_string:
96
+ if (len_string := len(string)) > 5:
97
+ n_80_pct = int(len_string * 0.8)
98
+ string = string[:n_80_pct]
99
+
100
+ string = string.strip()
101
+ string_escape = re.escape(string)
102
+
103
+ exact_lookup = Exact if case_sensitive else IExact
104
+ regex_lookup = Regex if case_sensitive else IRegex
105
+ contains_lookup = Contains if case_sensitive else IContains
106
+
107
+ ranks = []
108
+ contains_filters = []
109
+ for field in fields:
110
+ field_expr = Coalesce(
111
+ Cast(field, output_field=TextField()),
112
+ Value(""),
113
+ output_field=TextField(),
114
+ )
115
+ # exact rank
116
+ exact_expr = exact_lookup(field_expr, string)
117
+ exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
118
+ ranks.append(exact_rank)
119
+ # exact synonym
120
+ synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
121
+ synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
122
+ ranks.append(synonym_rank)
123
+ # match as sub-phrase
124
+ sub_expr = regex_lookup(
125
+ field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
126
+ )
127
+ sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
128
+ ranks.append(sub_rank)
129
+ # startswith and avoid matching string with " " on the right
130
+ # mostly for truncated
131
+ startswith_expr = regex_lookup(
132
+ field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
133
+ )
134
+ startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
135
+ ranks.append(startswith_rank)
136
+ # match as sub-phrase from the left, mostly for truncated
137
+ right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
138
+ right_rank = Cast(right_expr, output_field=IntegerField()) * 2
139
+ ranks.append(right_rank)
140
+ # match as sub-phrase from the right
141
+ left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
142
+ left_rank = Cast(left_expr, output_field=IntegerField()) * 2
143
+ ranks.append(left_rank)
144
+ # simple contains filter
145
+ contains_expr = contains_lookup(field_expr, string)
146
+ contains_filter = Q(contains_expr)
147
+ contains_filters.append(contains_filter)
148
+ # also rank by contains
149
+ contains_rank = Cast(contains_expr, output_field=IntegerField())
150
+ ranks.append(contains_rank)
151
+ # additional rule for truncated strings
152
+ # weight matches from the beginning of the string higher
153
+ # sometimes whole words get truncated and startswith_expr is not enough
154
+ if truncate_string and field == name_field:
155
+ startswith_lookup = StartsWith if case_sensitive else IStartsWith
156
+ name_startswith_expr = startswith_lookup(field_expr, string)
157
+ name_startswith_rank = (
158
+ Cast(name_startswith_expr, output_field=IntegerField()) * 2
159
+ )
160
+ ranks.append(name_startswith_rank)
13
161
 
162
+ ranked_queryset = (
163
+ input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
164
+ .alias(rank=sum(ranks))
165
+ .order_by("-rank")
166
+ )
14
167
 
15
- class QueryManager(models.Manager):
168
+ return ranked_queryset[:limit]
169
+
170
+
171
+ def _lookup(
172
+ cls,
173
+ field: StrField | None = None,
174
+ return_field: StrField | None = None,
175
+ using_key: str | None = None,
176
+ ) -> NamedTuple:
177
+ """Return an auto-complete object for a field.
178
+
179
+ Args:
180
+ field: The field to look up the values for. Defaults to first string field.
181
+ return_field: The field to return. If `None`, returns the whole record.
182
+
183
+ Returns:
184
+ A `NamedTuple` of lookup information of the field values with a
185
+ dictionary converter.
186
+
187
+ See Also:
188
+ :meth:`~lamindb.models.Record.search`
189
+
190
+ Examples:
191
+ >>> import bionty as bt
192
+ >>> bt.settings.organism = "human"
193
+ >>> bt.Gene.from_source(symbol="ADGB-DT").save()
194
+ >>> lookup = bt.Gene.lookup()
195
+ >>> lookup.adgb_dt
196
+ >>> lookup_dict = lookup.dict()
197
+ >>> lookup_dict['ADGB-DT']
198
+ >>> lookup_by_ensembl_id = bt.Gene.lookup(field="ensembl_gene_id")
199
+ >>> lookup_by_ensembl_id.ensg00000002745
200
+ >>> lookup_return_symbols = bt.Gene.lookup(field="ensembl_gene_id", return_field="symbol")
201
+ """
202
+ from .record import get_name_field
203
+
204
+ queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
205
+ field = get_name_field(registry=queryset.model, field=field)
206
+
207
+ return Lookup(
208
+ records=queryset,
209
+ values=[i.get(field) for i in queryset.values()],
210
+ tuple_name=cls.__class__.__name__,
211
+ prefix="ln",
212
+ ).lookup(
213
+ return_field=(
214
+ get_name_field(registry=queryset.model, field=return_field)
215
+ if return_field is not None
216
+ else None
217
+ )
218
+ )
219
+
220
+
221
+ # this is the default (._default_manager and ._base_manager) for lamindb models
222
+ class QueryManager(Manager):
16
223
  """Manage queries through fields.
17
224
 
18
225
  See Also:
@@ -64,10 +271,10 @@ class QueryManager(models.Manager):
64
271
  >>> label.parents.list("name")
65
272
  ['ULabel1', 'ULabel2', 'ULabel3']
66
273
  """
67
- self._track_run_input_manager()
68
274
  if field is None:
69
275
  return list(self.all())
70
276
  else:
277
+ self._track_run_input_manager()
71
278
  return list(self.values_list(field, flat=True))
72
279
 
73
280
  def df(self, **kwargs):
@@ -83,29 +290,21 @@ class QueryManager(models.Manager):
83
290
  For `**kwargs`, see :meth:`lamindb.models.QuerySet.df`.
84
291
  """
85
292
  self._track_run_input_manager()
86
- return self._all_base_class()
293
+ return super().all()
87
294
 
88
- @doc_args(Record.search.__doc__)
295
+ @doc_args(_search.__doc__)
89
296
  def search(self, string: str, **kwargs):
90
297
  """{}""" # noqa: D415
91
- from .record import _search
92
-
93
298
  return _search(cls=self.all(), string=string, **kwargs)
94
299
 
95
- @doc_args(Record.lookup.__doc__)
300
+ @doc_args(_lookup.__doc__)
96
301
  def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
97
302
  """{}""" # noqa: D415
98
- from .record import _lookup
99
-
100
303
  return _lookup(cls=self.all(), field=field, **kwargs)
101
304
 
305
+ def get_queryset(self):
306
+ from .query_set import BasicQuerySet
102
307
 
103
- models.Manager.list = QueryManager.list
104
- models.Manager.df = QueryManager.df
105
- models.Manager.search = QueryManager.search
106
- models.Manager.lookup = QueryManager.lookup
107
- models.Manager._track_run_input_manager = QueryManager._track_run_input_manager
108
- # the two lines below would be easy if we could actually inherit; like this,
109
- # they're suboptimal
110
- models.Manager._all_base_class = models.Manager.all
111
- models.Manager.all = QueryManager.all
308
+ # QueryManager returns BasicQuerySet because it is problematic to redefine .filter and .get
309
+ # for a query set used by the default manager
310
+ return BasicQuerySet(model=self.model, using=self._db, hints=self._hints)