lamindb 0.76.1__py3-none-any.whl → 0.76.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +10 -6
- lamindb/_artifact.py +86 -53
- lamindb/_can_validate.py +10 -3
- lamindb/_collection.py +17 -18
- lamindb/_curate.py +130 -49
- lamindb/_feature.py +0 -49
- lamindb/_filter.py +10 -23
- lamindb/_finish.py +3 -3
- lamindb/_from_values.py +14 -10
- lamindb/_is_versioned.py +3 -5
- lamindb/_query_manager.py +2 -2
- lamindb/_query_set.py +58 -5
- lamindb/_record.py +29 -39
- lamindb/_save.py +2 -3
- lamindb/_transform.py +23 -10
- lamindb/core/__init__.py +2 -0
- lamindb/core/_context.py +19 -14
- lamindb/core/_feature_manager.py +25 -8
- lamindb/core/_label_manager.py +1 -1
- lamindb/core/_mapped_collection.py +31 -1
- lamindb/core/exceptions.py +1 -1
- lamindb/core/storage/__init__.py +1 -1
- lamindb/core/storage/_backed_access.py +2 -38
- lamindb/core/storage/_tiledbsoma.py +192 -0
- lamindb/core/storage/paths.py +2 -6
- lamindb/core/versioning.py +43 -47
- lamindb/integrations/__init__.py +3 -0
- lamindb/integrations/_vitessce.py +2 -0
- {lamindb-0.76.1.dist-info → lamindb-0.76.3.dist-info}/METADATA +6 -14
- lamindb-0.76.3.dist-info/RECORD +59 -0
- lamindb-0.76.1.dist-info/RECORD +0 -58
- {lamindb-0.76.1.dist-info → lamindb-0.76.3.dist-info}/LICENSE +0 -0
- {lamindb-0.76.1.dist-info → lamindb-0.76.3.dist-info}/WHEEL +0 -0
lamindb/_query_set.py
CHANGED
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Iterable, NamedTuple
|
|
6
6
|
import pandas as pd
|
7
7
|
from django.db import models
|
8
8
|
from django.db.models import F
|
9
|
+
from lamin_utils import logger
|
9
10
|
from lamindb_setup.core._docs import doc_args
|
10
11
|
from lnschema_core.models import (
|
11
12
|
Artifact,
|
@@ -13,8 +14,10 @@ from lnschema_core.models import (
|
|
13
14
|
Collection,
|
14
15
|
IsVersioned,
|
15
16
|
Record,
|
17
|
+
Registry,
|
16
18
|
Run,
|
17
19
|
Transform,
|
20
|
+
VisibilityChoice,
|
18
21
|
)
|
19
22
|
|
20
23
|
from lamindb.core.exceptions import DoesNotExist
|
@@ -64,6 +67,55 @@ def one_helper(self):
|
|
64
67
|
return self[0]
|
65
68
|
|
66
69
|
|
70
|
+
def process_expressions(registry: Registry, expressions: dict) -> dict:
|
71
|
+
if registry in {Artifact, Collection}:
|
72
|
+
# visibility is set to 0 unless expressions contains id or uid equality
|
73
|
+
if not (
|
74
|
+
"id" in expressions
|
75
|
+
or "uid" in expressions
|
76
|
+
or "uid__startswith" in expressions
|
77
|
+
):
|
78
|
+
visibility = "visibility"
|
79
|
+
if not any(e.startswith(visibility) for e in expressions):
|
80
|
+
expressions[visibility] = (
|
81
|
+
VisibilityChoice.default.value
|
82
|
+
) # default visibility
|
83
|
+
# if visibility is None, do not apply a filter
|
84
|
+
# otherwise, it would mean filtering for NULL values, which doesn't make
|
85
|
+
# sense for a non-NULLABLE column
|
86
|
+
elif visibility in expressions and expressions[visibility] is None:
|
87
|
+
expressions.pop(visibility)
|
88
|
+
return expressions
|
89
|
+
|
90
|
+
|
91
|
+
def get(
|
92
|
+
registry_or_queryset: type[Record] | QuerySet,
|
93
|
+
idlike: int | str | None = None,
|
94
|
+
**expressions,
|
95
|
+
) -> Record:
|
96
|
+
if isinstance(registry_or_queryset, QuerySet):
|
97
|
+
qs = registry_or_queryset
|
98
|
+
registry = qs.model
|
99
|
+
else:
|
100
|
+
qs = QuerySet(model=registry_or_queryset)
|
101
|
+
registry = registry_or_queryset
|
102
|
+
if isinstance(idlike, int):
|
103
|
+
return super(QuerySet, qs).get(id=idlike)
|
104
|
+
elif isinstance(idlike, str):
|
105
|
+
qs = qs.filter(uid__startswith=idlike)
|
106
|
+
if issubclass(registry, IsVersioned):
|
107
|
+
if len(idlike) <= registry._len_stem_uid:
|
108
|
+
return qs.latest_version().one()
|
109
|
+
else:
|
110
|
+
return qs.one()
|
111
|
+
else:
|
112
|
+
return qs.one()
|
113
|
+
else:
|
114
|
+
assert idlike is None # noqa: S101
|
115
|
+
expressions = process_expressions(registry, expressions)
|
116
|
+
return registry.objects.get(**expressions)
|
117
|
+
|
118
|
+
|
67
119
|
class RecordsList(UserList):
|
68
120
|
"""Is ordered, can't be queried, but has `.df()`."""
|
69
121
|
|
@@ -193,6 +245,7 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
193
245
|
# both Transform & Run might reference artifacts
|
194
246
|
if self.model in {Artifact, Collection, Transform, Run}:
|
195
247
|
for record in self:
|
248
|
+
logger.important(f"deleting {record}")
|
196
249
|
record.delete(*args, **kwargs)
|
197
250
|
else:
|
198
251
|
self._delete_base_class(*args, **kwargs)
|
@@ -219,12 +272,12 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
219
272
|
return None
|
220
273
|
return self[0]
|
221
274
|
|
222
|
-
def
|
223
|
-
"""
|
275
|
+
def get(self, idlike: int | str | None = None, **expressions) -> Record:
|
276
|
+
"""Query a single record. Raises error if there are more or none."""
|
277
|
+
return get(self, idlike, **expressions)
|
224
278
|
|
225
|
-
|
226
|
-
|
227
|
-
"""
|
279
|
+
def one(self) -> Record:
|
280
|
+
"""Exactly one result. Raises error if there are more or none."""
|
228
281
|
return one_helper(self)
|
229
282
|
|
230
283
|
def one_or_none(self) -> Record | None:
|
lamindb/_record.py
CHANGED
@@ -12,7 +12,7 @@ from lamin_utils._lookup import Lookup
|
|
12
12
|
from lamindb_setup._connect_instance import get_owner_name_from_identifier
|
13
13
|
from lamindb_setup.core._docs import doc_args
|
14
14
|
from lamindb_setup.core._hub_core import connect_instance
|
15
|
-
from lnschema_core.models import IsVersioned, Record
|
15
|
+
from lnschema_core.models import Collection, IsVersioned, Record
|
16
16
|
|
17
17
|
from lamindb._utils import attach_func_to_class_method
|
18
18
|
from lamindb.core._settings import settings
|
@@ -90,13 +90,22 @@ def __init__(record: Record, *args, **kwargs):
|
|
90
90
|
match = suggest_records_with_similar_names(record, kwargs)
|
91
91
|
if match:
|
92
92
|
if "version" in kwargs:
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
93
|
+
if kwargs["version"] is not None:
|
94
|
+
version_comment = " and version"
|
95
|
+
existing_record = record.__class__.filter(
|
96
|
+
name=kwargs["name"], version=kwargs["version"]
|
97
|
+
).one_or_none()
|
98
|
+
else:
|
99
|
+
# for a versioned record, an exact name match is not a
|
100
|
+
# criterion for retrieving a record in case `version`
|
101
|
+
# isn't passed - we'd always pull out many records with exactly the
|
102
|
+
# same name
|
103
|
+
existing_record = None
|
97
104
|
else:
|
98
105
|
version_comment = ""
|
99
|
-
existing_record = record.__class__.filter(
|
106
|
+
existing_record = record.__class__.filter(
|
107
|
+
name=kwargs["name"]
|
108
|
+
).one_or_none()
|
100
109
|
if existing_record is not None:
|
101
110
|
logger.important(
|
102
111
|
f"returning existing {record.__class__.__name__} record with same"
|
@@ -129,23 +138,11 @@ def get(
|
|
129
138
|
**expressions,
|
130
139
|
) -> Record:
|
131
140
|
"""{}""" # noqa: D415
|
132
|
-
|
141
|
+
# this is the only place in which we need the lamindb queryset
|
142
|
+
# in this file; everywhere else it should be Django's
|
143
|
+
from lamindb._query_set import QuerySet
|
133
144
|
|
134
|
-
|
135
|
-
return filter(cls, id=idlike).one()
|
136
|
-
elif isinstance(idlike, str):
|
137
|
-
qs = filter(cls, uid__startswith=idlike)
|
138
|
-
if issubclass(cls, IsVersioned):
|
139
|
-
if len(idlike) <= cls._len_stem_uid:
|
140
|
-
return qs.latest_version().one()
|
141
|
-
else:
|
142
|
-
return qs.one()
|
143
|
-
else:
|
144
|
-
return qs.one()
|
145
|
-
else:
|
146
|
-
assert idlike is None # noqa: S101
|
147
|
-
# below behaves exactly like `.one()`
|
148
|
-
return cls.objects.get(**expressions)
|
145
|
+
return QuerySet(model=cls).get(idlike, **expressions)
|
149
146
|
|
150
147
|
|
151
148
|
@classmethod # type:ignore
|
@@ -393,9 +390,11 @@ def add_db_connection(db: str, using: str):
|
|
393
390
|
@doc_args(Record.using.__doc__)
|
394
391
|
def using(
|
395
392
|
cls,
|
396
|
-
instance: str,
|
393
|
+
instance: str | None,
|
397
394
|
) -> QuerySet:
|
398
395
|
"""{}""" # noqa: D415
|
396
|
+
if instance is None:
|
397
|
+
return QuerySet(model=cls, using=None)
|
399
398
|
from lamindb_setup._connect_instance import (
|
400
399
|
load_instance_settings,
|
401
400
|
update_db_using_local,
|
@@ -541,24 +540,15 @@ def save(self, *args, **kwargs) -> Record:
|
|
541
540
|
init_self_from_db(self, result)
|
542
541
|
else:
|
543
542
|
# save versioned record
|
544
|
-
if isinstance(self, IsVersioned) and self.
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
# need one additional request
|
549
|
-
is_new_version_of = self.__class__.objects.get(
|
550
|
-
is_latest=True, uid__startswith=self.stem_uid
|
551
|
-
)
|
552
|
-
logger.warning(
|
553
|
-
f"didn't pass the latest version in `is_new_version_of`, retrieved it: {is_new_version_of}"
|
554
|
-
)
|
555
|
-
is_new_version_of.is_latest = False
|
543
|
+
if isinstance(self, IsVersioned) and self._revises is not None:
|
544
|
+
assert self._revises.is_latest # noqa: S101
|
545
|
+
revises = self._revises
|
546
|
+
revises.is_latest = False
|
556
547
|
with transaction.atomic():
|
557
|
-
|
558
|
-
|
559
|
-
)
|
560
|
-
is_new_version_of.save()
|
548
|
+
revises._revises = None # ensure we don't start a recursion
|
549
|
+
revises.save()
|
561
550
|
super(Record, self).save(*args, **kwargs)
|
551
|
+
self._revises = None
|
562
552
|
# save unversioned record
|
563
553
|
else:
|
564
554
|
super(Record, self).save(*args, **kwargs)
|
lamindb/_save.py
CHANGED
@@ -48,8 +48,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
|
|
48
48
|
|
49
49
|
Examples:
|
50
50
|
|
51
|
-
Save a
|
52
|
-
than writing a loop over ``projects.save()``:
|
51
|
+
Save a list of records:
|
53
52
|
|
54
53
|
>>> labels = [ln.ULabel(f"Label {i}") for i in range(10)]
|
55
54
|
>>> ln.save(projects)
|
@@ -61,7 +60,7 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
|
|
61
60
|
|
62
61
|
Update a single existing record:
|
63
62
|
|
64
|
-
>>> transform = ln.
|
63
|
+
>>> transform = ln.Transform.get("0Cb86EZj")
|
65
64
|
>>> transform.name = "New name"
|
66
65
|
>>> transform.save()
|
67
66
|
|
lamindb/_transform.py
CHANGED
@@ -2,12 +2,13 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING
|
4
4
|
|
5
|
+
from lamin_utils import logger
|
5
6
|
from lamindb_setup.core._docs import doc_args
|
6
7
|
from lnschema_core.models import Run, Transform
|
7
8
|
|
8
9
|
from ._parents import _view_parents
|
9
10
|
from ._run import delete_run_artifacts
|
10
|
-
from .core.versioning import
|
11
|
+
from .core.versioning import message_update_key_in_version_family, process_revises
|
11
12
|
|
12
13
|
if TYPE_CHECKING:
|
13
14
|
from lnschema_core.types import TransformType
|
@@ -19,26 +20,38 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
19
20
|
return None
|
20
21
|
name: str | None = kwargs.pop("name") if "name" in kwargs else None
|
21
22
|
key: str | None = kwargs.pop("key") if "key" in kwargs else None
|
22
|
-
|
23
|
-
kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
|
24
|
-
)
|
23
|
+
revises: Transform | None = kwargs.pop("revises") if "revises" in kwargs else None
|
25
24
|
version: str | None = kwargs.pop("version") if "version" in kwargs else None
|
26
25
|
type: TransformType | None = kwargs.pop("type") if "type" in kwargs else "pipeline"
|
27
26
|
reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
|
28
27
|
reference_type: str | None = (
|
29
28
|
kwargs.pop("reference_type") if "reference_type" in kwargs else None
|
30
29
|
)
|
30
|
+
if "is_new_version_of" in kwargs:
|
31
|
+
logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
|
32
|
+
revises = kwargs.pop("is_new_version_of")
|
31
33
|
# below is internal use that we'll hopefully be able to eliminate
|
32
34
|
uid: str | None = kwargs.pop("uid") if "uid" in kwargs else None
|
33
35
|
if not len(kwargs) == 0:
|
34
36
|
raise ValueError(
|
35
|
-
"Only name, key, version, type,
|
37
|
+
"Only name, key, version, type, revises, reference, "
|
36
38
|
f"reference_type can be passed, but you passed: {kwargs}"
|
37
39
|
)
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
40
|
+
# Transform allows passing a uid, all others don't
|
41
|
+
if uid is None and key is not None:
|
42
|
+
revises = Transform.filter(key=key).order_by("-created_at").first()
|
43
|
+
if revises is not None and key is not None and revises.key != key:
|
44
|
+
note = message_update_key_in_version_family(
|
45
|
+
suid=revises.stem_uid,
|
46
|
+
existing_key=revises.key,
|
47
|
+
new_key=key,
|
48
|
+
registry="Artifact",
|
49
|
+
)
|
50
|
+
raise ValueError(
|
51
|
+
f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
|
52
|
+
)
|
53
|
+
new_uid, version, name, revises = process_revises(revises, version, name, Transform)
|
54
|
+
# this is only because the user-facing constructor allows passing a uid
|
42
55
|
# most others don't
|
43
56
|
if uid is None:
|
44
57
|
has_consciously_provided_uid = False
|
@@ -54,7 +67,7 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
54
67
|
reference=reference,
|
55
68
|
reference_type=reference_type,
|
56
69
|
_has_consciously_provided_uid=has_consciously_provided_uid,
|
57
|
-
|
70
|
+
revises=revises,
|
58
71
|
)
|
59
72
|
|
60
73
|
|
lamindb/core/__init__.py
CHANGED
@@ -30,6 +30,7 @@ Curators:
|
|
30
30
|
.. autosummary::
|
31
31
|
:toctree: .
|
32
32
|
|
33
|
+
BaseCurator
|
33
34
|
DataFrameCurator
|
34
35
|
AnnDataCurator
|
35
36
|
MuDataCurator
|
@@ -80,6 +81,7 @@ from lnschema_core.models import (
|
|
80
81
|
|
81
82
|
from lamindb._curate import (
|
82
83
|
AnnDataCurator,
|
84
|
+
BaseCurator,
|
83
85
|
CurateLookup,
|
84
86
|
DataFrameCurator,
|
85
87
|
MuDataCurator,
|
lamindb/core/_context.py
CHANGED
@@ -18,7 +18,7 @@ from ._sync_git import get_transform_reference_from_git_repo
|
|
18
18
|
from ._track_environment import track_environment
|
19
19
|
from .exceptions import (
|
20
20
|
MissingContext,
|
21
|
-
|
21
|
+
NotebookFileNotSavedToDisk,
|
22
22
|
NotebookNotSavedError,
|
23
23
|
NoTitleError,
|
24
24
|
TrackNotCalled,
|
@@ -26,7 +26,7 @@ from .exceptions import (
|
|
26
26
|
)
|
27
27
|
from .subsettings._transform_settings import transform_settings
|
28
28
|
from .versioning import bump_version as bump_version_function
|
29
|
-
from .versioning import increment_base62
|
29
|
+
from .versioning import increment_base62, message_update_key_in_version_family
|
30
30
|
|
31
31
|
if TYPE_CHECKING:
|
32
32
|
from lamindb_setup.core.types import UPathStr
|
@@ -87,9 +87,9 @@ def raise_missing_context(transform_type: str, key: str) -> None:
|
|
87
87
|
message = f"To track this {transform_type}, set\n\n"
|
88
88
|
else:
|
89
89
|
uid = transform.uid
|
90
|
-
suid,
|
91
|
-
|
92
|
-
new_uid = f"{suid}{
|
90
|
+
suid, vuid = uid[: Transform._len_stem_uid], uid[Transform._len_stem_uid :]
|
91
|
+
new_vuid = increment_base62(vuid)
|
92
|
+
new_uid = f"{suid}{new_vuid}"
|
93
93
|
message = f"You already have a {transform_type} version family with key '{key}', suid '{transform.stem_uid}' & name '{transform.name}'.\n\n- to create a new {transform_type} version family, rename your file and rerun: ln.context.track()\n- to bump the version, set: "
|
94
94
|
message += f'ln.context.uid = "{new_uid}"'
|
95
95
|
if transform_type == "notebook":
|
@@ -199,7 +199,7 @@ class Context:
|
|
199
199
|
:class:`~lamindb.Transform` object of ``type`` ``"pipeline"``:
|
200
200
|
|
201
201
|
>>> ln.Transform(name="Cell Ranger", version="2", type="pipeline").save()
|
202
|
-
>>> transform = ln.Transform.
|
202
|
+
>>> transform = ln.Transform.get(name="Cell Ranger", version="2")
|
203
203
|
>>> ln.context.track(transform=transform)
|
204
204
|
"""
|
205
205
|
self._path = None
|
@@ -224,7 +224,7 @@ class Context:
|
|
224
224
|
f"Please pass consistent version: ln.context.version = '{transform.version}'"
|
225
225
|
)
|
226
226
|
# test whether version was already used for another member of the family
|
227
|
-
suid,
|
227
|
+
suid, vuid = (
|
228
228
|
self.uid[: Transform._len_stem_uid],
|
229
229
|
self.uid[Transform._len_stem_uid :],
|
230
230
|
)
|
@@ -233,7 +233,7 @@ class Context:
|
|
233
233
|
).one_or_none()
|
234
234
|
if (
|
235
235
|
transform is not None
|
236
|
-
and
|
236
|
+
and vuid != transform.uid[Transform._len_stem_uid :]
|
237
237
|
):
|
238
238
|
better_version = bump_version_function(self.version)
|
239
239
|
raise SystemExit(
|
@@ -432,7 +432,12 @@ class Context:
|
|
432
432
|
suid = transform.stem_uid
|
433
433
|
new_suid = ids.base62_12()
|
434
434
|
transform_type = "Notebook" if is_run_from_ipython else "Script"
|
435
|
-
note =
|
435
|
+
note = message_update_key_in_version_family(
|
436
|
+
suid=suid,
|
437
|
+
existing_key=transform.key,
|
438
|
+
new_key=key,
|
439
|
+
registry="Transform",
|
440
|
+
)
|
436
441
|
raise UpdateContext(
|
437
442
|
f"{transform_type} filename changed.\n\nEither init a new transform family by setting:\n\n"
|
438
443
|
f'ln.context.uid = "{new_suid}0000"\n\n{note}'
|
@@ -460,14 +465,14 @@ class Context:
|
|
460
465
|
if is_run_from_ipython
|
461
466
|
else "Source code changed"
|
462
467
|
)
|
463
|
-
suid,
|
468
|
+
suid, vuid = (
|
464
469
|
uid[: Transform._len_stem_uid],
|
465
470
|
uid[Transform._len_stem_uid :],
|
466
471
|
)
|
467
|
-
|
472
|
+
new_vuid = increment_base62(vuid)
|
468
473
|
raise UpdateContext(
|
469
474
|
f"{change_type}, bump version by setting:\n\n"
|
470
|
-
f'ln.context.uid = "{suid}{
|
475
|
+
f'ln.context.uid = "{suid}{new_vuid}"'
|
471
476
|
)
|
472
477
|
else:
|
473
478
|
self._logging_message += f"loaded Transform('{transform.uid}')"
|
@@ -499,8 +504,8 @@ class Context:
|
|
499
504
|
get_seconds_since_modified(context._path) > 3
|
500
505
|
and os.getenv("LAMIN_TESTING") is None
|
501
506
|
):
|
502
|
-
raise
|
503
|
-
"Please save the notebook in your editor right before running `ln.finish()`"
|
507
|
+
raise NotebookFileNotSavedToDisk(
|
508
|
+
"Please save the notebook manually in your editor right before running `ln.finish()`"
|
504
509
|
)
|
505
510
|
save_context_core(
|
506
511
|
run=context.run,
|
lamindb/core/_feature_manager.py
CHANGED
@@ -361,9 +361,7 @@ def __getitem__(self, slot) -> QuerySet:
|
|
361
361
|
return getattr(feature_set, self._accessor_by_registry[orm_name]).all()
|
362
362
|
|
363
363
|
|
364
|
-
|
365
|
-
def filter(cls, **expression) -> QuerySet:
|
366
|
-
"""Filter features."""
|
364
|
+
def filter_base(cls, **expression):
|
367
365
|
if cls in {FeatureManagerArtifact, FeatureManagerCollection}:
|
368
366
|
model = Feature
|
369
367
|
value_model = FeatureValue
|
@@ -379,14 +377,20 @@ def filter(cls, **expression) -> QuerySet:
|
|
379
377
|
new_expression = {}
|
380
378
|
features = model.filter(name__in=keys_normalized).all().distinct()
|
381
379
|
for key, value in expression.items():
|
382
|
-
|
380
|
+
split_key = key.split("__")
|
381
|
+
normalized_key = split_key[0]
|
382
|
+
comparator = ""
|
383
|
+
if len(split_key) == 2:
|
384
|
+
comparator = f"__{split_key[1]}"
|
383
385
|
feature = features.get(name=normalized_key)
|
384
386
|
if not feature.dtype.startswith("cat"):
|
385
|
-
|
386
|
-
|
387
|
+
expression = {"feature": feature, f"value{comparator}": value}
|
388
|
+
feature_value = value_model.filter(**expression)
|
389
|
+
new_expression["_feature_values__in"] = feature_value
|
387
390
|
else:
|
388
391
|
if isinstance(value, str):
|
389
|
-
|
392
|
+
expression = {f"name{comparator}": value}
|
393
|
+
label = ULabel.get(**expression)
|
390
394
|
new_expression["ulabels"] = label
|
391
395
|
else:
|
392
396
|
raise NotImplementedError
|
@@ -398,6 +402,18 @@ def filter(cls, **expression) -> QuerySet:
|
|
398
402
|
return Run.filter(**new_expression)
|
399
403
|
|
400
404
|
|
405
|
+
@classmethod # type: ignore
|
406
|
+
def filter(cls, **expression) -> QuerySet:
|
407
|
+
"""Query artifacts by features."""
|
408
|
+
return filter_base(cls, **expression)
|
409
|
+
|
410
|
+
|
411
|
+
@classmethod # type: ignore
|
412
|
+
def get(cls, **expression) -> Record:
|
413
|
+
"""Query a single artifact by feature."""
|
414
|
+
return filter_base(cls, **expression).one()
|
415
|
+
|
416
|
+
|
401
417
|
@property # type: ignore
|
402
418
|
def _feature_set_by_slot(self):
|
403
419
|
"""Feature sets by slot."""
|
@@ -474,7 +490,7 @@ def _add_values(
|
|
474
490
|
_feature_values = []
|
475
491
|
not_validated_values = []
|
476
492
|
for key, value in features_values.items():
|
477
|
-
feature = model.
|
493
|
+
feature = model.get(name=key)
|
478
494
|
inferred_type, converted_value = infer_feature_type_convert_json(
|
479
495
|
value,
|
480
496
|
mute=True,
|
@@ -843,5 +859,6 @@ FeatureManager._add_set_from_anndata = _add_set_from_anndata
|
|
843
859
|
FeatureManager._add_set_from_mudata = _add_set_from_mudata
|
844
860
|
FeatureManager._add_from = _add_from
|
845
861
|
FeatureManager.filter = filter
|
862
|
+
FeatureManager.get = get
|
846
863
|
ParamManager.add_values = add_values_params
|
847
864
|
ParamManager.get_values = get_values
|
lamindb/core/_label_manager.py
CHANGED
@@ -234,7 +234,7 @@ class LabelManager:
|
|
234
234
|
if hasattr(self._host, related_name):
|
235
235
|
for feature_name, labels in labels_by_features.items():
|
236
236
|
if feature_name is not None:
|
237
|
-
feature_id = Feature.
|
237
|
+
feature_id = Feature.get(name=feature_name).id
|
238
238
|
else:
|
239
239
|
feature_id = None
|
240
240
|
getattr(self._host, related_name).add(
|
@@ -17,6 +17,7 @@ from .storage._anndata_accessor import (
|
|
17
17
|
GroupTypes,
|
18
18
|
StorageType,
|
19
19
|
_safer_read_index,
|
20
|
+
get_spec,
|
20
21
|
registry,
|
21
22
|
)
|
22
23
|
|
@@ -153,13 +154,30 @@ class MappedCollection:
|
|
153
154
|
self._make_connections(path_list, parallel)
|
154
155
|
|
155
156
|
self.n_obs_list = []
|
156
|
-
for storage in self.storages:
|
157
|
+
for i, storage in enumerate(self.storages):
|
157
158
|
with _Connect(storage) as store:
|
158
159
|
X = store["X"]
|
160
|
+
store_path = self.path_list[i]
|
161
|
+
self._check_csc_raise_error(X, "X", store_path)
|
159
162
|
if isinstance(X, ArrayTypes): # type: ignore
|
160
163
|
self.n_obs_list.append(X.shape[0])
|
161
164
|
else:
|
162
165
|
self.n_obs_list.append(X.attrs["shape"][0])
|
166
|
+
for layer_key in self.layers_keys:
|
167
|
+
if layer_key == "X":
|
168
|
+
continue
|
169
|
+
self._check_csc_raise_error(
|
170
|
+
store["layers"][layer_key],
|
171
|
+
f"layers/{layer_key}",
|
172
|
+
store_path,
|
173
|
+
)
|
174
|
+
if self.obsm_keys is not None:
|
175
|
+
for obsm_key in self.obsm_keys:
|
176
|
+
self._check_csc_raise_error(
|
177
|
+
store["obsm"][obsm_key],
|
178
|
+
f"obsm/{obsm_key}",
|
179
|
+
store_path,
|
180
|
+
)
|
163
181
|
self.n_obs = sum(self.n_obs_list)
|
164
182
|
|
165
183
|
self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
|
@@ -281,6 +299,18 @@ class MappedCollection:
|
|
281
299
|
vars = pd.Index(vars)
|
282
300
|
return [i for i, vrs in enumerate(self.var_list) if not vrs.equals(vars)]
|
283
301
|
|
302
|
+
def _check_csc_raise_error(
|
303
|
+
self, elem: GroupType | ArrayType, key: str, path: UPathStr
|
304
|
+
):
|
305
|
+
if isinstance(elem, ArrayTypes): # type: ignore
|
306
|
+
return
|
307
|
+
if get_spec(elem).encoding_type == "csc_matrix":
|
308
|
+
if not self.parallel:
|
309
|
+
self.close()
|
310
|
+
raise ValueError(
|
311
|
+
f"{key} in {path} is a csc matrix, `MappedCollection` doesn't support this format yet."
|
312
|
+
)
|
313
|
+
|
284
314
|
def __len__(self):
|
285
315
|
return self.n_obs
|
286
316
|
|
lamindb/core/exceptions.py
CHANGED
lamindb/core/storage/__init__.py
CHANGED
@@ -18,8 +18,8 @@ Array accessors.
|
|
18
18
|
|
19
19
|
from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
|
20
20
|
|
21
|
-
from ._anndata_sizes import size_adata
|
22
21
|
from ._backed_access import AnnDataAccessor, BackedAccessor
|
22
|
+
from ._tiledbsoma import save_tiledbsoma_experiment
|
23
23
|
from ._valid_suffixes import VALID_SUFFIXES
|
24
24
|
from .objects import infer_suffix, write_to_disk
|
25
25
|
from .paths import delete_storage, load_to_memory
|
@@ -1,12 +1,13 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from dataclasses import dataclass
|
4
|
-
from typing import TYPE_CHECKING, Any, Callable
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable
|
5
5
|
|
6
6
|
from anndata._io.specs.registry import get_spec
|
7
7
|
from lnschema_core import Artifact
|
8
8
|
|
9
9
|
from ._anndata_accessor import AnnDataAccessor, StorageType, registry
|
10
|
+
from ._tiledbsoma import _open_tiledbsoma
|
10
11
|
from .paths import filepath_from_artifact
|
11
12
|
|
12
13
|
if TYPE_CHECKING:
|
@@ -52,43 +53,6 @@ def _track_writes_factory(obj: Any, finalize: Callable):
|
|
52
53
|
return obj
|
53
54
|
|
54
55
|
|
55
|
-
def _open_tiledbsoma(
|
56
|
-
filepath: UPath, mode: Literal["r", "w"] = "r"
|
57
|
-
) -> SOMACollection | SOMAExperiment:
|
58
|
-
try:
|
59
|
-
import tiledbsoma as soma
|
60
|
-
except ImportError as e:
|
61
|
-
raise ImportError("Please install tiledbsoma: pip install tiledbsoma") from e
|
62
|
-
filepath_str = filepath.as_posix()
|
63
|
-
if filepath.protocol == "s3":
|
64
|
-
from lamindb_setup.core._settings_storage import get_storage_region
|
65
|
-
|
66
|
-
region = get_storage_region(filepath_str)
|
67
|
-
tiledb_config = {"vfs.s3.region": region}
|
68
|
-
storage_options = filepath.storage_options
|
69
|
-
if "key" in storage_options:
|
70
|
-
tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
|
71
|
-
if "secret" in storage_options:
|
72
|
-
tiledb_config["vfs.s3.aws_secret_access_key"] = storage_options["secret"]
|
73
|
-
if "token" in storage_options:
|
74
|
-
tiledb_config["vfs.s3.aws_session_token"] = storage_options["token"]
|
75
|
-
ctx = soma.SOMATileDBContext(tiledb_config=tiledb_config)
|
76
|
-
# this is a strange bug
|
77
|
-
# for some reason iterdir futher gives incorrect results
|
78
|
-
# if cache is not invalidated
|
79
|
-
# instead of obs and ms it gives ms and ms in the list of names
|
80
|
-
filepath.fs.invalidate_cache()
|
81
|
-
else:
|
82
|
-
ctx = None
|
83
|
-
|
84
|
-
soma_objects = [obj.name for obj in filepath.iterdir()]
|
85
|
-
if "obs" in soma_objects and "ms" in soma_objects:
|
86
|
-
SOMAType = soma.Experiment
|
87
|
-
else:
|
88
|
-
SOMAType = soma.Collection
|
89
|
-
return SOMAType.open(filepath_str, mode=mode, context=ctx)
|
90
|
-
|
91
|
-
|
92
56
|
@dataclass
|
93
57
|
class BackedAccessor:
|
94
58
|
"""h5py.File or zarr.Group accessor."""
|