lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +39 -32
- lamindb/_artifact.py +95 -64
- lamindb/_can_curate.py +19 -10
- lamindb/_collection.py +51 -49
- lamindb/_feature.py +9 -9
- lamindb/_finish.py +99 -86
- lamindb/_from_values.py +20 -17
- lamindb/_is_versioned.py +2 -1
- lamindb/_parents.py +23 -16
- lamindb/_query_manager.py +3 -3
- lamindb/_query_set.py +85 -18
- lamindb/_record.py +121 -46
- lamindb/_run.py +3 -3
- lamindb/_save.py +14 -8
- lamindb/{_feature_set.py → _schema.py} +34 -31
- lamindb/_storage.py +2 -1
- lamindb/_transform.py +51 -23
- lamindb/_ulabel.py +17 -8
- lamindb/_view.py +15 -14
- lamindb/base/__init__.py +24 -0
- lamindb/base/fields.py +281 -0
- lamindb/base/ids.py +103 -0
- lamindb/base/types.py +51 -0
- lamindb/base/users.py +30 -0
- lamindb/base/validation.py +67 -0
- lamindb/core/__init__.py +19 -14
- lamindb/core/_context.py +297 -228
- lamindb/core/_data.py +44 -49
- lamindb/core/_describe.py +41 -31
- lamindb/core/_django.py +59 -44
- lamindb/core/_feature_manager.py +192 -168
- lamindb/core/_label_manager.py +22 -22
- lamindb/core/_mapped_collection.py +17 -14
- lamindb/core/_settings.py +1 -12
- lamindb/core/_sync_git.py +56 -9
- lamindb/core/_track_environment.py +1 -1
- lamindb/core/datasets/_core.py +5 -6
- lamindb/core/exceptions.py +0 -7
- lamindb/core/fields.py +1 -1
- lamindb/core/loaders.py +18 -2
- lamindb/core/{schema.py → relations.py} +22 -19
- lamindb/core/storage/_anndata_accessor.py +1 -2
- lamindb/core/storage/_backed_access.py +2 -1
- lamindb/core/storage/_tiledbsoma.py +40 -13
- lamindb/core/storage/objects.py +1 -1
- lamindb/core/storage/paths.py +13 -8
- lamindb/core/subsettings/__init__.py +0 -2
- lamindb/core/types.py +2 -23
- lamindb/core/versioning.py +11 -7
- lamindb/{_curate.py → curators/__init__.py} +700 -57
- lamindb/curators/_spatial.py +528 -0
- lamindb/integrations/_vitessce.py +1 -3
- lamindb/migrations/0052_squashed.py +1261 -0
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
- lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
- lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
- lamindb/migrations/0060_alter_artifact__actions.py +22 -0
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
- lamindb/migrations/0062_add_is_latest_field.py +32 -0
- lamindb/migrations/0063_populate_latest_field.py +45 -0
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
- lamindb/migrations/0069_squashed.py +1770 -0
- lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
- lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
- lamindb/migrations/0073_merge_ourprojects.py +945 -0
- lamindb/migrations/0074_lamindbv1_part4.py +374 -0
- lamindb/migrations/0075_lamindbv1_part5.py +276 -0
- lamindb/migrations/0076_lamindbv1_part6.py +621 -0
- lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
- lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
- lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
- lamindb/migrations/__init__.py +0 -0
- lamindb/models.py +4064 -0
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
- lamindb-1.0rc1.dist-info/RECORD +100 -0
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
- lamindb/core/subsettings/_transform_settings.py +0 -21
- lamindb-0.77.2.dist-info/RECORD +0 -63
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/_label_manager.py
CHANGED
@@ -5,12 +5,12 @@ from collections import defaultdict
|
|
5
5
|
from typing import TYPE_CHECKING
|
6
6
|
|
7
7
|
from django.db import connections
|
8
|
-
from lamin_utils import
|
9
|
-
from lnschema_core.models import CanCurate, Feature
|
8
|
+
from lamin_utils import logger
|
10
9
|
from rich.table import Column, Table
|
11
10
|
from rich.text import Text
|
11
|
+
from rich.tree import Tree
|
12
12
|
|
13
|
-
from lamindb._from_values import
|
13
|
+
from lamindb._from_values import _format_values
|
14
14
|
from lamindb._record import (
|
15
15
|
REGISTRY_UNIQUE_FIELD,
|
16
16
|
get_name_field,
|
@@ -18,6 +18,7 @@ from lamindb._record import (
|
|
18
18
|
transfer_to_default_db,
|
19
19
|
)
|
20
20
|
from lamindb._save import save
|
21
|
+
from lamindb.models import CanCurate, Feature
|
21
22
|
|
22
23
|
from ._describe import (
|
23
24
|
NAME_WIDTH,
|
@@ -28,15 +29,13 @@ from ._describe import (
|
|
28
29
|
)
|
29
30
|
from ._django import get_artifact_with_related, get_related_model
|
30
31
|
from ._settings import settings
|
31
|
-
from .
|
32
|
+
from .relations import dict_related_model_to_related_name
|
32
33
|
|
33
34
|
if TYPE_CHECKING:
|
34
|
-
from lnschema_core.models import Artifact, Collection, Record
|
35
|
-
from rich.tree import Tree
|
36
|
-
|
37
35
|
from lamindb._query_set import QuerySet
|
36
|
+
from lamindb.models import Artifact, Collection, Record
|
38
37
|
|
39
|
-
EXCLUDE_LABELS = {"
|
38
|
+
EXCLUDE_LABELS = {"_schemas_m2m"}
|
40
39
|
|
41
40
|
|
42
41
|
def _get_labels(
|
@@ -99,39 +98,40 @@ def describe_labels(
|
|
99
98
|
return tree
|
100
99
|
|
101
100
|
labels_table = Table(
|
102
|
-
Column(
|
103
|
-
Text.assemble(("Labels", "green_yellow")),
|
104
|
-
style="",
|
105
|
-
no_wrap=True,
|
106
|
-
width=NAME_WIDTH,
|
107
|
-
),
|
101
|
+
Column("", style="", no_wrap=True, width=NAME_WIDTH),
|
108
102
|
Column("", style="dim", no_wrap=True, width=TYPE_WIDTH),
|
109
103
|
Column("", width=VALUES_WIDTH, no_wrap=True),
|
110
|
-
|
104
|
+
show_header=False,
|
111
105
|
box=None,
|
112
106
|
pad_edge=False,
|
113
107
|
)
|
114
108
|
for related_name, labels in labels_data.items():
|
115
|
-
if not labels or related_name == "
|
109
|
+
if not labels or related_name == "_schemas_m2m":
|
116
110
|
continue
|
117
111
|
if isinstance(labels, dict): # postgres, labels are a dict[id, name]
|
118
|
-
print_values =
|
112
|
+
print_values = _format_values(labels.values(), n=10, quotes=False)
|
119
113
|
else: # labels are a QuerySet
|
120
114
|
field = get_name_field(labels)
|
121
|
-
print_values =
|
115
|
+
print_values = _format_values(
|
116
|
+
labels.values_list(field, flat=True), n=10, quotes=False
|
117
|
+
)
|
122
118
|
if print_values:
|
123
119
|
related_model = get_related_model(self, related_name)
|
124
|
-
type_str = related_model.
|
120
|
+
type_str = related_model.__get_name_with_module__()
|
125
121
|
labels_table.add_row(
|
126
122
|
f".{related_name}", Text(type_str, style="dim"), print_values
|
127
123
|
)
|
128
124
|
|
125
|
+
labels_header = Text("Labels", style="bold green_yellow")
|
129
126
|
if as_subtree:
|
130
127
|
if labels_table.rows:
|
131
|
-
|
128
|
+
labels_tree = Tree(labels_header, guide_style="dim")
|
129
|
+
labels_tree.add(labels_table)
|
130
|
+
return labels_tree
|
132
131
|
else:
|
133
132
|
if labels_table.rows:
|
134
|
-
tree.add(
|
133
|
+
labels_tree = tree.add(labels_header)
|
134
|
+
labels_tree.add(labels_table)
|
135
135
|
return tree
|
136
136
|
|
137
137
|
|
@@ -310,7 +310,7 @@ class LabelManager:
|
|
310
310
|
"""
|
311
311
|
d = dict_related_model_to_related_name(self._host)
|
312
312
|
registry = label.__class__
|
313
|
-
related_name = d.get(registry.
|
313
|
+
related_name = d.get(registry.__get_name_with_module__())
|
314
314
|
link_model = getattr(self._host, related_name).through
|
315
315
|
link_records = link_model.filter(
|
316
316
|
artifact_id=self._host.id, **{f"{registry.__name__.lower()}_id": label.id}
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from collections import Counter
|
4
4
|
from functools import reduce
|
5
|
-
from pathlib import Path
|
6
5
|
from typing import TYPE_CHECKING, Literal
|
7
6
|
|
8
7
|
import numpy as np
|
@@ -86,9 +85,9 @@ class MappedCollection:
|
|
86
85
|
retrieves ``.X``.
|
87
86
|
obsm_keys: Keys from the ``.obsm`` slots.
|
88
87
|
obs_keys: Keys from the ``.obs`` slots.
|
89
|
-
obs_filter: Select only observations with these values for the given obs
|
90
|
-
Should be a
|
91
|
-
and filtering values (a string or a tuple of strings) as
|
88
|
+
obs_filter: Select only observations with these values for the given obs columns.
|
89
|
+
Should be a dictionary with obs column names as keys
|
90
|
+
and filtering values (a string or a tuple of strings) as values.
|
92
91
|
join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
|
93
92
|
does not join.
|
94
93
|
encode_labels: Encode labels into integers.
|
@@ -107,7 +106,7 @@ class MappedCollection:
|
|
107
106
|
layers_keys: str | list[str] | None = None,
|
108
107
|
obs_keys: str | list[str] | None = None,
|
109
108
|
obsm_keys: str | list[str] | None = None,
|
110
|
-
obs_filter:
|
109
|
+
obs_filter: dict[str, str | tuple[str, ...]] | None = None,
|
111
110
|
join: Literal["inner", "outer"] | None = "inner",
|
112
111
|
encode_labels: bool | list[str] = True,
|
113
112
|
unknown_label: str | dict[str, str] | None = None,
|
@@ -121,11 +120,11 @@ class MappedCollection:
|
|
121
120
|
)
|
122
121
|
|
123
122
|
self.filtered = obs_filter is not None
|
124
|
-
if self.filtered and
|
125
|
-
|
126
|
-
"
|
127
|
-
"as the first element and filtering values as the second element"
|
123
|
+
if self.filtered and not isinstance(obs_filter, dict):
|
124
|
+
logger.warning(
|
125
|
+
"Passing a tuple to `obs_filter` is deprecated, use a dictionary"
|
128
126
|
)
|
127
|
+
obs_filter = {obs_filter[0]: obs_filter[1]}
|
129
128
|
|
130
129
|
if layers_keys is None:
|
131
130
|
self.layers_keys = ["X"]
|
@@ -183,12 +182,16 @@ class MappedCollection:
|
|
183
182
|
store_path = self.path_list[i]
|
184
183
|
self._check_csc_raise_error(X, "X", store_path)
|
185
184
|
if self.filtered:
|
186
|
-
|
187
|
-
|
188
|
-
np.isin(
|
185
|
+
indices_storage_mask = None
|
186
|
+
for obs_filter_key, obs_filter_values in obs_filter.items():
|
187
|
+
obs_filter_mask = np.isin(
|
189
188
|
self._get_labels(store, obs_filter_key), obs_filter_values
|
190
189
|
)
|
191
|
-
|
190
|
+
if indices_storage_mask is None:
|
191
|
+
indices_storage_mask = obs_filter_mask
|
192
|
+
else:
|
193
|
+
indices_storage_mask &= obs_filter_mask
|
194
|
+
indices_storage = np.where(indices_storage_mask)[0]
|
192
195
|
n_obs_storage = len(indices_storage)
|
193
196
|
else:
|
194
197
|
if isinstance(X, ArrayTypes): # type: ignore
|
@@ -348,7 +351,7 @@ class MappedCollection:
|
|
348
351
|
|
349
352
|
@property
|
350
353
|
def original_shapes(self) -> list[tuple[int, int]]:
|
351
|
-
"""Shapes of the underlying AnnData objects."""
|
354
|
+
"""Shapes of the underlying AnnData objects (with `obs_filter` applied)."""
|
352
355
|
if self.n_vars_list is None:
|
353
356
|
n_vars_list = [None] * len(self.n_obs_list)
|
354
357
|
else:
|
lamindb/core/_settings.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import os
|
4
|
-
from typing import TYPE_CHECKING
|
4
|
+
from typing import TYPE_CHECKING
|
5
5
|
|
6
6
|
import lamindb_setup as ln_setup
|
7
7
|
from lamin_utils import logger
|
@@ -10,7 +10,6 @@ from lamindb_setup.core._settings import settings as setup_settings
|
|
10
10
|
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
11
11
|
|
12
12
|
from .subsettings._creation_settings import CreationSettings, creation_settings
|
13
|
-
from .subsettings._transform_settings import TransformSettings, transform_settings
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
16
15
|
from collections.abc import Mapping
|
@@ -80,16 +79,6 @@ class Settings:
|
|
80
79
|
storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
|
81
80
|
return storage_settings
|
82
81
|
|
83
|
-
@property
|
84
|
-
def transform(self) -> TransformSettings:
|
85
|
-
"""Transform settings.
|
86
|
-
|
87
|
-
Is deprecated since version 0.76.1.
|
88
|
-
"""
|
89
|
-
# enable warning soon
|
90
|
-
# logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
|
91
|
-
return transform_settings
|
92
|
-
|
93
82
|
@property
|
94
83
|
def sync_git_repo(self) -> str | None:
|
95
84
|
"""Sync transforms with scripts in git repository.
|
lamindb/core/_sync_git.py
CHANGED
@@ -53,22 +53,69 @@ def check_local_git_repo() -> bool:
|
|
53
53
|
|
54
54
|
|
55
55
|
def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
|
56
|
-
|
56
|
+
# Fetch all remote branches so that we can also search them
|
57
|
+
fetch_command = ["git", "fetch", "origin", "+refs/heads/*:refs/remotes/origin/*"]
|
58
|
+
subprocess.run(fetch_command, cwd=repo_dir, check=True)
|
59
|
+
|
60
|
+
# Find the commit containing the blob hash in all branches
|
61
|
+
command = [
|
62
|
+
"git",
|
63
|
+
"log",
|
64
|
+
"--all",
|
65
|
+
f"--find-object={blob_hash}",
|
66
|
+
"--pretty=format:%H",
|
67
|
+
]
|
57
68
|
result = subprocess.run(
|
58
69
|
command,
|
59
70
|
capture_output=True,
|
60
71
|
cwd=repo_dir,
|
61
72
|
)
|
62
|
-
#
|
63
|
-
#
|
73
|
+
# We just care to find one commit
|
74
|
+
# Hence, we split by new line ("\n") and use the first one
|
64
75
|
commit_hash = result.stdout.decode().split("\n")[0]
|
65
|
-
|
76
|
+
|
77
|
+
if not commit_hash or result.returncode == 1:
|
66
78
|
return None
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
79
|
+
|
80
|
+
default_branch = (
|
81
|
+
subprocess.run(
|
82
|
+
["git", "rev-parse", "--abbrev-ref", "origin/HEAD"],
|
83
|
+
capture_output=True,
|
84
|
+
cwd=repo_dir,
|
85
|
+
text=True,
|
86
|
+
)
|
87
|
+
.stdout.strip()
|
88
|
+
.split("/")[-1]
|
89
|
+
)
|
90
|
+
|
91
|
+
# Find all branches containing the commit
|
92
|
+
commit_containing_branches = subprocess.run(
|
93
|
+
["git", "branch", "--all", "--contains", commit_hash],
|
94
|
+
capture_output=True,
|
95
|
+
cwd=repo_dir,
|
96
|
+
text=True,
|
97
|
+
).stdout.split("\n")
|
98
|
+
|
99
|
+
# Clean up branch names and filter out the default branch
|
100
|
+
commit_containing_branches = [
|
101
|
+
branch.strip().replace("remotes/", "")
|
102
|
+
for branch in commit_containing_branches
|
103
|
+
if branch.strip()
|
104
|
+
]
|
105
|
+
non_default_branches = [
|
106
|
+
branch for branch in commit_containing_branches if default_branch not in branch
|
107
|
+
]
|
108
|
+
|
109
|
+
if non_default_branches:
|
110
|
+
logger.warning(
|
111
|
+
f"code blob hash {blob_hash} was found in non-default branch(es): {', '.join(non_default_branches)}"
|
112
|
+
)
|
113
|
+
|
114
|
+
assert ( # noqa: S101
|
115
|
+
len(commit_hash) == 40
|
116
|
+
), f"commit hash |{commit_hash}| is not 40 characters long"
|
117
|
+
|
118
|
+
return commit_hash
|
72
119
|
|
73
120
|
|
74
121
|
def get_filepath_within_git_repo(
|
lamindb/core/datasets/_core.py
CHANGED
@@ -5,11 +5,10 @@ from typing import TYPE_CHECKING
|
|
5
5
|
from urllib.request import urlretrieve
|
6
6
|
|
7
7
|
import anndata as ad
|
8
|
-
import numpy as np
|
9
8
|
import pandas as pd
|
10
|
-
from lnschema_core import ids
|
11
9
|
from upath import UPath
|
12
10
|
|
11
|
+
from lamindb.base.ids import base62
|
13
12
|
from lamindb.core._settings import settings
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
@@ -146,7 +145,7 @@ def dir_iris_images() -> UPath: # pragma: no cover
|
|
146
145
|
This is why on the UI, the artifact shows up as output of the downstream
|
147
146
|
demo notebook rather than the upstream curation notebook. The lineage
|
148
147
|
information should still be captured by
|
149
|
-
https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/
|
148
|
+
https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/lamindb/models.py#L2050-L2052
|
150
149
|
but we don't use this in the UI yet.
|
151
150
|
"""
|
152
151
|
return UPath("s3://lamindata/iris_studies")
|
@@ -481,11 +480,11 @@ def dir_scrnaseq_cellranger(
|
|
481
480
|
fastqdir.mkdir(parents=True, exist_ok=True)
|
482
481
|
fastqfile1 = fastqdir / f"{sample_name}_R1_001.fastq.gz"
|
483
482
|
with open(fastqfile1, "w") as f:
|
484
|
-
f.write(f"{
|
483
|
+
f.write(f"{base62(n_char=6)}")
|
485
484
|
fastqfile2 = fastqdir / f"{sample_name}_R2_001.fastq.gz"
|
486
485
|
fastqfile2.touch(exist_ok=True)
|
487
486
|
with open(fastqfile2, "w") as f:
|
488
|
-
f.write(f"{
|
487
|
+
f.write(f"{base62(n_char=6)}")
|
489
488
|
|
490
489
|
sampledir = basedir / f"{sample_name}"
|
491
490
|
for folder in ["raw_feature_bc_matrix", "filtered_feature_bc_matrix", "analysis"]:
|
@@ -511,7 +510,7 @@ def dir_scrnaseq_cellranger(
|
|
511
510
|
]:
|
512
511
|
file = sampledir / filename
|
513
512
|
with open(file, "w") as f:
|
514
|
-
f.write(f"{
|
513
|
+
f.write(f"{base62(n_char=6)}")
|
515
514
|
|
516
515
|
return sampledir
|
517
516
|
|
lamindb/core/exceptions.py
CHANGED
@@ -7,7 +7,6 @@
|
|
7
7
|
DoesNotExist
|
8
8
|
ValidationError
|
9
9
|
NotebookNotSaved
|
10
|
-
NoTitleError
|
11
10
|
MissingContextUID
|
12
11
|
UpdateContext
|
13
12
|
IntegrityError
|
@@ -79,12 +78,6 @@ class IntegrityError(Exception):
|
|
79
78
|
pass
|
80
79
|
|
81
80
|
|
82
|
-
class NoTitleError(SystemExit):
|
83
|
-
"""Notebook has no title."""
|
84
|
-
|
85
|
-
pass
|
86
|
-
|
87
|
-
|
88
81
|
class MissingContextUID(SystemExit):
|
89
82
|
"""User didn't define transform settings."""
|
90
83
|
|
lamindb/core/fields.py
CHANGED
lamindb/core/loaders.py
CHANGED
@@ -33,7 +33,6 @@ from lamindb_setup.core.upath import (
|
|
33
33
|
from ._settings import settings
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
|
-
import mudata as md
|
37
36
|
from lamindb_setup.core.types import UPathStr
|
38
37
|
|
39
38
|
try:
|
@@ -110,8 +109,23 @@ def load_json(path: UPathStr) -> dict:
|
|
110
109
|
return data
|
111
110
|
|
112
111
|
|
112
|
+
def load_yaml(path: UPathStr) -> dict | UPathStr:
|
113
|
+
"""Load `.yaml` to `dict`."""
|
114
|
+
try:
|
115
|
+
import yaml # type: ignore
|
116
|
+
|
117
|
+
with open(path) as f:
|
118
|
+
data = yaml.safe_load(f)
|
119
|
+
return data
|
120
|
+
except ImportError:
|
121
|
+
logger.warning(
|
122
|
+
"Please install PyYAML (`pip install PyYAML`) to load `.yaml` files."
|
123
|
+
)
|
124
|
+
return path
|
125
|
+
|
126
|
+
|
113
127
|
def load_image(path: UPathStr) -> None | UPathStr:
|
114
|
-
"""Display `.
|
128
|
+
"""Display `.jpg`, `.gif` or `.png` in ipython, otherwise return path."""
|
115
129
|
if is_run_from_ipython:
|
116
130
|
from IPython.display import Image, display
|
117
131
|
|
@@ -147,7 +161,9 @@ FILE_LOADERS = {
|
|
147
161
|
".zarr": load_anndata_zarr,
|
148
162
|
".html": load_html,
|
149
163
|
".json": load_json,
|
164
|
+
".yaml": load_yaml,
|
150
165
|
".h5mu": load_h5mu,
|
166
|
+
".gif": load_image,
|
151
167
|
".jpg": load_image,
|
152
168
|
".png": load_image,
|
153
169
|
".svg": load_svg,
|
@@ -7,47 +7,50 @@ from lamindb_setup._connect_instance import (
|
|
7
7
|
load_instance_settings,
|
8
8
|
)
|
9
9
|
from lamindb_setup.core._settings_store import instance_settings_file
|
10
|
-
from lnschema_core.models import Feature, FeatureSet, LinkORM, Record
|
11
10
|
|
11
|
+
from lamindb.models import LinkORM, Record, Schema
|
12
12
|
|
13
|
-
|
13
|
+
|
14
|
+
def get_schema_modules(instance: str | None) -> set[str]:
|
14
15
|
if instance is None or instance == "default":
|
15
|
-
schema_modules = set(ln_setup.settings.instance.
|
16
|
+
schema_modules = set(ln_setup.settings.instance.modules)
|
16
17
|
schema_modules.add("core")
|
17
18
|
return schema_modules
|
18
19
|
owner, name = get_owner_name_from_identifier(instance)
|
19
20
|
settings_file = instance_settings_file(name, owner)
|
20
21
|
if settings_file.exists():
|
21
|
-
|
22
|
+
modules = set(load_instance_settings(settings_file).modules)
|
22
23
|
else:
|
23
24
|
cache_filepath = (
|
24
25
|
ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
25
26
|
)
|
26
27
|
if cache_filepath.exists():
|
27
|
-
|
28
|
+
modules = set(cache_filepath.read_text().split("\n")[1].split(","))
|
28
29
|
else:
|
29
30
|
raise ValueError(f"Instance {instance} not found")
|
30
|
-
shared_schema_modules = set(ln_setup.settings.instance.
|
31
|
+
shared_schema_modules = set(ln_setup.settings.instance.modules).intersection(
|
32
|
+
modules
|
33
|
+
)
|
31
34
|
shared_schema_modules.add("core")
|
32
35
|
return shared_schema_modules
|
33
36
|
|
34
37
|
|
35
|
-
def
|
38
|
+
def dict_module_name_to_model_name(
|
36
39
|
registry: type[Record], instance: str | None = None
|
37
40
|
) -> dict[str, Record]:
|
38
|
-
schema_modules =
|
41
|
+
schema_modules = get_schema_modules(instance)
|
39
42
|
d: dict = {
|
40
|
-
i.related_model.
|
43
|
+
i.related_model.__get_name_with_module__(): i.related_model
|
41
44
|
for i in registry._meta.related_objects
|
42
45
|
if i.related_name is not None
|
43
|
-
and i.related_model.
|
46
|
+
and i.related_model.__get_module_name__() in schema_modules
|
44
47
|
}
|
45
48
|
d.update(
|
46
49
|
{
|
47
|
-
i.related_model.
|
50
|
+
i.related_model.__get_name_with_module__(): i.related_model
|
48
51
|
for i in registry._meta.many_to_many
|
49
52
|
if i.name is not None
|
50
|
-
and i.related_model.
|
53
|
+
and i.related_model.__get_module_name__() in schema_modules
|
51
54
|
}
|
52
55
|
)
|
53
56
|
return d
|
@@ -59,11 +62,11 @@ def dict_related_model_to_related_name(
|
|
59
62
|
def include(model: Record):
|
60
63
|
return not links != issubclass(model, LinkORM)
|
61
64
|
|
62
|
-
schema_modules =
|
65
|
+
schema_modules = get_schema_modules(instance)
|
63
66
|
|
64
67
|
related_objects = registry._meta.related_objects + registry._meta.many_to_many
|
65
68
|
d: dict = {
|
66
|
-
record.related_model.
|
69
|
+
record.related_model.__get_name_with_module__(): (
|
67
70
|
record.related_name
|
68
71
|
if not isinstance(record, ManyToManyField)
|
69
72
|
else record.name
|
@@ -72,7 +75,7 @@ def dict_related_model_to_related_name(
|
|
72
75
|
if (
|
73
76
|
record.name is not None
|
74
77
|
and include(record.related_model)
|
75
|
-
and record.related_model.
|
78
|
+
and record.related_model.__get_module_name__() in schema_modules
|
76
79
|
)
|
77
80
|
}
|
78
81
|
return d
|
@@ -81,15 +84,15 @@ def dict_related_model_to_related_name(
|
|
81
84
|
def get_related_name(features_type: type[Record]) -> str:
|
82
85
|
candidates = [
|
83
86
|
field.related_name
|
84
|
-
for field in
|
87
|
+
for field in Schema._meta.related_objects
|
85
88
|
if field.related_model == features_type
|
86
89
|
]
|
87
90
|
if not candidates:
|
88
91
|
raise ValueError(
|
89
92
|
f"Can't create feature sets from {features_type.__name__} because it's not"
|
90
|
-
" related to it!\nYou need to create a link model between
|
91
|
-
" your Record in your custom
|
92
|
-
" line:\
|
93
|
+
" related to it!\nYou need to create a link model between Schema and"
|
94
|
+
" your Record in your custom module.\nTo do so, add a"
|
95
|
+
" line:\n_schemas_m2m = models.ManyToMany(Schema,"
|
93
96
|
" related_name='mythings')\n"
|
94
97
|
)
|
95
98
|
return candidates[0]
|
@@ -17,12 +17,11 @@ from anndata._io.specs.registry import get_spec, read_elem, read_elem_partial
|
|
17
17
|
from anndata.compat import _read_attr
|
18
18
|
from fsspec.implementations.local import LocalFileSystem
|
19
19
|
from lamin_utils import logger
|
20
|
-
from lamindb_setup.core.upath import
|
20
|
+
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
21
21
|
from packaging import version
|
22
22
|
|
23
23
|
if TYPE_CHECKING:
|
24
24
|
from collections.abc import Mapping
|
25
|
-
from pathlib import Path
|
26
25
|
|
27
26
|
from fsspec.core import OpenFile
|
28
27
|
from lamindb_setup.core.types import UPathStr
|
@@ -4,7 +4,8 @@ from dataclasses import dataclass
|
|
4
4
|
from typing import TYPE_CHECKING, Any, Callable
|
5
5
|
|
6
6
|
from anndata._io.specs.registry import get_spec
|
7
|
-
|
7
|
+
|
8
|
+
from lamindb.models import Artifact
|
8
9
|
|
9
10
|
from ._anndata_accessor import AnnDataAccessor, StorageType, registry
|
10
11
|
from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
|
@@ -2,11 +2,16 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, Literal
|
4
4
|
|
5
|
+
import pandas as pd
|
6
|
+
import pyarrow as pa
|
5
7
|
from anndata import AnnData, read_h5ad
|
8
|
+
from lamin_utils import logger
|
6
9
|
from lamindb_setup import settings as setup_settings
|
7
10
|
from lamindb_setup.core._settings_storage import get_storage_region
|
8
11
|
from lamindb_setup.core.upath import LocalPathClasses, create_path
|
9
|
-
from
|
12
|
+
from packaging import version
|
13
|
+
|
14
|
+
from lamindb.models import Artifact, Run
|
10
15
|
|
11
16
|
if TYPE_CHECKING:
|
12
17
|
from lamindb_setup.core.types import UPathStr
|
@@ -137,9 +142,17 @@ def save_tiledbsoma_experiment(
|
|
137
142
|
storepath = storepath.as_posix()
|
138
143
|
|
139
144
|
add_run_uid = True
|
145
|
+
run_uid_dtype = "category"
|
140
146
|
if appending:
|
141
147
|
with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
|
142
|
-
|
148
|
+
obs_schema = store["obs"].schema
|
149
|
+
add_run_uid = "lamin_run_uid" in obs_schema.names
|
150
|
+
# this is needed to enable backwards compatibility with tiledbsoma stores
|
151
|
+
# created before PR 2300
|
152
|
+
if add_run_uid:
|
153
|
+
column_type = obs_schema.types[obs_schema.names.index("lamin_run_uid")]
|
154
|
+
if not isinstance(column_type, pa.DictionaryType):
|
155
|
+
run_uid_dtype = None
|
143
156
|
|
144
157
|
if add_run_uid and run is None:
|
145
158
|
raise ValueError("Pass `run`")
|
@@ -147,17 +160,16 @@ def save_tiledbsoma_experiment(
|
|
147
160
|
adata_objects = []
|
148
161
|
for adata in adatas:
|
149
162
|
if isinstance(adata, AnnData):
|
150
|
-
if add_run_uid:
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
)
|
155
|
-
else:
|
156
|
-
adata.obs["lamin_run_uid"] = run.uid
|
163
|
+
if add_run_uid and adata.is_view:
|
164
|
+
raise ValueError(
|
165
|
+
"Can not write an `AnnData` view, please do `adata.copy()` before passing."
|
166
|
+
)
|
157
167
|
else:
|
158
168
|
adata = _load_h5ad_zarr(create_path(adata))
|
159
|
-
|
160
|
-
|
169
|
+
if add_run_uid:
|
170
|
+
adata.obs["lamin_run_uid"] = pd.Series(
|
171
|
+
run.uid, index=adata.obs.index, dtype=run_uid_dtype
|
172
|
+
)
|
161
173
|
adata_objects.append(adata)
|
162
174
|
|
163
175
|
registration_mapping = kwargs.get("registration_mapping", None)
|
@@ -172,13 +184,28 @@ def save_tiledbsoma_experiment(
|
|
172
184
|
context=ctx,
|
173
185
|
)
|
174
186
|
|
187
|
+
resize_experiment = False
|
175
188
|
if registration_mapping is not None:
|
176
|
-
|
189
|
+
if version.parse(soma.__version__) < version.parse("1.15.0rc4"):
|
190
|
+
n_observations = len(registration_mapping.obs_axis.data)
|
191
|
+
else:
|
192
|
+
n_observations = registration_mapping.get_obs_shape()
|
193
|
+
resize_experiment = True
|
177
194
|
else: # happens only if not appending and only one adata passed
|
178
195
|
assert len(adata_objects) == 1 # noqa: S101
|
179
196
|
n_observations = adata_objects[0].n_obs
|
180
197
|
|
198
|
+
logger.important(f"Writing the tiledbsoma store to {storepath}")
|
181
199
|
for adata_obj in adata_objects:
|
200
|
+
if resize_experiment and soma.Experiment.exists(storepath, context=ctx):
|
201
|
+
# can only happen if registration_mapping is not None
|
202
|
+
soma_io.resize_experiment(
|
203
|
+
storepath,
|
204
|
+
nobs=n_observations,
|
205
|
+
nvars=registration_mapping.get_var_shapes(),
|
206
|
+
context=ctx,
|
207
|
+
)
|
208
|
+
resize_experiment = False
|
182
209
|
soma_io.from_anndata(
|
183
210
|
storepath,
|
184
211
|
adata_obj,
|
@@ -199,6 +226,6 @@ def save_tiledbsoma_experiment(
|
|
199
226
|
_is_internal_call=True,
|
200
227
|
)
|
201
228
|
artifact.n_observations = n_observations
|
202
|
-
artifact.
|
229
|
+
artifact.otype = "tiledbsoma"
|
203
230
|
|
204
231
|
return artifact.save()
|