lamindb 0.77.3__py3-none-any.whl → 1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +39 -32
- lamindb/_artifact.py +95 -64
- lamindb/_can_curate.py +13 -6
- lamindb/_collection.py +51 -49
- lamindb/_feature.py +9 -9
- lamindb/_finish.py +92 -79
- lamindb/_from_values.py +13 -10
- lamindb/_is_versioned.py +2 -1
- lamindb/_parents.py +23 -16
- lamindb/_query_manager.py +3 -3
- lamindb/_query_set.py +85 -18
- lamindb/_record.py +114 -41
- lamindb/_run.py +3 -3
- lamindb/_save.py +5 -6
- lamindb/{_feature_set.py → _schema.py} +34 -31
- lamindb/_storage.py +2 -1
- lamindb/_transform.py +51 -23
- lamindb/_ulabel.py +17 -8
- lamindb/_view.py +13 -13
- lamindb/base/__init__.py +24 -0
- lamindb/base/fields.py +281 -0
- lamindb/base/ids.py +103 -0
- lamindb/base/types.py +51 -0
- lamindb/base/users.py +30 -0
- lamindb/base/validation.py +67 -0
- lamindb/core/__init__.py +18 -15
- lamindb/core/_context.py +295 -224
- lamindb/core/_data.py +44 -49
- lamindb/core/_describe.py +41 -31
- lamindb/core/_django.py +29 -27
- lamindb/core/_feature_manager.py +130 -129
- lamindb/core/_label_manager.py +7 -8
- lamindb/core/_mapped_collection.py +17 -14
- lamindb/core/_settings.py +1 -12
- lamindb/core/_sync_git.py +56 -9
- lamindb/core/_track_environment.py +1 -1
- lamindb/core/datasets/_core.py +5 -6
- lamindb/core/exceptions.py +0 -7
- lamindb/core/fields.py +1 -1
- lamindb/core/loaders.py +0 -1
- lamindb/core/{schema.py → relations.py} +22 -19
- lamindb/core/storage/_anndata_accessor.py +1 -2
- lamindb/core/storage/_backed_access.py +2 -1
- lamindb/core/storage/_tiledbsoma.py +38 -13
- lamindb/core/storage/objects.py +1 -1
- lamindb/core/storage/paths.py +13 -8
- lamindb/core/subsettings/__init__.py +0 -2
- lamindb/core/types.py +2 -23
- lamindb/core/versioning.py +11 -7
- lamindb/{_curate.py → curators/__init__.py} +122 -23
- lamindb/curators/_spatial.py +528 -0
- lamindb/integrations/_vitessce.py +1 -3
- lamindb/migrations/0052_squashed.py +1261 -0
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
- lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
- lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
- lamindb/migrations/0060_alter_artifact__actions.py +22 -0
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
- lamindb/migrations/0062_add_is_latest_field.py +32 -0
- lamindb/migrations/0063_populate_latest_field.py +45 -0
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
- lamindb/migrations/0069_squashed.py +1770 -0
- lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
- lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
- lamindb/migrations/0073_merge_ourprojects.py +945 -0
- lamindb/migrations/0074_lamindbv1_part4.py +374 -0
- lamindb/migrations/0075_lamindbv1_part5.py +276 -0
- lamindb/migrations/0076_lamindbv1_part6.py +621 -0
- lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
- lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
- lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
- lamindb/migrations/__init__.py +0 -0
- lamindb/models.py +4064 -0
- {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/METADATA +13 -19
- lamindb-1.0rc1.dist-info/RECORD +100 -0
- {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
- lamindb/core/subsettings/_transform_settings.py +0 -21
- lamindb-0.77.3.dist-info/RECORD +0 -63
- {lamindb-0.77.3.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/_label_manager.py
CHANGED
@@ -6,7 +6,6 @@ from typing import TYPE_CHECKING
|
|
6
6
|
|
7
7
|
from django.db import connections
|
8
8
|
from lamin_utils import logger
|
9
|
-
from lnschema_core.models import CanCurate, Feature
|
10
9
|
from rich.table import Column, Table
|
11
10
|
from rich.text import Text
|
12
11
|
from rich.tree import Tree
|
@@ -19,6 +18,7 @@ from lamindb._record import (
|
|
19
18
|
transfer_to_default_db,
|
20
19
|
)
|
21
20
|
from lamindb._save import save
|
21
|
+
from lamindb.models import CanCurate, Feature
|
22
22
|
|
23
23
|
from ._describe import (
|
24
24
|
NAME_WIDTH,
|
@@ -29,14 +29,13 @@ from ._describe import (
|
|
29
29
|
)
|
30
30
|
from ._django import get_artifact_with_related, get_related_model
|
31
31
|
from ._settings import settings
|
32
|
-
from .
|
32
|
+
from .relations import dict_related_model_to_related_name
|
33
33
|
|
34
34
|
if TYPE_CHECKING:
|
35
|
-
from lnschema_core.models import Artifact, Collection, Record
|
36
|
-
|
37
35
|
from lamindb._query_set import QuerySet
|
36
|
+
from lamindb.models import Artifact, Collection, Record
|
38
37
|
|
39
|
-
EXCLUDE_LABELS = {"
|
38
|
+
EXCLUDE_LABELS = {"_schemas_m2m"}
|
40
39
|
|
41
40
|
|
42
41
|
def _get_labels(
|
@@ -107,7 +106,7 @@ def describe_labels(
|
|
107
106
|
pad_edge=False,
|
108
107
|
)
|
109
108
|
for related_name, labels in labels_data.items():
|
110
|
-
if not labels or related_name == "
|
109
|
+
if not labels or related_name == "_schemas_m2m":
|
111
110
|
continue
|
112
111
|
if isinstance(labels, dict): # postgres, labels are a dict[id, name]
|
113
112
|
print_values = _format_values(labels.values(), n=10, quotes=False)
|
@@ -118,7 +117,7 @@ def describe_labels(
|
|
118
117
|
)
|
119
118
|
if print_values:
|
120
119
|
related_model = get_related_model(self, related_name)
|
121
|
-
type_str = related_model.
|
120
|
+
type_str = related_model.__get_name_with_module__()
|
122
121
|
labels_table.add_row(
|
123
122
|
f".{related_name}", Text(type_str, style="dim"), print_values
|
124
123
|
)
|
@@ -311,7 +310,7 @@ class LabelManager:
|
|
311
310
|
"""
|
312
311
|
d = dict_related_model_to_related_name(self._host)
|
313
312
|
registry = label.__class__
|
314
|
-
related_name = d.get(registry.
|
313
|
+
related_name = d.get(registry.__get_name_with_module__())
|
315
314
|
link_model = getattr(self._host, related_name).through
|
316
315
|
link_records = link_model.filter(
|
317
316
|
artifact_id=self._host.id, **{f"{registry.__name__.lower()}_id": label.id}
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from collections import Counter
|
4
4
|
from functools import reduce
|
5
|
-
from pathlib import Path
|
6
5
|
from typing import TYPE_CHECKING, Literal
|
7
6
|
|
8
7
|
import numpy as np
|
@@ -86,9 +85,9 @@ class MappedCollection:
|
|
86
85
|
retrieves ``.X``.
|
87
86
|
obsm_keys: Keys from the ``.obsm`` slots.
|
88
87
|
obs_keys: Keys from the ``.obs`` slots.
|
89
|
-
obs_filter: Select only observations with these values for the given obs
|
90
|
-
Should be a
|
91
|
-
and filtering values (a string or a tuple of strings) as
|
88
|
+
obs_filter: Select only observations with these values for the given obs columns.
|
89
|
+
Should be a dictionary with obs column names as keys
|
90
|
+
and filtering values (a string or a tuple of strings) as values.
|
92
91
|
join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
|
93
92
|
does not join.
|
94
93
|
encode_labels: Encode labels into integers.
|
@@ -107,7 +106,7 @@ class MappedCollection:
|
|
107
106
|
layers_keys: str | list[str] | None = None,
|
108
107
|
obs_keys: str | list[str] | None = None,
|
109
108
|
obsm_keys: str | list[str] | None = None,
|
110
|
-
obs_filter:
|
109
|
+
obs_filter: dict[str, str | tuple[str, ...]] | None = None,
|
111
110
|
join: Literal["inner", "outer"] | None = "inner",
|
112
111
|
encode_labels: bool | list[str] = True,
|
113
112
|
unknown_label: str | dict[str, str] | None = None,
|
@@ -121,11 +120,11 @@ class MappedCollection:
|
|
121
120
|
)
|
122
121
|
|
123
122
|
self.filtered = obs_filter is not None
|
124
|
-
if self.filtered and
|
125
|
-
|
126
|
-
"
|
127
|
-
"as the first element and filtering values as the second element"
|
123
|
+
if self.filtered and not isinstance(obs_filter, dict):
|
124
|
+
logger.warning(
|
125
|
+
"Passing a tuple to `obs_filter` is deprecated, use a dictionary"
|
128
126
|
)
|
127
|
+
obs_filter = {obs_filter[0]: obs_filter[1]}
|
129
128
|
|
130
129
|
if layers_keys is None:
|
131
130
|
self.layers_keys = ["X"]
|
@@ -183,12 +182,16 @@ class MappedCollection:
|
|
183
182
|
store_path = self.path_list[i]
|
184
183
|
self._check_csc_raise_error(X, "X", store_path)
|
185
184
|
if self.filtered:
|
186
|
-
|
187
|
-
|
188
|
-
np.isin(
|
185
|
+
indices_storage_mask = None
|
186
|
+
for obs_filter_key, obs_filter_values in obs_filter.items():
|
187
|
+
obs_filter_mask = np.isin(
|
189
188
|
self._get_labels(store, obs_filter_key), obs_filter_values
|
190
189
|
)
|
191
|
-
|
190
|
+
if indices_storage_mask is None:
|
191
|
+
indices_storage_mask = obs_filter_mask
|
192
|
+
else:
|
193
|
+
indices_storage_mask &= obs_filter_mask
|
194
|
+
indices_storage = np.where(indices_storage_mask)[0]
|
192
195
|
n_obs_storage = len(indices_storage)
|
193
196
|
else:
|
194
197
|
if isinstance(X, ArrayTypes): # type: ignore
|
@@ -348,7 +351,7 @@ class MappedCollection:
|
|
348
351
|
|
349
352
|
@property
|
350
353
|
def original_shapes(self) -> list[tuple[int, int]]:
|
351
|
-
"""Shapes of the underlying AnnData objects."""
|
354
|
+
"""Shapes of the underlying AnnData objects (with `obs_filter` applied)."""
|
352
355
|
if self.n_vars_list is None:
|
353
356
|
n_vars_list = [None] * len(self.n_obs_list)
|
354
357
|
else:
|
lamindb/core/_settings.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import os
|
4
|
-
from typing import TYPE_CHECKING
|
4
|
+
from typing import TYPE_CHECKING
|
5
5
|
|
6
6
|
import lamindb_setup as ln_setup
|
7
7
|
from lamin_utils import logger
|
@@ -10,7 +10,6 @@ from lamindb_setup.core._settings import settings as setup_settings
|
|
10
10
|
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
11
11
|
|
12
12
|
from .subsettings._creation_settings import CreationSettings, creation_settings
|
13
|
-
from .subsettings._transform_settings import TransformSettings, transform_settings
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
16
15
|
from collections.abc import Mapping
|
@@ -80,16 +79,6 @@ class Settings:
|
|
80
79
|
storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
|
81
80
|
return storage_settings
|
82
81
|
|
83
|
-
@property
|
84
|
-
def transform(self) -> TransformSettings:
|
85
|
-
"""Transform settings.
|
86
|
-
|
87
|
-
Is deprecated since version 0.76.1.
|
88
|
-
"""
|
89
|
-
# enable warning soon
|
90
|
-
# logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
|
91
|
-
return transform_settings
|
92
|
-
|
93
82
|
@property
|
94
83
|
def sync_git_repo(self) -> str | None:
|
95
84
|
"""Sync transforms with scripts in git repository.
|
lamindb/core/_sync_git.py
CHANGED
@@ -53,22 +53,69 @@ def check_local_git_repo() -> bool:
|
|
53
53
|
|
54
54
|
|
55
55
|
def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
|
56
|
-
|
56
|
+
# Fetch all remote branches so that we can also search them
|
57
|
+
fetch_command = ["git", "fetch", "origin", "+refs/heads/*:refs/remotes/origin/*"]
|
58
|
+
subprocess.run(fetch_command, cwd=repo_dir, check=True)
|
59
|
+
|
60
|
+
# Find the commit containing the blob hash in all branches
|
61
|
+
command = [
|
62
|
+
"git",
|
63
|
+
"log",
|
64
|
+
"--all",
|
65
|
+
f"--find-object={blob_hash}",
|
66
|
+
"--pretty=format:%H",
|
67
|
+
]
|
57
68
|
result = subprocess.run(
|
58
69
|
command,
|
59
70
|
capture_output=True,
|
60
71
|
cwd=repo_dir,
|
61
72
|
)
|
62
|
-
#
|
63
|
-
#
|
73
|
+
# We just care to find one commit
|
74
|
+
# Hence, we split by new line ("\n") and use the first one
|
64
75
|
commit_hash = result.stdout.decode().split("\n")[0]
|
65
|
-
|
76
|
+
|
77
|
+
if not commit_hash or result.returncode == 1:
|
66
78
|
return None
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
79
|
+
|
80
|
+
default_branch = (
|
81
|
+
subprocess.run(
|
82
|
+
["git", "rev-parse", "--abbrev-ref", "origin/HEAD"],
|
83
|
+
capture_output=True,
|
84
|
+
cwd=repo_dir,
|
85
|
+
text=True,
|
86
|
+
)
|
87
|
+
.stdout.strip()
|
88
|
+
.split("/")[-1]
|
89
|
+
)
|
90
|
+
|
91
|
+
# Find all branches containing the commit
|
92
|
+
commit_containing_branches = subprocess.run(
|
93
|
+
["git", "branch", "--all", "--contains", commit_hash],
|
94
|
+
capture_output=True,
|
95
|
+
cwd=repo_dir,
|
96
|
+
text=True,
|
97
|
+
).stdout.split("\n")
|
98
|
+
|
99
|
+
# Clean up branch names and filter out the default branch
|
100
|
+
commit_containing_branches = [
|
101
|
+
branch.strip().replace("remotes/", "")
|
102
|
+
for branch in commit_containing_branches
|
103
|
+
if branch.strip()
|
104
|
+
]
|
105
|
+
non_default_branches = [
|
106
|
+
branch for branch in commit_containing_branches if default_branch not in branch
|
107
|
+
]
|
108
|
+
|
109
|
+
if non_default_branches:
|
110
|
+
logger.warning(
|
111
|
+
f"code blob hash {blob_hash} was found in non-default branch(es): {', '.join(non_default_branches)}"
|
112
|
+
)
|
113
|
+
|
114
|
+
assert ( # noqa: S101
|
115
|
+
len(commit_hash) == 40
|
116
|
+
), f"commit hash |{commit_hash}| is not 40 characters long"
|
117
|
+
|
118
|
+
return commit_hash
|
72
119
|
|
73
120
|
|
74
121
|
def get_filepath_within_git_repo(
|
lamindb/core/datasets/_core.py
CHANGED
@@ -5,11 +5,10 @@ from typing import TYPE_CHECKING
|
|
5
5
|
from urllib.request import urlretrieve
|
6
6
|
|
7
7
|
import anndata as ad
|
8
|
-
import numpy as np
|
9
8
|
import pandas as pd
|
10
|
-
from lnschema_core import ids
|
11
9
|
from upath import UPath
|
12
10
|
|
11
|
+
from lamindb.base.ids import base62
|
13
12
|
from lamindb.core._settings import settings
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
@@ -146,7 +145,7 @@ def dir_iris_images() -> UPath: # pragma: no cover
|
|
146
145
|
This is why on the UI, the artifact shows up as output of the downstream
|
147
146
|
demo notebook rather than the upstream curation notebook. The lineage
|
148
147
|
information should still be captured by
|
149
|
-
https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/
|
148
|
+
https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/lamindb/models.py#L2050-L2052
|
150
149
|
but we don't use this in the UI yet.
|
151
150
|
"""
|
152
151
|
return UPath("s3://lamindata/iris_studies")
|
@@ -481,11 +480,11 @@ def dir_scrnaseq_cellranger(
|
|
481
480
|
fastqdir.mkdir(parents=True, exist_ok=True)
|
482
481
|
fastqfile1 = fastqdir / f"{sample_name}_R1_001.fastq.gz"
|
483
482
|
with open(fastqfile1, "w") as f:
|
484
|
-
f.write(f"{
|
483
|
+
f.write(f"{base62(n_char=6)}")
|
485
484
|
fastqfile2 = fastqdir / f"{sample_name}_R2_001.fastq.gz"
|
486
485
|
fastqfile2.touch(exist_ok=True)
|
487
486
|
with open(fastqfile2, "w") as f:
|
488
|
-
f.write(f"{
|
487
|
+
f.write(f"{base62(n_char=6)}")
|
489
488
|
|
490
489
|
sampledir = basedir / f"{sample_name}"
|
491
490
|
for folder in ["raw_feature_bc_matrix", "filtered_feature_bc_matrix", "analysis"]:
|
@@ -511,7 +510,7 @@ def dir_scrnaseq_cellranger(
|
|
511
510
|
]:
|
512
511
|
file = sampledir / filename
|
513
512
|
with open(file, "w") as f:
|
514
|
-
f.write(f"{
|
513
|
+
f.write(f"{base62(n_char=6)}")
|
515
514
|
|
516
515
|
return sampledir
|
517
516
|
|
lamindb/core/exceptions.py
CHANGED
@@ -7,7 +7,6 @@
|
|
7
7
|
DoesNotExist
|
8
8
|
ValidationError
|
9
9
|
NotebookNotSaved
|
10
|
-
NoTitleError
|
11
10
|
MissingContextUID
|
12
11
|
UpdateContext
|
13
12
|
IntegrityError
|
@@ -79,12 +78,6 @@ class IntegrityError(Exception):
|
|
79
78
|
pass
|
80
79
|
|
81
80
|
|
82
|
-
class NoTitleError(SystemExit):
|
83
|
-
"""Notebook has no title."""
|
84
|
-
|
85
|
-
pass
|
86
|
-
|
87
|
-
|
88
81
|
class MissingContextUID(SystemExit):
|
89
82
|
"""User didn't define transform settings."""
|
90
83
|
|
lamindb/core/fields.py
CHANGED
lamindb/core/loaders.py
CHANGED
@@ -7,47 +7,50 @@ from lamindb_setup._connect_instance import (
|
|
7
7
|
load_instance_settings,
|
8
8
|
)
|
9
9
|
from lamindb_setup.core._settings_store import instance_settings_file
|
10
|
-
from lnschema_core.models import Feature, FeatureSet, LinkORM, Record
|
11
10
|
|
11
|
+
from lamindb.models import LinkORM, Record, Schema
|
12
12
|
|
13
|
-
|
13
|
+
|
14
|
+
def get_schema_modules(instance: str | None) -> set[str]:
|
14
15
|
if instance is None or instance == "default":
|
15
|
-
schema_modules = set(ln_setup.settings.instance.
|
16
|
+
schema_modules = set(ln_setup.settings.instance.modules)
|
16
17
|
schema_modules.add("core")
|
17
18
|
return schema_modules
|
18
19
|
owner, name = get_owner_name_from_identifier(instance)
|
19
20
|
settings_file = instance_settings_file(name, owner)
|
20
21
|
if settings_file.exists():
|
21
|
-
|
22
|
+
modules = set(load_instance_settings(settings_file).modules)
|
22
23
|
else:
|
23
24
|
cache_filepath = (
|
24
25
|
ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
|
25
26
|
)
|
26
27
|
if cache_filepath.exists():
|
27
|
-
|
28
|
+
modules = set(cache_filepath.read_text().split("\n")[1].split(","))
|
28
29
|
else:
|
29
30
|
raise ValueError(f"Instance {instance} not found")
|
30
|
-
shared_schema_modules = set(ln_setup.settings.instance.
|
31
|
+
shared_schema_modules = set(ln_setup.settings.instance.modules).intersection(
|
32
|
+
modules
|
33
|
+
)
|
31
34
|
shared_schema_modules.add("core")
|
32
35
|
return shared_schema_modules
|
33
36
|
|
34
37
|
|
35
|
-
def
|
38
|
+
def dict_module_name_to_model_name(
|
36
39
|
registry: type[Record], instance: str | None = None
|
37
40
|
) -> dict[str, Record]:
|
38
|
-
schema_modules =
|
41
|
+
schema_modules = get_schema_modules(instance)
|
39
42
|
d: dict = {
|
40
|
-
i.related_model.
|
43
|
+
i.related_model.__get_name_with_module__(): i.related_model
|
41
44
|
for i in registry._meta.related_objects
|
42
45
|
if i.related_name is not None
|
43
|
-
and i.related_model.
|
46
|
+
and i.related_model.__get_module_name__() in schema_modules
|
44
47
|
}
|
45
48
|
d.update(
|
46
49
|
{
|
47
|
-
i.related_model.
|
50
|
+
i.related_model.__get_name_with_module__(): i.related_model
|
48
51
|
for i in registry._meta.many_to_many
|
49
52
|
if i.name is not None
|
50
|
-
and i.related_model.
|
53
|
+
and i.related_model.__get_module_name__() in schema_modules
|
51
54
|
}
|
52
55
|
)
|
53
56
|
return d
|
@@ -59,11 +62,11 @@ def dict_related_model_to_related_name(
|
|
59
62
|
def include(model: Record):
|
60
63
|
return not links != issubclass(model, LinkORM)
|
61
64
|
|
62
|
-
schema_modules =
|
65
|
+
schema_modules = get_schema_modules(instance)
|
63
66
|
|
64
67
|
related_objects = registry._meta.related_objects + registry._meta.many_to_many
|
65
68
|
d: dict = {
|
66
|
-
record.related_model.
|
69
|
+
record.related_model.__get_name_with_module__(): (
|
67
70
|
record.related_name
|
68
71
|
if not isinstance(record, ManyToManyField)
|
69
72
|
else record.name
|
@@ -72,7 +75,7 @@ def dict_related_model_to_related_name(
|
|
72
75
|
if (
|
73
76
|
record.name is not None
|
74
77
|
and include(record.related_model)
|
75
|
-
and record.related_model.
|
78
|
+
and record.related_model.__get_module_name__() in schema_modules
|
76
79
|
)
|
77
80
|
}
|
78
81
|
return d
|
@@ -81,15 +84,15 @@ def dict_related_model_to_related_name(
|
|
81
84
|
def get_related_name(features_type: type[Record]) -> str:
|
82
85
|
candidates = [
|
83
86
|
field.related_name
|
84
|
-
for field in
|
87
|
+
for field in Schema._meta.related_objects
|
85
88
|
if field.related_model == features_type
|
86
89
|
]
|
87
90
|
if not candidates:
|
88
91
|
raise ValueError(
|
89
92
|
f"Can't create feature sets from {features_type.__name__} because it's not"
|
90
|
-
" related to it!\nYou need to create a link model between
|
91
|
-
" your Record in your custom
|
92
|
-
" line:\
|
93
|
+
" related to it!\nYou need to create a link model between Schema and"
|
94
|
+
" your Record in your custom module.\nTo do so, add a"
|
95
|
+
" line:\n_schemas_m2m = models.ManyToMany(Schema,"
|
93
96
|
" related_name='mythings')\n"
|
94
97
|
)
|
95
98
|
return candidates[0]
|
@@ -17,12 +17,11 @@ from anndata._io.specs.registry import get_spec, read_elem, read_elem_partial
|
|
17
17
|
from anndata.compat import _read_attr
|
18
18
|
from fsspec.implementations.local import LocalFileSystem
|
19
19
|
from lamin_utils import logger
|
20
|
-
from lamindb_setup.core.upath import
|
20
|
+
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
21
21
|
from packaging import version
|
22
22
|
|
23
23
|
if TYPE_CHECKING:
|
24
24
|
from collections.abc import Mapping
|
25
|
-
from pathlib import Path
|
26
25
|
|
27
26
|
from fsspec.core import OpenFile
|
28
27
|
from lamindb_setup.core.types import UPathStr
|
@@ -4,7 +4,8 @@ from dataclasses import dataclass
|
|
4
4
|
from typing import TYPE_CHECKING, Any, Callable
|
5
5
|
|
6
6
|
from anndata._io.specs.registry import get_spec
|
7
|
-
|
7
|
+
|
8
|
+
from lamindb.models import Artifact
|
8
9
|
|
9
10
|
from ._anndata_accessor import AnnDataAccessor, StorageType, registry
|
10
11
|
from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
|
@@ -2,12 +2,16 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, Literal
|
4
4
|
|
5
|
+
import pandas as pd
|
6
|
+
import pyarrow as pa
|
5
7
|
from anndata import AnnData, read_h5ad
|
6
8
|
from lamin_utils import logger
|
7
9
|
from lamindb_setup import settings as setup_settings
|
8
10
|
from lamindb_setup.core._settings_storage import get_storage_region
|
9
11
|
from lamindb_setup.core.upath import LocalPathClasses, create_path
|
10
|
-
from
|
12
|
+
from packaging import version
|
13
|
+
|
14
|
+
from lamindb.models import Artifact, Run
|
11
15
|
|
12
16
|
if TYPE_CHECKING:
|
13
17
|
from lamindb_setup.core.types import UPathStr
|
@@ -138,9 +142,17 @@ def save_tiledbsoma_experiment(
|
|
138
142
|
storepath = storepath.as_posix()
|
139
143
|
|
140
144
|
add_run_uid = True
|
145
|
+
run_uid_dtype = "category"
|
141
146
|
if appending:
|
142
147
|
with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
|
143
|
-
|
148
|
+
obs_schema = store["obs"].schema
|
149
|
+
add_run_uid = "lamin_run_uid" in obs_schema.names
|
150
|
+
# this is needed to enable backwards compatibility with tiledbsoma stores
|
151
|
+
# created before PR 2300
|
152
|
+
if add_run_uid:
|
153
|
+
column_type = obs_schema.types[obs_schema.names.index("lamin_run_uid")]
|
154
|
+
if not isinstance(column_type, pa.DictionaryType):
|
155
|
+
run_uid_dtype = None
|
144
156
|
|
145
157
|
if add_run_uid and run is None:
|
146
158
|
raise ValueError("Pass `run`")
|
@@ -148,17 +160,16 @@ def save_tiledbsoma_experiment(
|
|
148
160
|
adata_objects = []
|
149
161
|
for adata in adatas:
|
150
162
|
if isinstance(adata, AnnData):
|
151
|
-
if add_run_uid:
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
)
|
156
|
-
else:
|
157
|
-
adata.obs["lamin_run_uid"] = run.uid
|
163
|
+
if add_run_uid and adata.is_view:
|
164
|
+
raise ValueError(
|
165
|
+
"Can not write an `AnnData` view, please do `adata.copy()` before passing."
|
166
|
+
)
|
158
167
|
else:
|
159
168
|
adata = _load_h5ad_zarr(create_path(adata))
|
160
|
-
|
161
|
-
|
169
|
+
if add_run_uid:
|
170
|
+
adata.obs["lamin_run_uid"] = pd.Series(
|
171
|
+
run.uid, index=adata.obs.index, dtype=run_uid_dtype
|
172
|
+
)
|
162
173
|
adata_objects.append(adata)
|
163
174
|
|
164
175
|
registration_mapping = kwargs.get("registration_mapping", None)
|
@@ -173,14 +184,28 @@ def save_tiledbsoma_experiment(
|
|
173
184
|
context=ctx,
|
174
185
|
)
|
175
186
|
|
187
|
+
resize_experiment = False
|
176
188
|
if registration_mapping is not None:
|
177
|
-
|
189
|
+
if version.parse(soma.__version__) < version.parse("1.15.0rc4"):
|
190
|
+
n_observations = len(registration_mapping.obs_axis.data)
|
191
|
+
else:
|
192
|
+
n_observations = registration_mapping.get_obs_shape()
|
193
|
+
resize_experiment = True
|
178
194
|
else: # happens only if not appending and only one adata passed
|
179
195
|
assert len(adata_objects) == 1 # noqa: S101
|
180
196
|
n_observations = adata_objects[0].n_obs
|
181
197
|
|
182
198
|
logger.important(f"Writing the tiledbsoma store to {storepath}")
|
183
199
|
for adata_obj in adata_objects:
|
200
|
+
if resize_experiment and soma.Experiment.exists(storepath, context=ctx):
|
201
|
+
# can only happen if registration_mapping is not None
|
202
|
+
soma_io.resize_experiment(
|
203
|
+
storepath,
|
204
|
+
nobs=n_observations,
|
205
|
+
nvars=registration_mapping.get_var_shapes(),
|
206
|
+
context=ctx,
|
207
|
+
)
|
208
|
+
resize_experiment = False
|
184
209
|
soma_io.from_anndata(
|
185
210
|
storepath,
|
186
211
|
adata_obj,
|
@@ -201,6 +226,6 @@ def save_tiledbsoma_experiment(
|
|
201
226
|
_is_internal_call=True,
|
202
227
|
)
|
203
228
|
artifact.n_observations = n_observations
|
204
|
-
artifact.
|
229
|
+
artifact.otype = "tiledbsoma"
|
205
230
|
|
206
231
|
return artifact.save()
|
lamindb/core/storage/objects.py
CHANGED
lamindb/core/storage/paths.py
CHANGED
@@ -3,20 +3,16 @@ from __future__ import annotations
|
|
3
3
|
import shutil
|
4
4
|
from typing import TYPE_CHECKING
|
5
5
|
|
6
|
-
import anndata as ad
|
7
6
|
import fsspec
|
8
|
-
import pandas as pd
|
9
7
|
from lamin_utils import logger
|
10
8
|
from lamindb_setup.core import StorageSettings
|
11
9
|
from lamindb_setup.core.upath import (
|
12
10
|
LocalPathClasses,
|
13
11
|
UPath,
|
14
|
-
create_path,
|
15
|
-
infer_filesystem,
|
16
12
|
)
|
17
|
-
from lnschema_core.models import Artifact, Storage
|
18
13
|
|
19
14
|
from lamindb.core._settings import settings
|
15
|
+
from lamindb.models import Artifact, Storage
|
20
16
|
|
21
17
|
if TYPE_CHECKING:
|
22
18
|
from pathlib import Path
|
@@ -30,7 +26,7 @@ AUTO_KEY_PREFIX = ".lamindb/"
|
|
30
26
|
# add type annotations back asap when re-organizing the module
|
31
27
|
def auto_storage_key_from_artifact(artifact: Artifact):
|
32
28
|
if artifact.key is None or artifact._key_is_virtual:
|
33
|
-
is_dir = artifact.
|
29
|
+
is_dir = artifact.n_files is not None
|
34
30
|
return auto_storage_key_from_artifact_uid(artifact.uid, artifact.suffix, is_dir)
|
35
31
|
else:
|
36
32
|
return artifact.key
|
@@ -52,10 +48,19 @@ def check_path_is_child_of_root(path: UPathStr, root: UPathStr) -> bool:
|
|
52
48
|
# and for fsspec.utils.get_protocol
|
53
49
|
path_str = str(path)
|
54
50
|
root_str = str(root)
|
51
|
+
root_protocol = fsspec.utils.get_protocol(root_str)
|
55
52
|
# check that the protocols are the same first
|
56
|
-
if fsspec.utils.get_protocol(path_str) !=
|
53
|
+
if fsspec.utils.get_protocol(path_str) != root_protocol:
|
57
54
|
return False
|
58
|
-
|
55
|
+
if root_protocol in {"http", "https"}:
|
56
|
+
# in this case it is a base url, not a file
|
57
|
+
# so formally does not exist
|
58
|
+
resolve_kwargs = {"follow_redirects": False}
|
59
|
+
else:
|
60
|
+
resolve_kwargs = {}
|
61
|
+
return (
|
62
|
+
UPath(root_str).resolve(**resolve_kwargs) in UPath(path_str).resolve().parents
|
63
|
+
)
|
59
64
|
|
60
65
|
|
61
66
|
# returns filepath and root of the storage
|