lamindb 0.74.0__py3-none-any.whl → 0.74.2__py3-none-any.whl
This diff shows the content of publicly released package versions and reflects the changes between them as published to their public registry. It is provided for informational purposes only.
- lamindb/__init__.py +9 -9
- lamindb/_artifact.py +36 -46
- lamindb/_can_validate.py +24 -22
- lamindb/_collection.py +5 -6
- lamindb/{_annotate.py → _curate.py} +62 -40
- lamindb/_feature.py +7 -9
- lamindb/_feature_set.py +17 -18
- lamindb/_filter.py +5 -5
- lamindb/_finish.py +19 -7
- lamindb/_from_values.py +15 -15
- lamindb/_is_versioned.py +2 -2
- lamindb/_parents.py +7 -7
- lamindb/_query_manager.py +8 -8
- lamindb/_query_set.py +32 -30
- lamindb/{_registry.py → _record.py} +91 -50
- lamindb/_save.py +6 -6
- lamindb/_storage.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/__init__.py +19 -16
- lamindb/core/_data.py +11 -11
- lamindb/core/_feature_manager.py +49 -32
- lamindb/core/_label_manager.py +5 -5
- lamindb/core/_mapped_collection.py +4 -1
- lamindb/core/_run_context.py +6 -4
- lamindb/core/_settings.py +45 -50
- lamindb/core/_sync_git.py +22 -12
- lamindb/core/_track_environment.py +5 -1
- lamindb/core/datasets/_core.py +3 -3
- lamindb/core/fields.py +1 -1
- lamindb/core/schema.py +6 -6
- lamindb/core/storage/_backed_access.py +56 -12
- lamindb/core/storage/paths.py +4 -4
- lamindb/core/subsettings/__init__.py +12 -0
- lamindb/core/subsettings/_creation_settings.py +38 -0
- lamindb/core/subsettings/_transform_settings.py +21 -0
- lamindb/core/versioning.py +1 -1
- lamindb/integrations/_vitessce.py +4 -3
- {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/METADATA +7 -9
- lamindb-0.74.2.dist-info/RECORD +57 -0
- lamindb/core/_transform_settings.py +0 -9
- lamindb-0.74.0.dist-info/RECORD +0 -55
- {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/LICENSE +0 -0
- {lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/WHEEL +0 -0
lamindb/core/_settings.py
CHANGED
@@ -9,11 +9,13 @@ from lamindb_setup._set_managed_storage import set_managed_storage
 from lamindb_setup.core._settings import settings as setup_settings
 from lamindb_setup.core._settings_instance import sanitize_git_repo_url
 
-from ._transform_settings import TransformSettings, transform_settings
+from .subsettings._creation_settings import CreationSettings, creation_settings
+from .subsettings._transform_settings import TransformSettings, transform_settings
 
 if TYPE_CHECKING:
     from pathlib import Path
 
+    from lamindb_setup.core._settings_storage import StorageSettings
     from upath import UPath
 
 VERBOSITY_TO_INT = {
@@ -40,29 +42,15 @@ class Settings:
         logger.set_verbosity(self._verbosity_int)
         self._sync_git_repo: str | None = git_repo
 
-    upon_artifact_create_if_hash_exists: Literal[
-        "warn_return_existing", "error", "warn_create_new"
-    ] = "warn_return_existing"
-    """Behavior if file hash exists (default `"warn_return_existing"`).
-
-    One of `["warn_return_existing", "error", "warn_create_new"]`.
-
-    FAQ: :doc:`/faq/idempotency`
-    """
-    upon_file_create_skip_size_hash: bool = False
-    """To speed up registering high numbers of files (default `False`).
-
-    This bypasses queries for size and hash to AWS & GCP.
-
-    It speeds up file creation by about a factor 100.
-    """
-    upon_create_search_names: bool = True
-    """To speed up creating Registry objects (default `True`).
+    @property
+    def creation(self) -> CreationSettings:
+        """Record creation settings.
 
-
+        For example, `ln.settings.creation.search_names = False` will disable
+        searching for records with similar names during creation.
+        """
+        return creation_settings
 
-    FAQ: :doc:`/faq/idempotency`
-    """
     track_run_inputs: bool = True
     """Track files as input upon `.load()`, `.cache()` and `.backed()`.
@@ -70,14 +58,6 @@ class Settings:
 
     FAQ: :doc:`/faq/track-run-inputs`
     """
-    silence_file_run_transform_warning: bool = False
-    """Silence warning about missing run & transform during file creation."""
-    artifact_use_virtual_keys: bool = True
-    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.
-
-    If `True`, the `key` is **not** used to construct file paths, but file paths are
-    based on the `uid` of artifact.
-    """
     __using_key: str | None = None
     _using_storage: str | None = None
 
@@ -101,8 +81,17 @@ class Settings:
 
     @property
     def transform(self) -> TransformSettings:
-        """Transform settings."""
-        return transform_settings
+        """Transform settings.
+
+        For example::
+
+            ln.settings.transform.stem_uid = "FPnfDtJz8qbE"  # defines version family
+            ln.settings.transform.version = "1"  # defines version
+            ln.settings.transform.name = "My good script"  # semantic name
+
+        The first two are typically auto-generated by :func:`~lamindb.track`.
+        """
+        return transform_settings
 
     @property
     def sync_git_repo(self) -> str | None:
@@ -116,18 +105,26 @@ class Settings:
     def sync_git_repo(self, value) -> None:
         """Sync transforms with scripts in git repository.
 
-
+        For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
         """
         self._sync_git_repo = sanitize_git_repo_url(value)
-
+        if not self._sync_git_repo.startswith("https://"):  # pragma: nocover
+            raise ValueError("git repository URL must start with 'https://'.")
 
     @property
-    def storage(self) ->
-        """Default storage location
+    def storage(self) -> StorageSettings:
+        """Default storage location.
 
         Examples:
 
-
+        >>> ln.settings.storage
+        StorageSettings(root='s3://my-bucket', uid='j7MaPxtLxPeE')
+
+        >>> ln.settings.storage.root
+        UPath('s3://my-bucket')
+
+        You can switch the default storage location to another managed storage
+        location by passing a string:
 
         >>> ln.settings.storage = "s3://some-bucket"
 
@@ -139,7 +136,7 @@ class Settings:
         >>> )
         >>> ln.settings.storage = "s3://some-bucket", kwargs
         """
-        return self._storage_settings.root
+        return self._storage_settings
 
     @storage.setter
     def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
@@ -150,16 +147,14 @@ class Settings:
         set_managed_storage(path, **kwargs)
 
     @property
-    def storage_local(self) ->
+    def storage_local(self) -> StorageSettings:
         """An additional local default storage (a path to its root).
 
         Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
 
         Guide: :doc:`faq/keep-artifacts-local`
-
-        Shortcut for: `ln.setup.settings.instance.storage_local.root`
         """
-        return ln_setup.settings.instance.storage_local.root
+        return ln_setup.settings.instance.storage_local
 
     @storage_local.setter
     def storage_local(self, local_root: Path):
@@ -167,14 +162,14 @@ class Settings:
 
     @property
     def verbosity(self) -> str:
-        """Logger verbosity (default 'warning').
-
-        - 'error'
-        - 'warning'
-        - 'success'
-        - 'info'
-        - 'hint'
-        - 'debug'
+        """Logger verbosity (default `'warning'`).
+
+        - `'error'`: ❌ only show error messages
+        - `'warning'`: ❗ also show warning messages
+        - `'success'`: ✅ also show success and save messages
+        - `'info'`: 💡 also show info messages
+        - `'hint'`: 💡 also show hint messages
+        - `'debug'`: 🐛 also show detailed debug messages
         """
         return VERBOSITY_TO_STR[self._verbosity_int]
 
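Taken together, these hunks replace the flat `upon_*` attributes on `Settings` with grouped sub-settings objects. A minimal migration sketch for downstream code, assuming nothing beyond the renames visible in this diff:

    import lamindb as ln

    # 0.74.0 (attributes removed above):
    # ln.settings.upon_create_search_names = False
    # ln.settings.upon_file_create_skip_size_hash = True

    # 0.74.2: grouped under the new `creation` sub-settings
    ln.settings.creation.search_names = False
    ln.settings.creation.artifact_skip_size_hash = True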
lamindb/core/_sync_git.py
CHANGED
@@ -24,8 +24,7 @@ def get_git_repo_from_remote() -> Path:
         f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
     )
     result = subprocess.run(
-        f"git clone --depth 10 {repo_url}.git",
-        shell=True,
+        ["git", "clone", "--depth", "10", f"{repo_url}.git"],
         capture_output=True,
         cwd=setup_settings.storage.cache_dir,
     )
@@ -36,8 +35,7 @@ def get_git_repo_from_remote() -> Path:
 
 def check_local_git_repo() -> bool:
     result = subprocess.run(
-        "git config --get remote.origin.url",
-        shell=True,
+        ["git", "config", "--get", "remote.origin.url"],
         capture_output=True,
     )
     result_str = result.stdout.decode().strip()
@@ -55,10 +53,9 @@ def check_local_git_repo() -> bool:
 
 
 def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
-    command = f"git log --find-object={blob_hash} --pretty=format:%H"
+    command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
     result = subprocess.run(
         command,
-        shell=True,
         capture_output=True,
         cwd=repo_dir,
     )
@@ -68,7 +65,7 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
     if commit_hash == "" or result.returncode == 1:
         return None
     else:
-        assert (
+        assert (  # noqa: S101
             len(commit_hash) == 40
         ), f"commit hash |{commit_hash}| is not 40 characters long"
         return commit_hash
@@ -82,21 +79,34 @@ def get_filepath_within_git_repo(
     # from anywhere in the repo, hence, let's get the root
     repo_root = (
         subprocess.run(
-            "git rev-parse --show-toplevel",
-            shell=True,
+            ["git", "rev-parse", "--show-toplevel"],
             capture_output=True,
             cwd=repo_dir,
         )
         .stdout.decode()
         .strip()
     )
-    command = f"git ls-tree -r {commit_hash} | grep -E {blob_hash}"
+    # Run the git commands separately to circumvent spawning a shell
+    git_command = ["git", "ls-tree", "-r", commit_hash]
+    git_process = subprocess.Popen(
+        git_command,
+        stdout=subprocess.PIPE,
+        cwd=repo_root,
+    )
+
+    grep_command = ["grep", "-E", blob_hash]
     result = subprocess.run(
-        command,
-        shell=True,
+        grep_command,
+        stdin=git_process.stdout,
         capture_output=True,
         cwd=repo_root,
     )
+
+    # Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
+    git_process.stdout.close()
+    git_process.wait()
+
+    command = " ".join(git_command) + " | " + " ".join(grep_command)
     if result.returncode != 0 and result.stderr.decode() != "":
         raise RuntimeError(f"{command}\n{result.stderr.decode()}")
     if len(result.stdout.decode()) == 0:
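Every call site above follows the same pattern: replace a `shell=True` command string with an argument list (avoiding shell injection), and emulate the one shell pipeline via `Popen`. A self-contained sketch of the pipe idiom, with `HEAD` and the grep pattern as placeholder values:

    import subprocess

    # equivalent of `git ls-tree -r HEAD | grep -E <pattern>` without a shell
    git_process = subprocess.Popen(
        ["git", "ls-tree", "-r", "HEAD"],
        stdout=subprocess.PIPE,
    )
    result = subprocess.run(
        ["grep", "-E", "some-pattern"],  # placeholder pattern
        stdin=git_process.stdout,
        capture_output=True,
    )
    # close our handle so git can receive SIGPIPE if grep exits early
    git_process.stdout.close()
    git_process.wait()
    print(result.stdout.decode())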
lamindb/core/_track_environment.py
CHANGED
@@ -15,7 +15,11 @@ def track_environment(run: Run) -> None:
     # create a requirements.txt
     # we don't create a conda environment.yml mostly for its slowness
     try:
-        result = subprocess.run(f"pip freeze > {filepath}", shell=True)
+        with open(filepath, "w") as f:
+            result = subprocess.run(
+                ["pip", "freeze"],
+                stdout=f,
+            )
     except OSError as e:
         result = None
         logger.warning(f"could not run pip freeze with error {e}")
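The same de-shelling applies here: the former `pip freeze > file` shell redirection becomes an explicit file handle passed as `stdout`. As a standalone idiom (output filename is a placeholder):

    import subprocess

    with open("requirements.txt", "w") as f:  # placeholder filename
        subprocess.run(["pip", "freeze"], stdout=f)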
lamindb/core/datasets/_core.py
CHANGED
@@ -91,7 +91,7 @@ def file_tsv_rnaseq_nfcore_salmon_merged_gene_counts(
 
 def file_fastq(in_storage_root=False) -> Path:
     """Mini mock fastq artifact."""
-    basedir = Path() if not in_storage_root else settings.storage
+    basedir = Path() if not in_storage_root else settings.storage.root
     filepath = basedir / "input.fastq.gz"
     with open(filepath, "w") as f:
         f.write("Mock fastq artifact.")
@@ -100,7 +100,7 @@ def file_fastq(in_storage_root=False) -> Path:
 
 def file_bam(in_storage_root=False) -> Path:  # pragma: no cover
     """Mini mock bam artifact."""
-    basedir = Path() if not in_storage_root else settings.storage
+    basedir = Path() if not in_storage_root else settings.storage.root
     filepath = basedir / "output.bam"
     with open(filepath, "w") as f:
         f.write("Mock bam artifact.")
@@ -109,7 +109,7 @@ def file_bam(in_storage_root=False) -> Path:  # pragma: no cover
 
 def file_mini_csv(in_storage_root=False) -> Path:
     """Mini csv artifact."""
-    basedir = Path() if not in_storage_root else settings.storage
+    basedir = Path() if not in_storage_root else settings.storage.root
     filepath = basedir / "mini.csv"
     df = pd.DataFrame([1, 2, 3], columns=["test"])
     df.to_csv(filepath, index=False)
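All three fixes are call-site consequences of the `Settings.storage` change above: the property now returns a `StorageSettings` object instead of the root path. A sketch of the corresponding adjustment in user code, assuming only what this diff shows:

    import lamindb as ln

    # 0.74.0: ln.settings.storage was the root path itself
    # filepath = ln.settings.storage / "input.fastq.gz"

    # 0.74.2: the path lives on `.root`
    filepath = ln.settings.storage.root / "input.fastq.gz"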
lamindb/core/fields.py
CHANGED
lamindb/core/schema.py
CHANGED
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
 from django.db.models import ManyToManyField
-from lnschema_core.models import Feature, FeatureSet, LinkORM, Registry
+from lnschema_core.models import Feature, FeatureSet, LinkORM, Record
 
 
-def dict_schema_name_to_model_name(orm: type[Registry]) -> dict[str, Registry]:
+def dict_schema_name_to_model_name(orm: type[Record]) -> dict[str, Record]:
     d: dict = {
         i.related_model.__get_name_with_schema__(): i.related_model
         for i in orm._meta.related_objects
@@ -21,9 +21,9 @@ def dict_schema_name_to_model_name(orm: type[Registry]) -> dict[str, Registry]:
 
 
 def dict_related_model_to_related_name(
-    orm: type[Registry], links: bool = False
+    orm: type[Record], links: bool = False
 ) -> dict[str, str]:
-    def include(model: Registry):
+    def include(model: Record):
         return not links != issubclass(model, LinkORM)
 
     related_objects = orm._meta.related_objects + orm._meta.many_to_many
@@ -39,7 +39,7 @@ def dict_related_model_to_related_name(
     return d
 
 
-def get_related_name(features_type: type[Registry]) -> str:
+def get_related_name(features_type: type[Record]) -> str:
     candidates = [
         field.related_name
         for field in FeatureSet._meta.related_objects
@@ -49,7 +49,7 @@ def get_related_name(features_type: type[Registry]) -> str:
     raise ValueError(
         f"Can't create feature sets from {features_type.__name__} because it's not"
         " related to it!\nYou need to create a link model between FeatureSet and"
-        " your Registry in your custom schema.\nTo do so, add a"
+        " your Record in your custom schema.\nTo do so, add a"
         " line:\nfeature_sets = models.ManyToMany(FeatureSet,"
         " related_name='mythings')\n"
     )
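This is the `Registry` → `Record` rename (compare `_registry.py → _record.py` in the file list). Code typed against the base class only needs the import swap; a hypothetical downstream helper using the same `_meta` traversal as the utilities above:

    from lnschema_core.models import Record  # 0.74.0: Registry

    def related_model_names(orm: type[Record]) -> list[str]:
        # enumerate related models, as dict_schema_name_to_model_name does
        return [obj.related_model.__name__ for obj in orm._meta.related_objects]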
lamindb/core/storage/_backed_access.py
CHANGED
@@ -28,6 +28,8 @@ if TYPE_CHECKING:
     from pathlib import Path
 
     from fsspec.core import OpenFile
+    from tiledbsoma import Collection as SOMACollection
+    from tiledbsoma import Experiment as SOMAExperiment
 
 anndata_version_parse = version.parse(anndata_version)
 
@@ -100,7 +102,7 @@ def _records_to_df(obj):
     return obj
 
 
-class AccessRegistry:
+class AccessRecord:
     def __init__(self):
         self._registry = {}
         self._openers = {}
@@ -141,7 +143,7 @@ class AccessRegistry:
 
 
 # storage specific functions should be registered and called through the registry
-registry = AccessRegistry()
+registry = AccessRecord()
 
 
 @registry.register_open("h5py")
@@ -207,8 +209,10 @@ def safer_read_partial(elem, indices):
     try:
         ds = CSRDataset(elem)
         result = _subset_sparse(ds, indices)
-    except Exception:
-        pass
+    except Exception as e:
+        logger.debug(
+            f"Encountered an exception while attempting to subset a sparse dataset by indices.\n{e}"
+        )
     if result is None:
         raise ValueError(
             "Can not get a subset of the element of type"
@@ -305,8 +309,10 @@ if ZARR_INSTALLED:
         try:
             ds = CSRDataset(elem)
             return _subset_sparse(ds, indices)
-        except Exception:
-            pass
+        except Exception as e:
+            logger.debug(
+                f"Encountered an exception while attempting to subset a sparse dataset by indices.\n{e}"
+            )
         raise ValueError(
             "Can not get a subset of the element of type"
             f" {type(elem).__name__} with an empty spec."
@@ -734,24 +740,62 @@ class BackedAccessor:
 
 def backed_access(
     artifact_or_filepath: Artifact | Path, using_key: str | None = None
-) -> AnnDataAccessor | BackedAccessor:
+) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
     if isinstance(artifact_or_filepath, Artifact):
         filepath = filepath_from_artifact(artifact_or_filepath, using_key=using_key)
     else:
         filepath = artifact_or_filepath
     name = filepath.name
+    suffix = filepath.suffix
+
+    if name == "soma" or suffix == ".tiledbsoma":
+        try:
+            import tiledbsoma as soma
+        except ImportError as e:
+            raise ImportError(
+                "Please install tiledbsoma: pip install tiledbsoma"
+            ) from e
+        filepath_str = filepath.as_posix()
+        if filepath.protocol == "s3":
+            from lamindb_setup.core._settings_storage import get_storage_region
+
+            region = get_storage_region(filepath_str)
+            tiledb_config = {"vfs.s3.region": region}
+            storage_options = filepath.storage_options
+            if "key" in storage_options:
+                tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
+            if "secret" in storage_options:
+                tiledb_config["vfs.s3.aws_secret_access_key"] = storage_options[
+                    "secret"
+                ]
+            if "token" in storage_options:
+                tiledb_config["vfs.s3.aws_session_token"] = storage_options["token"]
+            ctx = soma.SOMATileDBContext(tiledb_config=tiledb_config)
+            # this is a strange bug
+            # for some reason iterdir further gives incorrect results
+            # if cache is not invalidated
+            # instead of obs and ms it gives ms and ms in the list of names
+            filepath.fs.invalidate_cache()
+        else:
+            ctx = None
 
-    if name.endswith((".h5", ".hdf5", ".h5ad")):
+        soma_objects = [obj.name for obj in filepath.iterdir()]
+        if "obs" in soma_objects and "ms" in soma_objects:
+            SOMAType = soma.Experiment
+        else:
+            SOMAType = soma.Collection
+        return SOMAType.open(filepath_str, context=ctx)
+    elif suffix in {".h5", ".hdf5", ".h5ad"}:
         conn, storage = registry.open("h5py", filepath)
-    elif name.endswith(".zarr"):
+    elif suffix == ".zarr":
         conn, storage = registry.open("zarr", filepath)
     else:
         raise ValueError(
-            "object should have .h5, .hdf5, .h5ad, .zarr suffix, not"
-            f" {filepath.suffix}."
+            "object should have .h5, .hdf5, .h5ad, .zarr, .tiledbsoma suffix, not"
+            f" {suffix}."
        )
 
-    if name.endswith(".h5ad"):
+    if suffix == ".h5ad":
         return AnnDataAccessor(conn, storage, name)
     else:
         if get_spec(storage).encoding_type == "anndata":
lamindb/core/storage/paths.py
CHANGED
@@ -47,7 +47,7 @@ def auto_storage_key_from_artifact(artifact: Artifact):
 
 
 def auto_storage_key_from_artifact_uid(uid: str, suffix: str, is_dir: bool) -> str:
-    assert isinstance(suffix, str)
+    assert isinstance(suffix, str)  # noqa: S101  Suffix cannot be None.
     if is_dir:
         uid_storage = uid[:16]  # 16 chars, leave 4 chars for versioning
     else:
@@ -72,7 +72,7 @@ def attempt_accessing_path(
             storage_settings = settings._storage_settings
         else:
             storage_settings = StorageSettings(
-                settings.storage, access_token=access_token
+                settings.storage.root, access_token=access_token
             )
     else:
         if artifact._state.db not in ("default", None) and using_key is None:
@@ -143,8 +143,8 @@ def delete_storage(
     """Delete arbitrary artifact."""
    # TODO is_relative_to is not available in 3.8 and deprecated since 3.12
    # replace with check_path_is_child_of_root but this needs to first be debugged
-    # if not check_path_is_child_of_root(storagepath, settings.storage):
-    if not storagepath.is_relative_to(settings.storage):  # type: ignore
+    # if not check_path_is_child_of_root(storagepath, settings.storage.root):
+    if not storagepath.is_relative_to(settings.storage.root):  # type: ignore
        allow_delete = False
    if setup_settings.instance.keep_artifacts_local:
        allow_delete = storagepath.is_relative_to(  # type: ignore
lamindb/core/subsettings/_creation_settings.py
ADDED
@@ -0,0 +1,38 @@
+from typing import Literal
+
+
+class CreationSettings:
+    artifact_if_hash_exists: Literal[
+        "warn_return_existing", "error", "warn_create_new"
+    ] = "warn_return_existing"
+    """Behavior if file hash exists (default `"warn_return_existing"`).
+
+    One of `["warn_return_existing", "error", "warn_create_new"]`.
+
+    FAQ: :doc:`/faq/idempotency`
+    """
+    artifact_skip_size_hash: bool = False
+    """To speed up registering high numbers of files (default `False`).
+
+    This bypasses queries for size and hash to AWS & GCP.
+
+    It speeds up file creation by about a factor 100.
+    """
+    search_names: bool = True
+    """To speed up creating records (default `True`).
+
+    If `True`, search for alternative names.
+
+    FAQ: :doc:`/faq/idempotency`
+    """
+    artifact_silence_missing_run_warning: bool = False
+    """Silence warning about missing run & transform during artifact creation."""
+    _artifact_use_virtual_keys: bool = True
+    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.
+
+    If `True`, the `key` is **not** used to construct file paths, but file paths are
+    based on the `uid` of artifact.
+    """
+
+
+creation_settings = CreationSettings()
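Usage goes through the `Settings.creation` property added earlier, e.g. when bulk-registering many files:

    import lamindb as ln

    # skip per-file size/hash queries and similar-name search during bulk ingestion
    ln.settings.creation.artifact_skip_size_hash = True
    ln.settings.creation.search_names = False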
lamindb/core/subsettings/_transform_settings.py
ADDED
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+
+class TransformSettings:
+    stem_uid: str | None = None
+    """Defines the version family of the transform.
+
+    For example, all notebooks of the same family have a uid that starts with
+    `"FPnfDtJz8qbE"`.
+
+    The full uids of the notebooks in this family are of form
+    `"{stem_uid}{suffix_uid}"` where the `suffix_uid` encodes the semantic
+    `version`.
+    """
+    version: str | None = None
+    """The version."""
+    name: str | None = None
+    """A name like a notebook or script title."""
+
+
+transform_settings = TransformSettings()
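These fields back the `Settings.transform` property shown earlier, whose docstring gives the intended usage:

    import lamindb as ln

    ln.settings.transform.stem_uid = "FPnfDtJz8qbE"  # version family
    ln.settings.transform.version = "1"
    ln.settings.transform.name = "My good script"
    ln.track()  # typically auto-generates stem_uid & version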
lamindb/core/versioning.py
CHANGED
lamindb/integrations/_vitessce.py
CHANGED
@@ -39,9 +39,10 @@ def save_vitessce_config(vitessce_config: VitessceConfig, description: str) -> Artifact:
         if "url" not in file:
             raise ValueError("Each file must have a 'url' key.")
         filename = file["url"].split("/")[-1]
-        assert filename.endswith(
-            (".anndata.zarr", ".spatialdata.zarr", ".ome.zarr")
-        )
+        if not filename.endswith((".anndata.zarr", ".zarr", ".ome.zarr")):
+            logger.warning(
+                "filename should end with '.anndata.zarr', '.zarr', or '.ome.zarr'."
+            )
         filestem = (
             filename.replace(".anndata.zarr", "")
             .replace(".spatialdata.zarr", "")
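The hard suffix check becomes a warning, so a config whose file URLs don't end in an expected Zarr suffix now saves with a logged warning instead of failing. A sketch of the call, assuming a prebuilt `VitessceConfig` named `vc`:

    from lamindb.integrations import save_vitessce_config

    # unexpected suffixes now log a warning instead of raising
    artifact = save_vitessce_config(vc, description="my dashboard")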
{lamindb-0.74.0.dist-info → lamindb-0.74.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.74.0
+Version: 0.74.2
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -9,24 +9,22 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: lnschema_core==0.70.
-Requires-Dist: lamindb_setup==0.
+Requires-Dist: lnschema_core==0.70.5
+Requires-Dist: lamindb_setup==0.74.1
 Requires-Dist: lamin_utils==0.13.2
-Requires-Dist: lamin_cli==0.
+Requires-Dist: lamin_cli==0.15.0
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
 Requires-Dist: python-dateutil
-Requires-Dist: anndata>=0.8.0
+Requires-Dist: anndata>=0.8.0,<=0.10.8
 Requires-Dist: scipy<1.13.0rc1
 Requires-Dist: fsspec
 Requires-Dist: pandas
 Requires-Dist: graphviz
 Requires-Dist: psycopg2-binary
-Requires-Dist: psutil
 Requires-Dist: lamindb_setup[aws] ; extra == "aws"
-Requires-Dist: bionty==0.44.
-Requires-Dist: pandas<2 ; extra == "dev"
+Requires-Dist: bionty==0.44.2 ; extra == "bionty"
 Requires-Dist: pre-commit ; extra == "dev"
 Requires-Dist: nox ; extra == "dev"
 Requires-Dist: laminci>=0.3 ; extra == "dev"
@@ -59,7 +57,7 @@ Provides-Extra: zarr
 
 # LaminDB - A data framework for biology
 
-- Manage
+- Manage data & metadata with a unified Python API ("lakehouse").
 - Track data lineage across notebooks & pipelines.
 - Integrate registries for experimental metadata & in-house ontologies.
 - Validate, standardize & annotate.
|