lamindb 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +14 -5
- lamindb/_artifact.py +174 -57
- lamindb/_can_curate.py +27 -8
- lamindb/_collection.py +85 -51
- lamindb/_feature.py +177 -41
- lamindb/_finish.py +222 -81
- lamindb/_from_values.py +83 -98
- lamindb/_parents.py +4 -4
- lamindb/_query_set.py +59 -17
- lamindb/_record.py +171 -53
- lamindb/_run.py +4 -4
- lamindb/_save.py +33 -10
- lamindb/_schema.py +135 -38
- lamindb/_storage.py +1 -1
- lamindb/_tracked.py +106 -0
- lamindb/_transform.py +21 -8
- lamindb/_ulabel.py +5 -14
- lamindb/base/validation.py +2 -6
- lamindb/core/__init__.py +13 -14
- lamindb/core/_context.py +39 -36
- lamindb/core/_data.py +29 -25
- lamindb/core/_describe.py +1 -1
- lamindb/core/_django.py +1 -1
- lamindb/core/_feature_manager.py +54 -44
- lamindb/core/_label_manager.py +4 -4
- lamindb/core/_mapped_collection.py +20 -7
- lamindb/core/datasets/__init__.py +6 -1
- lamindb/core/datasets/_core.py +12 -11
- lamindb/core/datasets/_small.py +66 -20
- lamindb/core/exceptions.py +1 -90
- lamindb/core/loaders.py +7 -13
- lamindb/core/relations.py +6 -4
- lamindb/core/storage/_anndata_accessor.py +41 -0
- lamindb/core/storage/_backed_access.py +2 -2
- lamindb/core/storage/_pyarrow_dataset.py +25 -15
- lamindb/core/storage/_tiledbsoma.py +56 -12
- lamindb/core/storage/paths.py +41 -22
- lamindb/core/subsettings/_creation_settings.py +4 -16
- lamindb/curators/__init__.py +2168 -833
- lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
- lamindb/errors.py +96 -0
- lamindb/integrations/_vitessce.py +3 -3
- lamindb/migrations/0069_squashed.py +76 -75
- lamindb/migrations/0075_lamindbv1_part5.py +4 -5
- lamindb/migrations/0082_alter_feature_dtype.py +21 -0
- lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
- lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
- lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
- lamindb/migrations/0086_various.py +95 -0
- lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
- lamindb/migrations/0088_schema_components.py +273 -0
- lamindb/migrations/0088_squashed.py +4372 -0
- lamindb/models.py +423 -156
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/METADATA +10 -7
- lamindb-1.1.0.dist-info/RECORD +95 -0
- lamindb/curators/_spatial.py +0 -528
- lamindb/migrations/0052_squashed.py +0 -1261
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
- lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
- lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
- lamindb/migrations/0060_alter_artifact__actions.py +0 -22
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
- lamindb/migrations/0062_add_is_latest_field.py +0 -32
- lamindb/migrations/0063_populate_latest_field.py +0 -45
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
- lamindb-1.0.4.dist-info/RECORD +0 -102
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
- {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING, Literal
|
4
|
+
from urllib.parse import urlparse
|
4
5
|
|
5
6
|
import pandas as pd
|
6
7
|
import pyarrow as pa
|
@@ -17,6 +18,7 @@ if TYPE_CHECKING:
|
|
17
18
|
from lamindb_setup.core.types import UPathStr
|
18
19
|
from tiledbsoma import Collection as SOMACollection
|
19
20
|
from tiledbsoma import Experiment as SOMAExperiment
|
21
|
+
from tiledbsoma import Measurement as SOMAMeasurement
|
20
22
|
from upath import UPath
|
21
23
|
|
22
24
|
|
@@ -36,9 +38,21 @@ def _load_h5ad_zarr(objpath: UPath):
|
|
36
38
|
|
37
39
|
|
38
40
|
def _tiledb_config_s3(storepath: UPath) -> dict:
|
39
|
-
region = get_storage_region(storepath)
|
40
|
-
tiledb_config = {"vfs.s3.region": region}
|
41
41
|
storage_options = storepath.storage_options
|
42
|
+
tiledb_config = {}
|
43
|
+
|
44
|
+
endpoint_url = storage_options.get("endpoint_url", None)
|
45
|
+
if endpoint_url is not None:
|
46
|
+
tiledb_config["vfs.s3.region"] = ""
|
47
|
+
tiledb_config["vfs.s3.use_virtual_addressing"] = "false"
|
48
|
+
parsed = urlparse(endpoint_url)
|
49
|
+
tiledb_config["vfs.s3.scheme"] = parsed.scheme
|
50
|
+
tiledb_config["vfs.s3.endpoint_override"] = (
|
51
|
+
parsed._replace(scheme="").geturl().lstrip("/")
|
52
|
+
)
|
53
|
+
else:
|
54
|
+
tiledb_config["vfs.s3.region"] = get_storage_region(storepath)
|
55
|
+
|
42
56
|
if "key" in storage_options:
|
43
57
|
tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
|
44
58
|
if "secret" in storage_options:
|
@@ -51,7 +65,7 @@ def _tiledb_config_s3(storepath: UPath) -> dict:
|
|
51
65
|
|
52
66
|
def _open_tiledbsoma(
|
53
67
|
storepath: UPath, mode: Literal["r", "w"] = "r"
|
54
|
-
) -> SOMACollection | SOMAExperiment:
|
68
|
+
) -> SOMACollection | SOMAExperiment | SOMAMeasurement:
|
55
69
|
try:
|
56
70
|
import tiledbsoma as soma
|
57
71
|
except ImportError as e:
|
@@ -71,6 +85,8 @@ def _open_tiledbsoma(
|
|
71
85
|
soma_objects = [obj.name for obj in storepath.iterdir()]
|
72
86
|
if "obs" in soma_objects and "ms" in soma_objects:
|
73
87
|
SOMAType = soma.Experiment
|
88
|
+
elif "var" in soma_objects:
|
89
|
+
SOMAType = soma.Measurement
|
74
90
|
else:
|
75
91
|
SOMAType = soma.Collection
|
76
92
|
return SOMAType.open(storepath_str, mode=mode, context=ctx)
|
@@ -134,17 +150,17 @@ def save_tiledbsoma_experiment(
|
|
134
150
|
)
|
135
151
|
storepath = setup_settings.storage.root / storage_key
|
136
152
|
|
137
|
-
if storepath.protocol == "s3":
|
153
|
+
if storepath.protocol == "s3": # type: ignore
|
138
154
|
ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
|
139
155
|
else:
|
140
156
|
ctx = None
|
141
157
|
|
142
|
-
|
158
|
+
storepath_str = storepath.as_posix()
|
143
159
|
|
144
160
|
add_run_uid = True
|
145
161
|
run_uid_dtype = "category"
|
146
162
|
if appending:
|
147
|
-
with soma.Experiment.open(
|
163
|
+
with soma.Experiment.open(storepath_str, mode="r", context=ctx) as store:
|
148
164
|
obs_schema = store["obs"].schema
|
149
165
|
add_run_uid = "lamin_run_uid" in obs_schema.names
|
150
166
|
# this is needed to enable backwards compatibility with tiledbsoma stores
|
@@ -175,7 +191,7 @@ def save_tiledbsoma_experiment(
|
|
175
191
|
registration_mapping = kwargs.get("registration_mapping", None)
|
176
192
|
if registration_mapping is None and (appending or len(adata_objects) > 1):
|
177
193
|
registration_mapping = soma_io.register_anndatas(
|
178
|
-
experiment_uri=
|
194
|
+
experiment_uri=storepath_str if appending else None,
|
179
195
|
adatas=adata_objects,
|
180
196
|
measurement_name=measurement_name,
|
181
197
|
obs_field_name=obs_id_name,
|
@@ -195,19 +211,19 @@ def save_tiledbsoma_experiment(
|
|
195
211
|
assert len(adata_objects) == 1 # noqa: S101
|
196
212
|
n_observations = adata_objects[0].n_obs
|
197
213
|
|
198
|
-
logger.important(f"Writing the tiledbsoma store to {
|
214
|
+
logger.important(f"Writing the tiledbsoma store to {storepath_str}")
|
199
215
|
for adata_obj in adata_objects:
|
200
|
-
if resize_experiment and soma.Experiment.exists(
|
216
|
+
if resize_experiment and soma.Experiment.exists(storepath_str, context=ctx):
|
201
217
|
# can only happen if registration_mapping is not None
|
202
218
|
soma_io.resize_experiment(
|
203
|
-
|
219
|
+
storepath_str,
|
204
220
|
nobs=n_observations,
|
205
221
|
nvars=registration_mapping.get_var_shapes(),
|
206
222
|
context=ctx,
|
207
223
|
)
|
208
224
|
resize_experiment = False
|
209
225
|
soma_io.from_anndata(
|
210
|
-
|
226
|
+
storepath_str,
|
211
227
|
adata_obj,
|
212
228
|
measurement_name,
|
213
229
|
context=ctx,
|
@@ -217,7 +233,7 @@ def save_tiledbsoma_experiment(
|
|
217
233
|
**kwargs,
|
218
234
|
)
|
219
235
|
|
220
|
-
artifact = Artifact(
|
236
|
+
artifact = Artifact( # type: ignore
|
221
237
|
storepath,
|
222
238
|
key=key,
|
223
239
|
description=description,
|
@@ -229,3 +245,31 @@ def save_tiledbsoma_experiment(
|
|
229
245
|
artifact.otype = "tiledbsoma"
|
230
246
|
|
231
247
|
return artifact.save()
|
248
|
+
|
249
|
+
|
250
|
+
# this is less defensive than _anndata_n_observations
|
251
|
+
# this doesn't really catches errors
|
252
|
+
# assumes that the tiledbsoma object is well-formed
|
253
|
+
def _soma_store_n_observations(obj) -> int:
|
254
|
+
if obj.soma_type in {"SOMADataFrame", "SOMASparseNDArray", "SOMADenseNDArray"}:
|
255
|
+
return obj.non_empty_domain()[0][1] + 1
|
256
|
+
elif obj.soma_type == "SOMAExperiment":
|
257
|
+
return _soma_store_n_observations(obj["obs"])
|
258
|
+
elif obj.soma_type == "SOMAMeasurement":
|
259
|
+
keys = obj.keys()
|
260
|
+
for slot in ("X", "obsm", "obsp"):
|
261
|
+
if slot in keys:
|
262
|
+
return _soma_store_n_observations(next(iter(obj[slot].values())))
|
263
|
+
elif obj.soma_type == "SOMACollection":
|
264
|
+
n_obs = 0
|
265
|
+
for value in obj.values():
|
266
|
+
n_obs += _soma_store_n_observations(value)
|
267
|
+
return n_obs
|
268
|
+
raise ValueError(
|
269
|
+
"Could not infer the number of observations from the tiledbsoma object."
|
270
|
+
)
|
def _soma_n_observations(objectpath: UPath) -> int:
    """Open the tiledbsoma store at ``objectpath`` and count its observations."""
    store = _open_tiledbsoma(objectpath, mode="r")
    with store:
        return _soma_store_n_observations(store)
lamindb/core/storage/paths.py
CHANGED
@@ -4,7 +4,6 @@ import shutil
|
|
4
4
|
from typing import TYPE_CHECKING
|
5
5
|
|
6
6
|
import fsspec
|
7
|
-
from lamin_utils import logger
|
8
7
|
from lamindb_setup.core import StorageSettings
|
9
8
|
from lamindb_setup.core.upath import (
|
10
9
|
LocalPathClasses,
|
@@ -42,25 +41,27 @@ def auto_storage_key_from_artifact_uid(uid: str, suffix: str, is_dir: bool) -> s
|
|
42
41
|
return storage_key
|
43
42
|
|
44
43
|
|
45
|
-
def
|
46
|
-
|
47
|
-
# from the equality checks below
|
48
|
-
# and for fsspec.utils.get_protocol
|
49
|
-
path_str = str(path)
|
50
|
-
root_str = str(root)
|
51
|
-
root_protocol = fsspec.utils.get_protocol(root_str)
|
52
|
-
# check that the protocols are the same first
|
53
|
-
if fsspec.utils.get_protocol(path_str) != root_protocol:
|
54
|
-
return False
|
55
|
-
if root_protocol in {"http", "https"}:
|
56
|
-
# in this case it is a base url, not a file
|
57
|
-
# so formally does not exist
|
44
|
+
def _safely_resolve(upath: UPath) -> UPath:
|
45
|
+
if upath.protocol in {"http", "https"}:
|
58
46
|
resolve_kwargs = {"follow_redirects": False}
|
59
47
|
else:
|
60
48
|
resolve_kwargs = {}
|
61
|
-
return (
|
62
|
-
|
63
|
-
|
49
|
+
return upath.resolve(**resolve_kwargs)
|
50
|
+
|
def check_path_is_child_of_root(path: UPathStr, root: UPathStr) -> bool:
    """Return ``True`` if ``path`` lies inside the storage location ``root``."""
    protocol_of = fsspec.utils.get_protocol
    if protocol_of(str(path)) != protocol_of(str(root)):
        return False
    resolved_path = _safely_resolve(UPath(path))
    resolved_root = _safely_resolve(UPath(root))
    if resolved_path.protocol == "s3":
        # two s3 paths can only nest if they point at the same endpoint
        endpoint_of_path = resolved_path.storage_options.get("endpoint_url", "")
        endpoint_of_root = resolved_root.storage_options.get("endpoint_url", "")
        if endpoint_of_path != endpoint_of_root:
            return False
    # str is needed to eliminate UPath storage_options
    # which affect equality checks
    return UPath(str(resolved_root)) in UPath(str(resolved_path)).parents
64
65
|
|
65
66
|
|
66
67
|
# returns filepath and root of the storage
|
@@ -139,7 +140,20 @@ def store_file_or_folder(
|
|
139
140
|
local_path = UPath(local_path)
|
140
141
|
if not isinstance(storage_path, LocalPathClasses):
|
141
142
|
# this uploads files and directories
|
142
|
-
|
143
|
+
if local_path.is_dir():
|
144
|
+
create_folder = False
|
145
|
+
try:
|
146
|
+
# if storage_path already exists we need to delete it
|
147
|
+
# if local_path is a directory
|
148
|
+
# to replace storage_path correctly
|
149
|
+
if storage_path.stat().as_info()["type"] == "directory":
|
150
|
+
storage_path.rmdir()
|
151
|
+
else:
|
152
|
+
storage_path.unlink()
|
153
|
+
except (FileNotFoundError, PermissionError):
|
154
|
+
pass
|
155
|
+
else:
|
156
|
+
create_folder = None
|
143
157
|
storage_path.upload_from(
|
144
158
|
local_path, create_folder=create_folder, print_progress=print_progress
|
145
159
|
)
|
@@ -156,10 +170,15 @@ def store_file_or_folder(
|
|
156
170
|
|
157
171
|
|
def delete_storage_using_key(
    artifact: Artifact,
    storage_key: str,
    raise_file_not_found_error: bool = True,
    using_key: str | None = None,
) -> None | str:
    """Delete the storage object addressed by ``storage_key`` for ``artifact``.

    Resolves the concrete filepath for the artifact's storage key and
    delegates deletion; forwards ``delete_storage``'s return value
    (``None`` on success, a status string otherwise).
    """
    path, _ = attempt_accessing_path(artifact, storage_key, using_key=using_key)
    return delete_storage(path, raise_file_not_found_error=raise_file_not_found_error)
163
182
|
|
164
183
|
|
165
184
|
def delete_storage(
|
@@ -178,5 +197,5 @@ def delete_storage(
|
|
178
197
|
elif raise_file_not_found_error:
|
179
198
|
raise FileNotFoundError(f"{storagepath} is not an existing path!")
|
180
199
|
else:
|
181
|
-
|
200
|
+
return "did-not-delete"
|
182
201
|
return None
|
@@ -1,13 +1,8 @@
|
|
1
|
-
from typing import Literal
|
2
|
-
|
3
|
-
|
4
1
|
class CreationSettings:
|
5
|
-
|
6
|
-
|
7
|
-
] = "warn_return_existing"
|
8
|
-
"""Behavior if file hash exists (default `"warn_return_existing"`).
|
2
|
+
search_names: bool = True
|
3
|
+
"""Switch off to speed up creating records (default `True`).
|
9
4
|
|
10
|
-
|
5
|
+
If `True`, search for alternative names and avoids duplicates.
|
11
6
|
|
12
7
|
FAQ: :doc:`/faq/idempotency`
|
13
8
|
"""
|
@@ -18,15 +13,8 @@ class CreationSettings:
|
|
18
13
|
|
19
14
|
It speeds up file creation by about a factor 100.
|
20
15
|
"""
|
21
|
-
search_names: bool = True
|
22
|
-
"""To speed up creating records (default `True`).
|
23
|
-
|
24
|
-
If `True`, search for alternative names.
|
25
|
-
|
26
|
-
FAQ: :doc:`/faq/idempotency`
|
27
|
-
"""
|
28
16
|
artifact_silence_missing_run_warning: bool = False
|
29
|
-
"""Silence warning about missing run & transform during artifact creation."""
|
17
|
+
"""Silence warning about missing run & transform during artifact creation (default `False`)."""
|
30
18
|
_artifact_use_virtual_keys: bool = True
|
31
19
|
"""Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.
|
32
20
|
|