lamindb 1.7a1__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/core/_context.py +7 -11
- lamindb/core/_settings.py +31 -5
- lamindb/core/storage/_backed_access.py +36 -28
- lamindb/core/storage/_polars_lazy_df.py +44 -5
- lamindb/curators/core.py +18 -2
- lamindb/migrations/0115_alter_space_uid.py +52 -0
- lamindb/migrations/{0114_squashed.py → 0115_squashed.py} +3 -2
- lamindb/models/_django.py +25 -6
- lamindb/models/artifact.py +5 -36
- lamindb/models/feature.py +14 -20
- lamindb/models/sqlrecord.py +24 -14
- lamindb/models/storage.py +83 -34
- lamindb/setup/__init__.py +1 -0
- lamindb/setup/_switch.py +16 -0
- {lamindb-1.7a1.dist-info → lamindb-1.8.0.dist-info}/METADATA +3 -3
- {lamindb-1.7a1.dist-info → lamindb-1.8.0.dist-info}/RECORD +19 -17
- {lamindb-1.7a1.dist-info → lamindb-1.8.0.dist-info}/LICENSE +0 -0
- {lamindb-1.7a1.dist-info → lamindb-1.8.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/core/_context.py
CHANGED
@@ -17,20 +17,18 @@ from lamin_utils import logger
|
|
17
17
|
from lamindb_setup.core import deprecated
|
18
18
|
from lamindb_setup.core.hashing import hash_file
|
19
19
|
|
20
|
-
from
|
21
|
-
from lamindb.base.ids import base62_12
|
22
|
-
from lamindb.models import Run, Transform, format_field_value
|
23
|
-
|
24
|
-
from ..core._settings import settings
|
20
|
+
from ..base.ids import base62_12
|
25
21
|
from ..errors import (
|
26
22
|
InvalidArgument,
|
27
23
|
TrackNotCalled,
|
28
24
|
UpdateContext,
|
29
25
|
)
|
26
|
+
from ..models import Run, Transform, format_field_value
|
30
27
|
from ..models._is_versioned import bump_version as bump_version_function
|
31
28
|
from ..models._is_versioned import (
|
32
29
|
increment_base62,
|
33
30
|
)
|
31
|
+
from ._settings import is_read_only_connection, settings
|
34
32
|
from ._sync_git import get_transform_reference_from_git_repo
|
35
33
|
from ._track_environment import track_python_environment
|
36
34
|
|
@@ -334,8 +332,8 @@ class Context:
|
|
334
332
|
Args:
|
335
333
|
transform: A transform (stem) `uid` (or record). If `None`, auto-creates a `transform` with its `uid`.
|
336
334
|
project: A project (or its `name` or `uid`) for labeling entities.
|
337
|
-
space: A restricted space (or its `name` or `uid`) in which to store
|
338
|
-
Default: the `"
|
335
|
+
space: A restricted space (or its `name` or `uid`) in which to store entities.
|
336
|
+
Default: the `"all"` space. Note that bionty entities ignore this setting and always get written to the `"all"` space.
|
339
337
|
If you want to manually move entities to a different space, set the `.space` field (:doc:`docs:access`).
|
340
338
|
branch: A branch (or its `name` or `uid`) on which to store records.
|
341
339
|
params: A dictionary of parameters to track for the run.
|
@@ -365,10 +363,8 @@ class Context:
|
|
365
363
|
save_context_core,
|
366
364
|
)
|
367
365
|
|
368
|
-
instance_settings = ln_setup.settings.instance
|
369
366
|
# similar logic here: https://github.com/laminlabs/lamindb/pull/2527
|
370
|
-
|
371
|
-
if instance_settings.dialect == "postgresql" and "read" in instance_settings.db:
|
367
|
+
if is_read_only_connection():
|
372
368
|
logger.warning("skipping track(), connected in read-only mode")
|
373
369
|
return None
|
374
370
|
if project is None:
|
@@ -856,7 +852,7 @@ class Context:
|
|
856
852
|
and not transform_was_saved
|
857
853
|
):
|
858
854
|
raise UpdateContext(
|
859
|
-
f'{transform.created_by.name} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* key and `ln.track("{
|
855
|
+
f'{transform.created_by.name} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* key and `ln.track("{base62_12()}0000")`.'
|
860
856
|
)
|
861
857
|
# check whether transform source code was already saved
|
862
858
|
if transform_was_saved:
|
lamindb/core/_settings.py
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import os
|
4
|
+
import sys
|
4
5
|
from typing import TYPE_CHECKING
|
5
6
|
|
6
7
|
import lamindb_setup as ln_setup
|
7
8
|
from lamin_utils import colors, logger
|
9
|
+
from lamindb_setup import settings as setup_settings
|
8
10
|
from lamindb_setup._set_managed_storage import set_managed_storage
|
9
|
-
from lamindb_setup.core
|
11
|
+
from lamindb_setup.core import deprecated
|
10
12
|
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
11
13
|
|
12
14
|
from .subsettings._annotation_settings import AnnotationSettings, annotation_settings
|
@@ -19,6 +21,15 @@ if TYPE_CHECKING:
|
|
19
21
|
from lamindb_setup.core._settings_storage import StorageSettings
|
20
22
|
from upath import UPath
|
21
23
|
|
24
|
+
|
25
|
+
def is_read_only_connection() -> bool:
|
26
|
+
instance = setup_settings.instance
|
27
|
+
if instance.dialect == "postgresql":
|
28
|
+
db_url = instance.db
|
29
|
+
return "read" in db_url or "public" in db_url
|
30
|
+
return False
|
31
|
+
|
32
|
+
|
22
33
|
VERBOSITY_TO_INT = {
|
23
34
|
"error": 0, # 40
|
24
35
|
"warning": 1, # 30
|
@@ -44,6 +55,9 @@ class Settings:
|
|
44
55
|
self._sync_git_repo: str | None = None
|
45
56
|
|
46
57
|
def __repr__(self) -> str: # pragma: no cover
|
58
|
+
if "sphinx" in sys.modules:
|
59
|
+
return object.__repr__(self)
|
60
|
+
|
47
61
|
cls_name = colors.green(self.__class__.__name__)
|
48
62
|
verbosity_color = colors.yellow if self.verbosity == "warning" else colors.green
|
49
63
|
verbosity_str = verbosity_color(self.verbosity)
|
@@ -181,6 +195,8 @@ class Settings:
|
|
181
195
|
def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
|
182
196
|
if isinstance(path_kwargs, tuple):
|
183
197
|
path, kwargs = path_kwargs
|
198
|
+
if isinstance(kwargs, str):
|
199
|
+
kwargs = {"host": kwargs}
|
184
200
|
else:
|
185
201
|
path, kwargs = path_kwargs, {}
|
186
202
|
set_managed_storage(path, **kwargs)
|
@@ -196,18 +212,28 @@ class Settings:
|
|
196
212
|
return ln_setup.settings.cache_dir
|
197
213
|
|
198
214
|
@property
|
199
|
-
def
|
215
|
+
def local_storage(self) -> StorageSettings:
|
200
216
|
"""An additional local default storage (a path to its root).
|
201
217
|
|
202
218
|
Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
|
203
219
|
|
204
220
|
Guide: :doc:`faq/keep-artifacts-local`
|
205
221
|
"""
|
206
|
-
return ln_setup.settings.instance.
|
222
|
+
return ln_setup.settings.instance.local_storage
|
223
|
+
|
224
|
+
@local_storage.setter
|
225
|
+
def local_storage(self, local_root: Path):
|
226
|
+
ln_setup.settings.instance.local_storage = local_root
|
227
|
+
|
228
|
+
@property
|
229
|
+
@deprecated("local_storage")
|
230
|
+
def storage_local(self) -> StorageSettings:
|
231
|
+
return self.local_storage
|
207
232
|
|
208
233
|
@storage_local.setter
|
209
|
-
|
210
|
-
|
234
|
+
@deprecated("local_storage")
|
235
|
+
def storage_local(self, local_root_host: tuple[Path | str, str]):
|
236
|
+
self.local_storage = local_root_host # type: ignore
|
211
237
|
|
212
238
|
@property
|
213
239
|
def verbosity(self) -> str:
|
lamindb/core/storage/_backed_access.py
CHANGED
@@ -163,6 +163,11 @@ def _open_dataframe(
|
|
163
163
|
engine: Literal["pyarrow", "polars"] = "pyarrow",
|
164
164
|
**kwargs,
|
165
165
|
) -> PyArrowDataset | Iterator[PolarsLazyFrame]:
|
166
|
+
if engine not in {"pyarrow", "polars"}:
|
167
|
+
raise ValueError(
|
168
|
+
f"Unknown engine: {engine}. It should be 'pyarrow' or 'polars'."
|
169
|
+
)
|
170
|
+
|
166
171
|
df_suffix: str
|
167
172
|
if suffix is None:
|
168
173
|
df_suffixes = _flat_suffixes(paths)
|
@@ -175,34 +180,37 @@ def _open_dataframe(
|
|
175
180
|
else:
|
176
181
|
df_suffix = suffix
|
177
182
|
|
178
|
-
if engine == "pyarrow":
|
179
|
-
if df_suffix not in PYARROW_SUFFIXES:
|
180
|
-
raise ValueError(
|
181
|
-
f"{df_suffix} files are not supported by pyarrow, "
|
182
|
-
f"they should have one of these formats: {', '.join(PYARROW_SUFFIXES)}."
|
183
|
-
)
|
184
|
-
# this checks that the filesystem is the same for all paths
|
185
|
-
# this is a requirement of pyarrow.dataset.dataset
|
186
|
-
if not isinstance(paths, Path): # is a list then
|
187
|
-
fs = getattr(paths[0], "fs", None)
|
188
|
-
for path in paths[1:]:
|
189
|
-
# this assumes that the filesystems are cached by fsspec
|
190
|
-
if getattr(path, "fs", None) is not fs:
|
191
|
-
raise ValueError(
|
192
|
-
"The collection has artifacts with different filesystems, "
|
193
|
-
"this is not supported by pyarrow."
|
194
|
-
)
|
195
|
-
dataframe = _open_pyarrow_dataset(paths, **kwargs)
|
196
|
-
elif engine == "polars":
|
197
|
-
if df_suffix not in POLARS_SUFFIXES:
|
198
|
-
raise ValueError(
|
199
|
-
f"{df_suffix} files are not supported by polars, "
|
200
|
-
f"they should have one of these formats: {', '.join(POLARS_SUFFIXES)}."
|
201
|
-
)
|
202
|
-
dataframe = _open_polars_lazy_df(paths, **kwargs)
|
203
|
-
else:
|
183
|
+
if engine == "pyarrow" and df_suffix not in PYARROW_SUFFIXES:
|
204
184
|
raise ValueError(
|
205
|
-
f"
|
185
|
+
f"{df_suffix} files are not supported by pyarrow, "
|
186
|
+
f"they should have one of these formats: {', '.join(PYARROW_SUFFIXES)}."
|
187
|
+
)
|
188
|
+
elif engine == "polars" and df_suffix not in POLARS_SUFFIXES:
|
189
|
+
raise ValueError(
|
190
|
+
f"{df_suffix} files are not supported by polars, "
|
191
|
+
f"they should have one of these formats: {', '.join(POLARS_SUFFIXES)}."
|
206
192
|
)
|
207
193
|
|
208
|
-
|
194
|
+
polars_without_fsspec = engine == "polars" and not kwargs.get("use_fsspec", False)
|
195
|
+
if (engine == "pyarrow" or polars_without_fsspec) and not isinstance(paths, Path):
|
196
|
+
# this checks that the filesystem is the same for all paths
|
197
|
+
# this is a requirement of pyarrow.dataset.dataset
|
198
|
+
fs = getattr(paths[0], "fs", None)
|
199
|
+
for path in paths[1:]:
|
200
|
+
# this assumes that the filesystems are cached by fsspec
|
201
|
+
if getattr(path, "fs", None) is not fs:
|
202
|
+
engine_msg = (
|
203
|
+
"polars engine without passing `use_fsspec=True`"
|
204
|
+
if engine == "polars"
|
205
|
+
else "pyarrow engine"
|
206
|
+
)
|
207
|
+
raise ValueError(
|
208
|
+
"The collection has artifacts with different filesystems, "
|
209
|
+
f"this is not supported for {engine_msg}."
|
210
|
+
)
|
211
|
+
|
212
|
+
return (
|
213
|
+
_open_pyarrow_dataset(paths, **kwargs)
|
214
|
+
if engine == "pyarrow"
|
215
|
+
else _open_polars_lazy_df(paths, **kwargs)
|
216
|
+
)
|
lamindb/core/storage/_polars_lazy_df.py
CHANGED
@@ -4,6 +4,8 @@ from contextlib import contextmanager
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import TYPE_CHECKING
|
6
6
|
|
7
|
+
from lamindb_setup.core._settings_storage import get_storage_region
|
8
|
+
|
7
9
|
if TYPE_CHECKING:
|
8
10
|
from collections.abc import Iterator
|
9
11
|
|
@@ -13,9 +15,35 @@ if TYPE_CHECKING:
|
|
13
15
|
POLARS_SUFFIXES = (".parquet", ".csv", ".ndjson", ".ipc")
|
14
16
|
|
15
17
|
|
18
|
+
def _polars_storage_options(storepath: UPath) -> dict[str, str | bool]:
|
19
|
+
storage_options: dict[str, str | bool] = {}
|
20
|
+
s3fs_options = storepath.storage_options
|
21
|
+
|
22
|
+
endpoint_url = s3fs_options.get("endpoint_url", None)
|
23
|
+
if endpoint_url is not None:
|
24
|
+
storage_options["aws_virtual_hosted_style_request"] = False
|
25
|
+
storage_options["aws_endpoint_url"] = endpoint_url
|
26
|
+
if endpoint_url.startswith("http://"):
|
27
|
+
storage_options["aws_allow_http"] = True
|
28
|
+
else:
|
29
|
+
storage_options["aws_region"] = get_storage_region(storepath)
|
30
|
+
|
31
|
+
if s3fs_options.get("anon", False):
|
32
|
+
storage_options["aws_skip_signature"] = True
|
33
|
+
else:
|
34
|
+
if "key" in s3fs_options:
|
35
|
+
storage_options["aws_access_key_id"] = s3fs_options["key"]
|
36
|
+
if "secret" in s3fs_options:
|
37
|
+
storage_options["aws_secret_access_key"] = s3fs_options["secret"]
|
38
|
+
if "token" in s3fs_options:
|
39
|
+
storage_options["aws_session_token"] = s3fs_options["token"]
|
40
|
+
|
41
|
+
return storage_options
|
42
|
+
|
43
|
+
|
16
44
|
@contextmanager
|
17
45
|
def _open_polars_lazy_df(
|
18
|
-
paths: UPath | list[UPath], **kwargs
|
46
|
+
paths: UPath | list[UPath], use_fsspec: bool = False, **kwargs
|
19
47
|
) -> Iterator[PolarsLazyFrame]:
|
20
48
|
try:
|
21
49
|
import polars as pl
|
@@ -38,14 +66,25 @@ def _open_polars_lazy_df(
|
|
38
66
|
path_list += [p for p in path.rglob("*") if p.suffix != ""]
|
39
67
|
else:
|
40
68
|
path_list.append(path)
|
69
|
+
# assume the filesystem is the same for all
|
70
|
+
# it is checked in _open_dataframe
|
71
|
+
path0 = path_list[0]
|
72
|
+
storage_options = None
|
73
|
+
if not use_fsspec:
|
74
|
+
storage_options = kwargs.pop("storage_options", None)
|
75
|
+
if path0.protocol == "s3" and storage_options is None:
|
76
|
+
storage_options = _polars_storage_options(path0)
|
41
77
|
|
42
78
|
open_files = []
|
43
79
|
|
44
80
|
try:
|
45
81
|
for path in path_list:
|
46
|
-
open_files.append(path.open(mode="rb"))
|
82
|
+
open_files.append(path.open(mode="rb") if use_fsspec else path.as_posix())
|
47
83
|
|
48
|
-
yield scans[path_list[0].suffix](
|
84
|
+
yield scans[path_list[0].suffix](
|
85
|
+
open_files, storage_options=storage_options, **kwargs
|
86
|
+
)
|
49
87
|
finally:
|
50
|
-
|
51
|
-
open_file
|
88
|
+
if use_fsspec:
|
89
|
+
for open_file in open_files:
|
90
|
+
open_file.close()
|
lamindb/curators/core.py
CHANGED
@@ -175,9 +175,18 @@ class Curator:
|
|
175
175
|
- :class:`~lamindb.curators.AnnDataCurator`
|
176
176
|
- :class:`~lamindb.curators.MuDataCurator`
|
177
177
|
- :class:`~lamindb.curators.SpatialDataCurator`
|
178
|
+
- :class:`~lamindb.curators.TiledbsomaExperimentCurator`
|
178
179
|
"""
|
179
180
|
|
180
181
|
def __init__(self, dataset: Any, schema: Schema | None = None):
|
182
|
+
if not isinstance(schema, Schema):
|
183
|
+
raise InvalidArgument("schema argument must be a Schema record.")
|
184
|
+
|
185
|
+
if schema.pk is None:
|
186
|
+
raise ValueError(
|
187
|
+
"Schema must be saved before curation. Please save it using '.save()'."
|
188
|
+
)
|
189
|
+
|
181
190
|
self._artifact: Artifact = None # pass the dataset as an artifact
|
182
191
|
self._dataset: Any = dataset # pass the dataset as a UPathStr or data object
|
183
192
|
if isinstance(self._dataset, Artifact):
|
@@ -463,12 +472,15 @@ class DataFrameCurator(Curator):
|
|
463
472
|
slot: str | None = None,
|
464
473
|
) -> None:
|
465
474
|
super().__init__(dataset=dataset, schema=schema)
|
475
|
+
|
466
476
|
categoricals = []
|
467
477
|
features = []
|
468
478
|
feature_ids: set[int] = set()
|
479
|
+
|
469
480
|
if schema.flexible:
|
470
481
|
features += Feature.filter(name__in=self._dataset.keys()).list()
|
471
482
|
feature_ids = {feature.id for feature in features}
|
483
|
+
|
472
484
|
if schema.n > 0:
|
473
485
|
if schema._index_feature_uid is not None:
|
474
486
|
schema_features = [
|
@@ -488,6 +500,7 @@ class DataFrameCurator(Curator):
|
|
488
500
|
features.extend(schema_features)
|
489
501
|
else:
|
490
502
|
assert schema.itype is not None # noqa: S101
|
503
|
+
|
491
504
|
pandera_columns = {}
|
492
505
|
if features or schema._index_feature_uid is not None:
|
493
506
|
# populate features
|
@@ -640,8 +653,11 @@ class DataFrameCurator(Curator):
|
|
640
653
|
self._cat_manager_validate()
|
641
654
|
except (pandera.errors.SchemaError, pandera.errors.SchemaErrors) as err:
|
642
655
|
self._is_validated = False
|
643
|
-
|
644
|
-
|
656
|
+
has_dtype_error = "WRONG_DATATYPE" in str(err)
|
657
|
+
error_msg = str(err)
|
658
|
+
if has_dtype_error:
|
659
|
+
error_msg += " ▶ Hint: Consider setting 'coerce_datatype=True' to attempt coercing/converting values during validation to the pre-defined dtype."
|
660
|
+
raise ValidationError(error_msg) from err
|
645
661
|
else:
|
646
662
|
self._cat_manager_validate()
|
647
663
|
|
lamindb/migrations/0115_alter_space_uid.py
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Generated by Django 5.2 on 2025-07-06 23:09
|
2
|
+
|
3
|
+
from django.db import migrations
|
4
|
+
|
5
|
+
import lamindb.base.fields
|
6
|
+
|
7
|
+
|
8
|
+
def extenddefault_values(apps, schema_editor):
|
9
|
+
"""Lowercase default values for Space and Branch models."""
|
10
|
+
Space = apps.get_model("lamindb", "Space")
|
11
|
+
Branch = apps.get_model("lamindb", "Branch")
|
12
|
+
|
13
|
+
space = Space.objects.get(uid="a")
|
14
|
+
space.uid = 12 * "a"
|
15
|
+
space.save()
|
16
|
+
|
17
|
+
trash_branch = Branch.objects.get(uid="t")
|
18
|
+
trash_branch.uid = 12 * "t"
|
19
|
+
trash_branch.save()
|
20
|
+
|
21
|
+
archive_branch = Branch.objects.get(uid="a")
|
22
|
+
archive_branch.uid = 12 * "a"
|
23
|
+
archive_branch.save()
|
24
|
+
|
25
|
+
main_branch = Branch.objects.get(uid="m")
|
26
|
+
main_branch.uid = 12 * "m"
|
27
|
+
main_branch.save()
|
28
|
+
|
29
|
+
|
30
|
+
class Migration(migrations.Migration):
|
31
|
+
dependencies = [
|
32
|
+
("lamindb", "0114_alter_run__status_code"),
|
33
|
+
]
|
34
|
+
|
35
|
+
operations = [
|
36
|
+
migrations.AlterField(
|
37
|
+
model_name="space",
|
38
|
+
name="uid",
|
39
|
+
field=lamindb.base.fields.CharField(
|
40
|
+
blank=True,
|
41
|
+
db_default="aaaaaaaaaaaa",
|
42
|
+
db_index=True,
|
43
|
+
default="aaaaaaaaaaaaa",
|
44
|
+
editable=False,
|
45
|
+
max_length=12,
|
46
|
+
unique=True,
|
47
|
+
),
|
48
|
+
),
|
49
|
+
migrations.RunPython(
|
50
|
+
extenddefault_values,
|
51
|
+
),
|
52
|
+
]
|
lamindb/migrations/{0114_squashed.py → 0115_squashed.py}
RENAMED
@@ -136,6 +136,7 @@ class Migration(migrations.Migration):
|
|
136
136
|
("lamindb", "0112_alter_recordartifact_feature_and_more"),
|
137
137
|
("lamindb", "0113_lower_case_branch_and_space_names"),
|
138
138
|
("lamindb", "0114_alter_run__status_code"),
|
139
|
+
("lamindb", "0115_alter_space_uid"),
|
139
140
|
]
|
140
141
|
|
141
142
|
dependencies = [] # type: ignore
|
@@ -214,9 +215,9 @@ class Migration(migrations.Migration):
|
|
214
215
|
"uid",
|
215
216
|
lamindb.base.fields.CharField(
|
216
217
|
blank=True,
|
217
|
-
db_default="
|
218
|
+
db_default="aaaaaaaaaaaa",
|
218
219
|
db_index=True,
|
219
|
-
default="
|
220
|
+
default="aaaaaaaaaaaaa",
|
220
221
|
editable=False,
|
221
222
|
max_length=12,
|
222
223
|
unique=True,
|
lamindb/models/_django.py
CHANGED
@@ -21,12 +21,10 @@ if TYPE_CHECKING:
|
|
21
21
|
def patch_many_to_many_descriptor() -> None:
|
22
22
|
"""Patches Django's `ManyToManyDescriptor.__get__` method to suggest better errors when saving relationships of an unsaved model.
|
23
23
|
|
24
|
-
Before this patch: Cryptic errors are raised when relationships of an unsaved
|
25
|
-
record are attempted to be modified.
|
24
|
+
Before this patch: Cryptic errors are raised when relationships of an unsaved record are attempted to be modified.
|
26
25
|
|
27
|
-
After this patch: Attempts to access M2M relationships on unsaved objects
|
28
|
-
|
29
|
-
before relationship creation.
|
26
|
+
After this patch: Attempts to access M2M relationships on unsaved objects will raise ValueError,
|
27
|
+
suggesting explicit .save() of the record to be modified before relationship creation.
|
30
28
|
"""
|
31
29
|
from django.db.models.fields.related_descriptors import ManyToManyDescriptor
|
32
30
|
|
@@ -37,7 +35,28 @@ def patch_many_to_many_descriptor() -> None:
|
|
37
35
|
raise ValueError(
|
38
36
|
f"You are trying to access the many-to-many relationships of an unsaved {instance.__class__.__name__} object. Please save it first using '.save()'."
|
39
37
|
)
|
40
|
-
|
38
|
+
|
39
|
+
manager = original_get(self, instance, cls)
|
40
|
+
if manager is None or not hasattr(manager, "add"):
|
41
|
+
return manager
|
42
|
+
|
43
|
+
original_manager_add = manager.add
|
44
|
+
|
45
|
+
def patched_manager_add(*objs, **kwargs):
|
46
|
+
try:
|
47
|
+
return original_manager_add(*objs, **kwargs)
|
48
|
+
except ValueError as e:
|
49
|
+
if "Cannot add" in str(e) and "database" in str(e):
|
50
|
+
source_db = manager.instance._state.db
|
51
|
+
|
52
|
+
raise ValueError(
|
53
|
+
f"Cannot label a record from instance '{source_db}'. "
|
54
|
+
f"Please save the record first to your instance using '.save()'."
|
55
|
+
) from None
|
56
|
+
raise
|
57
|
+
|
58
|
+
manager.add = patched_manager_add
|
59
|
+
return manager
|
41
60
|
|
42
61
|
ManyToManyDescriptor.__get__ = patched_get
|
43
62
|
|
lamindb/models/artifact.py
CHANGED
@@ -37,6 +37,7 @@ from lamindb.errors import FieldValidationError, UnknownStorageLocation
|
|
37
37
|
from lamindb.models.query_set import QuerySet
|
38
38
|
|
39
39
|
from ..base.users import current_user_id
|
40
|
+
from ..core._settings import is_read_only_connection, settings
|
40
41
|
from ..core.loaders import load_to_memory
|
41
42
|
from ..core.storage import (
|
42
43
|
LocalPathClasses,
|
@@ -279,8 +280,6 @@ def process_data(
|
|
279
280
|
|
280
281
|
# in case we have an in-memory representation, we need to write it to disk
|
281
282
|
if memory_rep is not None:
|
282
|
-
from lamindb import settings
|
283
|
-
|
284
283
|
path = settings.cache_dir / f"{provisional_uid}{suffix}"
|
285
284
|
if isinstance(format, dict):
|
286
285
|
format.pop("suffix", None)
|
@@ -301,8 +300,6 @@ def get_stat_or_artifact(
|
|
301
300
|
) -> Union[tuple[int, str | None, str | None, int | None, Artifact | None], Artifact]:
|
302
301
|
"""Retrieves file statistics or an existing artifact based on the path, hash, and key."""
|
303
302
|
n_files = None
|
304
|
-
from lamindb import settings
|
305
|
-
|
306
303
|
if settings.creation.artifact_skip_size_hash:
|
307
304
|
return None, None, None, n_files, None
|
308
305
|
stat = path.stat() # one network request
|
@@ -407,8 +404,6 @@ def get_artifact_kwargs_from_data(
|
|
407
404
|
skip_check_exists: bool = False,
|
408
405
|
overwrite_versions: bool | None = None,
|
409
406
|
):
|
410
|
-
from lamindb import settings
|
411
|
-
|
412
407
|
run = get_run(run)
|
413
408
|
memory_rep, path, suffix, storage, use_existing_storage_key = process_data(
|
414
409
|
provisional_uid,
|
@@ -635,8 +630,6 @@ def _populate_subsequent_runs_(record: Union[Artifact, Collection], run: Run):
|
|
635
630
|
|
636
631
|
# also see current_run() in core._data
|
637
632
|
def get_run(run: Run | None) -> Run | None:
|
638
|
-
from lamindb import settings
|
639
|
-
|
640
633
|
from .._tracked import get_current_tracked_run
|
641
634
|
from ..core._context import context
|
642
635
|
|
@@ -645,11 +638,7 @@ def get_run(run: Run | None) -> Run | None:
|
|
645
638
|
if run is None:
|
646
639
|
run = context.run
|
647
640
|
if run is None and not settings.creation.artifact_silence_missing_run_warning:
|
648
|
-
|
649
|
-
# normally for our connection strings the read-only role name has "read" in it
|
650
|
-
# not absolutely safe but the worst case is that the warning is not shown
|
651
|
-
instance = setup_settings.instance
|
652
|
-
if instance.dialect != "postgresql" or "read" not in instance.db:
|
641
|
+
if not is_read_only_connection():
|
653
642
|
logger.warning(WARNING_RUN_TRANSFORM)
|
654
643
|
# suppress run by passing False
|
655
644
|
elif not run:
|
@@ -1388,7 +1377,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
1388
1377
|
default_storage = kwargs.pop("default_storage")
|
1389
1378
|
else:
|
1390
1379
|
if setup_settings.instance.keep_artifacts_local:
|
1391
|
-
default_storage = setup_settings.instance.
|
1380
|
+
default_storage = setup_settings.instance.local_storage.record
|
1392
1381
|
else:
|
1393
1382
|
default_storage = setup_settings.instance.storage.record
|
1394
1383
|
using_key = kwargs.pop("using_key", None)
|
@@ -1568,15 +1557,11 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
1568
1557
|
artifact.path
|
1569
1558
|
#> PosixPath('/home/runner/work/lamindb/lamindb/docs/guide/mydata/myfile.csv')
|
1570
1559
|
"""
|
1571
|
-
from lamindb import settings
|
1572
|
-
|
1573
1560
|
filepath, _ = filepath_from_artifact(self, using_key=settings._using_key)
|
1574
1561
|
return filepath
|
1575
1562
|
|
1576
1563
|
@property
|
1577
1564
|
def _cache_path(self) -> UPath:
|
1578
|
-
from lamindb import settings
|
1579
|
-
|
1580
1565
|
filepath, cache_key = filepath_cache_key_from_artifact(
|
1581
1566
|
self, using_key=settings._using_key
|
1582
1567
|
)
|
@@ -2061,8 +2046,6 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
2061
2046
|
artifacts = ln.Artifact.from_dir(dir_path)
|
2062
2047
|
ln.save(artifacts)
|
2063
2048
|
"""
|
2064
|
-
from lamindb import settings
|
2065
|
-
|
2066
2049
|
folderpath: UPath = create_path(path) # returns Path for local
|
2067
2050
|
default_storage = settings.storage.record
|
2068
2051
|
using_key = settings._using_key
|
@@ -2172,8 +2155,6 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
2172
2155
|
|
2173
2156
|
However, it will update the suffix if it changes.
|
2174
2157
|
"""
|
2175
|
-
from lamindb import settings
|
2176
|
-
|
2177
2158
|
default_storage = settings.storage.record
|
2178
2159
|
kwargs, privates = get_artifact_kwargs_from_data(
|
2179
2160
|
provisional_uid=self.uid,
|
@@ -2335,8 +2316,6 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
2335
2316
|
"Only a tiledbsoma store can be openened with `mode!='r'`."
|
2336
2317
|
)
|
2337
2318
|
|
2338
|
-
from lamindb import settings
|
2339
|
-
|
2340
2319
|
using_key = settings._using_key
|
2341
2320
|
filepath, cache_key = filepath_cache_key_from_artifact(
|
2342
2321
|
self, using_key=using_key
|
@@ -2441,8 +2420,6 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
2441
2420
|
>>> artifact.load()
|
2442
2421
|
PosixPath('/home/runner/work/lamindb/lamindb/docs/guide/mydata/.lamindb/jb7BY5UJoQVGMUOKiLcn.jpg')
|
2443
2422
|
"""
|
2444
|
-
from lamindb import settings
|
2445
|
-
|
2446
2423
|
if self._overwrite_versions and not self.is_latest:
|
2447
2424
|
raise ValueError(INCONSISTENT_STATE_MSG)
|
2448
2425
|
|
@@ -2508,8 +2485,6 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
2508
2485
|
artifact.cache()
|
2509
2486
|
#> PosixPath('/home/runner/work/Caches/lamindb/lamindb-ci/lndb-storage/pbmc68k.h5ad')
|
2510
2487
|
"""
|
2511
|
-
from lamindb import settings
|
2512
|
-
|
2513
2488
|
if self._overwrite_versions and not self.is_latest:
|
2514
2489
|
raise ValueError(INCONSISTENT_STATE_MSG)
|
2515
2490
|
|
@@ -2706,7 +2681,7 @@ class Artifact(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
2706
2681
|
if upload and setup_settings.instance.keep_artifacts_local:
|
2707
2682
|
# switch local storage location to cloud
|
2708
2683
|
local_path = self.path
|
2709
|
-
self.storage_id = setup_settings.instance.storage.
|
2684
|
+
self.storage_id = setup_settings.instance.storage._id
|
2710
2685
|
self._local_filepath = local_path
|
2711
2686
|
# switch to virtual storage key upon upload
|
2712
2687
|
# the local filepath is already cached at that point
|
@@ -2856,8 +2831,6 @@ def _track_run_input(
|
|
2856
2831
|
if is_run_input is False:
|
2857
2832
|
return
|
2858
2833
|
|
2859
|
-
from lamindb import settings
|
2860
|
-
|
2861
2834
|
from .._tracked import get_current_tracked_run
|
2862
2835
|
from ..core._context import context
|
2863
2836
|
from .collection import Collection
|
@@ -2915,11 +2888,7 @@ def _track_run_input(
|
|
2915
2888
|
# we don't have a run record
|
2916
2889
|
if run is None:
|
2917
2890
|
if settings.track_run_inputs:
|
2918
|
-
|
2919
|
-
# normally for our connection strings the read-only role name has "read" in it
|
2920
|
-
# not absolutely safe but the worst case is that the warning is not shown
|
2921
|
-
instance = setup_settings.instance
|
2922
|
-
if instance.dialect != "postgresql" or "read" not in instance.db:
|
2891
|
+
if not is_read_only_connection():
|
2923
2892
|
logger.warning(WARNING_NO_INPUT)
|
2924
2893
|
# assume we have a run record
|
2925
2894
|
else:
|
lamindb/models/feature.py
CHANGED
@@ -535,38 +535,32 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
535
535
|
|
536
536
|
A simple `"str"` feature.::
|
537
537
|
|
538
|
-
|
539
|
-
name="sample_note",
|
540
|
-
dtype="str",
|
541
|
-
).save()
|
538
|
+
ln.Feature(name="sample_note", dtype=str).save()
|
542
539
|
|
543
540
|
A dtype `"cat[ULabel]"` can be more easily passed as below.::
|
544
541
|
|
545
|
-
|
546
|
-
name="project",
|
547
|
-
dtype=ln.ULabel,
|
548
|
-
).save()
|
542
|
+
ln.Feature(name="project", dtype=ln.ULabel).save()
|
549
543
|
|
550
544
|
A dtype `"cat[ULabel|bionty.CellType]"` can be more easily passed as below.::
|
551
545
|
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
546
|
+
ln.Feature(
|
547
|
+
name="cell_type",
|
548
|
+
dtype=[ln.ULabel, bt.CellType],
|
549
|
+
).save()
|
556
550
|
|
557
551
|
A multivalue feature with a list of cell types.::
|
558
552
|
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
553
|
+
ln.Feature(
|
554
|
+
name="cell_types",
|
555
|
+
dtype=list[bt.CellType], # or list[str] for a list of strings
|
556
|
+
).save()
|
563
557
|
|
564
558
|
A path feature.::
|
565
559
|
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
560
|
+
ln.Feature(
|
561
|
+
name="image_path",
|
562
|
+
dtype="path", # will be validated as `str`
|
563
|
+
).save()
|
570
564
|
|
571
565
|
Hint:
|
572
566
|
|
lamindb/models/sqlrecord.py
CHANGED
@@ -661,43 +661,53 @@ class BaseSQLRecord(models.Model, metaclass=Registry):
|
|
661
661
|
def __init__(self, *args, **kwargs):
|
662
662
|
skip_validation = kwargs.pop("_skip_validation", False)
|
663
663
|
if not args:
|
664
|
-
if
|
665
|
-
|
666
|
-
"
|
667
|
-
|
668
|
-
"
|
669
|
-
|
664
|
+
if (
|
665
|
+
issubclass(self.__class__, SQLRecord)
|
666
|
+
and self.__class__.__name__ != "Storage"
|
667
|
+
# do not save bionty entities in restricted spaces by default
|
668
|
+
and self.__class__.__module__ != "bionty.models"
|
669
|
+
):
|
670
670
|
from lamindb import context as run_context
|
671
671
|
|
672
672
|
if run_context.space is not None:
|
673
|
+
current_space = run_context.space
|
674
|
+
elif setup_settings.space is not None:
|
675
|
+
current_space = setup_settings.space
|
676
|
+
|
677
|
+
if current_space is not None:
|
673
678
|
if "space_id" in kwargs:
|
674
679
|
# space_id takes precedence over space
|
675
680
|
# https://claude.ai/share/f045e5dc-0143-4bc5-b8a4-38309229f75e
|
676
681
|
if kwargs["space_id"] == 1: # ignore default space
|
677
682
|
kwargs.pop("space_id")
|
678
|
-
kwargs["space"] =
|
683
|
+
kwargs["space"] = current_space
|
679
684
|
elif "space" in kwargs:
|
680
685
|
if kwargs["space"] is None:
|
681
|
-
kwargs["space"] =
|
686
|
+
kwargs["space"] = current_space
|
682
687
|
else:
|
683
|
-
kwargs["space"] =
|
688
|
+
kwargs["space"] = current_space
|
684
689
|
if issubclass(
|
685
690
|
self.__class__, SQLRecord
|
686
691
|
) and self.__class__.__name__ not in {"Storage", "Source"}:
|
687
692
|
from lamindb import context as run_context
|
688
693
|
|
689
694
|
if run_context.branch is not None:
|
695
|
+
current_branch = run_context.branch
|
696
|
+
elif setup_settings.branch is not None:
|
697
|
+
current_branch = setup_settings.branch
|
698
|
+
|
699
|
+
if current_branch is not None:
|
690
700
|
# branch_id takes precedence over branch
|
691
701
|
# https://claude.ai/share/f045e5dc-0143-4bc5-b8a4-38309229f75e
|
692
702
|
if "branch_id" in kwargs:
|
693
703
|
if kwargs["branch_id"] == 1: # ignore default branch
|
694
704
|
kwargs.pop("branch_id")
|
695
|
-
kwargs["branch"] =
|
705
|
+
kwargs["branch"] = current_branch
|
696
706
|
elif "branch" in kwargs:
|
697
707
|
if kwargs["branch"] is None:
|
698
|
-
kwargs["branch"] =
|
708
|
+
kwargs["branch"] = current_branch
|
699
709
|
else:
|
700
|
-
kwargs["branch"] =
|
710
|
+
kwargs["branch"] = current_branch
|
701
711
|
if skip_validation:
|
702
712
|
super().__init__(**kwargs)
|
703
713
|
else:
|
@@ -952,8 +962,8 @@ class Space(BaseSQLRecord):
|
|
952
962
|
editable=False,
|
953
963
|
unique=True,
|
954
964
|
max_length=12,
|
955
|
-
default="
|
956
|
-
db_default="
|
965
|
+
default="aaaaaaaaaaaaa",
|
966
|
+
db_default="aaaaaaaaaaaa",
|
957
967
|
db_index=True,
|
958
968
|
)
|
959
969
|
"""Universal id."""
|
lamindb/models/storage.py
CHANGED
@@ -12,7 +12,11 @@ from lamindb_setup.core._hub_core import (
|
|
12
12
|
delete_storage_record,
|
13
13
|
get_storage_records_for_instance,
|
14
14
|
)
|
15
|
-
from lamindb_setup.core._settings_storage import
|
15
|
+
from lamindb_setup.core._settings_storage import (
|
16
|
+
StorageSettings,
|
17
|
+
get_storage_type,
|
18
|
+
init_storage,
|
19
|
+
)
|
16
20
|
from lamindb_setup.core.upath import check_storage_is_empty, create_path
|
17
21
|
|
18
22
|
from lamindb.base.fields import (
|
@@ -33,78 +37,88 @@ if TYPE_CHECKING:
|
|
33
37
|
|
34
38
|
|
35
39
|
class Storage(SQLRecord, TracksRun, TracksUpdates):
|
36
|
-
"""Storage locations of artifacts such as
|
40
|
+
"""Storage locations of artifacts such as local directories or S3 buckets.
|
37
41
|
|
38
|
-
A storage location is either a
|
42
|
+
A storage location is either a directory (local or a folder in the cloud) or
|
39
43
|
an entire S3/GCP bucket.
|
40
|
-
|
41
|
-
A LaminDB instance can manage and reference multiple storage locations. But any
|
44
|
+
A LaminDB instance can manage and read from multiple storage locations. But any
|
42
45
|
storage location is managed by *at most one* LaminDB instance.
|
43
46
|
|
44
|
-
.. dropdown:: Managed vs.
|
47
|
+
.. dropdown:: Managed vs. read-only storage locations
|
45
48
|
|
46
49
|
A LaminDB instance can only write artifacts to its managed storage
|
47
|
-
locations
|
50
|
+
locations.
|
48
51
|
|
49
|
-
The :attr:`~lamindb.Storage.instance_uid` field defines the managing LaminDB instance of a
|
50
|
-
|
51
|
-
instance, in which case the `instance_uid` is `None`. If it matches the
|
52
|
-
:attr:`~lamindb.core.Settings.instance_uid` of the current instance, the storage location
|
53
|
-
is managed by the current instance.
|
52
|
+
The :attr:`~lamindb.Storage.instance_uid` field defines the managing LaminDB instance of a storage location.
|
53
|
+
You can access the `instance_uid` of your current instance through `ln.setup.settings.instance_uid`.
|
54
54
|
|
55
|
-
Here is an example
|
55
|
+
Here is an example (`source <https://lamin.ai/laminlabs/lamindata/transform/dPco79GYgzag0000>`__).
|
56
56
|
|
57
57
|
.. image:: https://lamin-site-assets.s3.amazonaws.com/.lamindb/eHDmIOAxLEoqZ2oK0000.png
|
58
58
|
:width: 400px
|
59
59
|
|
60
|
+
Some public storage locations are not managed by any LaminDB instance: their `instance_uid` is `None`.
|
61
|
+
|
60
62
|
.. dropdown:: Managing access to storage locations across instances
|
61
63
|
|
62
|
-
You can
|
64
|
+
You can manage access through AWS policies that you attach to your S3 bucket
|
63
65
|
or leverage LaminHub's fine-grained access management.
|
64
66
|
|
65
|
-
Head over to `https://lamin.ai/{account}/infrastructure
|
66
|
-
By clicking the green button that says "Connect S3 bucket", you enable
|
67
|
-
for a
|
67
|
+
Head over to `https://lamin.ai/{account}/infrastructure`.
|
68
|
+
By clicking the green button that says "Connect S3 bucket", you enable Lamin to issue federated S3 tokens
|
69
|
+
for a bucket so that your collaborators can access data based on their permissions in LaminHub.
|
68
70
|
:doc:`docs:access` has more details.
|
69
71
|
|
70
72
|
.. image:: https://lamin-site-assets.s3.amazonaws.com/.lamindb/ze8hkgVxVptSSZEU0000.png
|
71
73
|
:width: 800px
|
72
74
|
|
75
|
+
If you don't want to store data in the cloud, you can use local storage locations: :doc:`faq/keep-artifacts-local`.
|
76
|
+
|
73
77
|
Args:
|
74
|
-
root: `str` The root path of the storage location, e.g., `"./
|
75
|
-
|
76
|
-
|
77
|
-
region: `str | None = None` Cloud storage region, if applicable. Auto-populated for AWS S3.
|
78
|
+
root: `str` The root path of the storage location, e.g., `"./mydir"`, `"s3://my-bucket"`, `"s3://my-bucket/myfolder"`, `"gs://my-bucket/myfolder"`, `"/nfs/shared/datasets/genomics"`, `"/weka/shared/models/"`, ...
|
79
|
+
description: `str | None = None` An optional description.
|
80
|
+
host: `str | None = None` For local storage locations, pass a globally unique host identifier, e.g. `"my-institute-cluster-1"`, `"my-server-abcd"`, ...
|
78
81
|
|
79
82
|
See Also:
|
80
83
|
:attr:`lamindb.core.Settings.storage`
|
81
84
|
Current default storage location of your compute session for writing artifacts.
|
82
85
|
:attr:`~lamindb.setup.core.StorageSettings`
|
83
86
|
Storage settings.
|
87
|
+
:doc:`faq/keep-artifacts-local`
|
88
|
+
Avoid storing artifacts in the cloud, but keep them on local infrastructure.
|
84
89
|
|
85
90
|
Examples:
|
86
91
|
|
87
92
|
When you create a LaminDB instance, you configure its default storage location via `--storage`::
|
88
93
|
|
89
|
-
lamin init --storage ./
|
94
|
+
lamin init --storage ./mydatadir # or "s3://my-bucket/myfolder", "gs://my-bucket/myfolder", ...
|
90
95
|
|
91
|
-
View the current default storage location
|
96
|
+
View the current default storage location for writing artifacts::
|
92
97
|
|
93
98
|
import lamindb as ln
|
94
99
|
|
95
100
|
ln.settings.storage
|
96
101
|
|
97
|
-
|
102
|
+
Create a new cloud storage location::
|
98
103
|
|
99
|
-
ln.
|
104
|
+
ln.Storage(root="s3://our-bucket/our-folder").save()
|
100
105
|
|
101
|
-
|
106
|
+
Create a new local storage location::
|
102
107
|
|
103
|
-
ln.Storage.
|
108
|
+
ln.Storage(root="/dir/our-shared-dir", host="our-server-123").save()
|
104
109
|
|
105
|
-
|
110
|
+
Switch to another storage location::
|
106
111
|
|
107
|
-
ln.
|
112
|
+
ln.settings.storage = "/dir/our-shared-dir" # or "s3://our-bucket/our-folder", "gs://our-bucket/our-folder", ...
|
113
|
+
|
114
|
+
If you're operating in `keep-artifacts-local` mode (:doc:`faq/keep-artifacts-local`), you can switch among additional local storage locations::
|
115
|
+
|
116
|
+
ln.Storage(root="/dir/our-other-shared-dir", host="our-server-123").save() # create
|
117
|
+
ln.settings.local_storage = "/dir/our-other-shared-dir" # switch
|
118
|
+
|
119
|
+
View all storage locations used in your LaminDB instance::
|
120
|
+
|
121
|
+
ln.Storage.df()
|
108
122
|
|
109
123
|
Notes:
|
110
124
|
|
@@ -146,9 +160,9 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
|
|
146
160
|
description: str | None = CharField(db_index=True, null=True)
|
147
161
|
"""A description of what the storage location is used for (optional)."""
|
148
162
|
type: StorageType = CharField(max_length=30, db_index=True)
|
149
|
-
"""Can be "local" vs. "s3" vs. "gs"."""
|
163
|
+
"""Can be "local" vs. "s3" vs. "gs". Is auto-detected from the format of the `root` path."""
|
150
164
|
region: str | None = CharField(max_length=64, db_index=True, null=True)
|
151
|
-
"""
|
165
|
+
"""Storage region for cloud storage locations. Host identifier for local storage locations."""
|
152
166
|
instance_uid: str | None = CharField(max_length=12, db_index=True, null=True)
|
153
167
|
"""Instance that manages this storage location."""
|
154
168
|
artifacts: Artifact
|
@@ -158,9 +172,9 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
|
|
158
172
|
def __init__(
|
159
173
|
self,
|
160
174
|
root: str,
|
161
|
-
|
175
|
+
*,
|
162
176
|
description: str | None = None,
|
163
|
-
|
177
|
+
host: str | None = None,
|
164
178
|
): ...
|
165
179
|
|
166
180
|
@overload
|
@@ -177,7 +191,28 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
|
|
177
191
|
if len(args) == len(self._meta.concrete_fields):
|
178
192
|
super().__init__(*args)
|
179
193
|
return None
|
180
|
-
|
194
|
+
if args:
|
195
|
+
assert len(args) == 1, ( # noqa: S101
|
196
|
+
"Storage can only be initialized with a single positional argument, the root path."
|
197
|
+
)
|
198
|
+
kwargs["root"] = args[0]
|
199
|
+
if "host" in kwargs:
|
200
|
+
if "type" in kwargs:
|
201
|
+
assert kwargs["type"] == "local", ( # noqa: S101
|
202
|
+
"type needs to be 'local' if host is set"
|
203
|
+
)
|
204
|
+
else:
|
205
|
+
kwargs["type"] = "local"
|
206
|
+
assert get_storage_type(kwargs["root"]) == "local", ( # noqa: S101
|
207
|
+
"root must be a local path if host is set"
|
208
|
+
)
|
209
|
+
assert "region" not in kwargs, "region must not be set if host is set" # noqa: S101
|
210
|
+
kwargs["region"] = kwargs.pop("host")
|
211
|
+
storage_record = Storage.filter(
|
212
|
+
root=kwargs["root"], region=kwargs["region"]
|
213
|
+
).one_or_none()
|
214
|
+
else:
|
215
|
+
storage_record = Storage.filter(root=kwargs["root"]).one_or_none()
|
181
216
|
if storage_record is not None:
|
182
217
|
from .sqlrecord import init_self_from_db
|
183
218
|
|
@@ -196,7 +231,9 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
|
|
196
231
|
kwargs["root"],
|
197
232
|
instance_id=setup_settings.instance._id,
|
198
233
|
instance_slug=setup_settings.instance.slug,
|
234
|
+
register_hub=setup_settings.instance.is_on_hub,
|
199
235
|
prevent_register_hub=not setup_settings.instance.is_on_hub,
|
236
|
+
region=kwargs.get("region", None), # host was renamed to region already
|
200
237
|
)
|
201
238
|
# ssettings performed validation and normalization of the root path
|
202
239
|
kwargs["root"] = ssettings.root_as_str # noqa: S101
|
@@ -238,6 +275,18 @@ class Storage(SQLRecord, TracksRun, TracksUpdates):
|
|
238
275
|
)
|
239
276
|
super().__init__(**kwargs)
|
240
277
|
|
278
|
+
@property
|
279
|
+
def host(self) -> str | None:
|
280
|
+
"""Host identifier for local storage locations.
|
281
|
+
|
282
|
+
Is `None` for locations with `type != "local"`.
|
283
|
+
|
284
|
+
A globally unique user-defined host identifier (cluster, server, laptop, etc.).
|
285
|
+
"""
|
286
|
+
if self.type != "local":
|
287
|
+
return None
|
288
|
+
return self.region
|
289
|
+
|
241
290
|
@property
|
242
291
|
def path(self) -> Path | UPath:
|
243
292
|
"""Path.
|
lamindb/setup/__init__.py
CHANGED
@@ -8,6 +8,7 @@ from lamindb_setup import (
|
|
8
8
|
)
|
9
9
|
|
10
10
|
from . import core, errors, types
|
11
|
+
from ._switch import switch # noqa: F401
|
11
12
|
|
12
13
|
del connect # we have this at the root level, hence, we don't want it here
|
13
14
|
__doc__ = _lamindb_setup.__doc__.replace("lamindb_setup", "lamindb.setup")
|
lamindb/setup/_switch.py
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
from lamindb_setup import settings
|
6
|
+
|
7
|
+
if TYPE_CHECKING:
|
8
|
+
from lamindb.models import Branch, Space
|
9
|
+
|
10
|
+
|
11
|
+
def switch(*, branch: str | Branch | None = None, space: str | Space | None = None):
|
12
|
+
"""Switch to a branch or space, create if not exists."""
|
13
|
+
if branch is not None:
|
14
|
+
settings.branch = branch
|
15
|
+
if space is not None:
|
16
|
+
settings.space = space
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lamindb
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.8.0
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.10,<3.14
|
@@ -10,8 +10,8 @@ Classifier: Programming Language :: Python :: 3.11
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.12
|
11
11
|
Classifier: Programming Language :: Python :: 3.13
|
12
12
|
Requires-Dist: lamin_utils==0.15.0
|
13
|
-
Requires-Dist: lamin_cli==1.5.
|
14
|
-
Requires-Dist: lamindb_setup[aws]==1.
|
13
|
+
Requires-Dist: lamin_cli==1.5.5
|
14
|
+
Requires-Dist: lamindb_setup[aws]==1.8.1
|
15
15
|
Requires-Dist: pyyaml
|
16
16
|
Requires-Dist: pyarrow
|
17
17
|
Requires-Dist: pandera>=0.24.0
|
@@ -1,4 +1,4 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=187A6Y-iGQVzOl_cIiB0FvfeVeS0c9jisHbLAt3U6ak,2904
|
2
2
|
lamindb/_finish.py,sha256=MZKXiGk_NFRyc693OXitqq7Qd9bGojcBe26JkingJGI,20859
|
3
3
|
lamindb/_tracked.py,sha256=-wK7BJv30nf4v2_nH5qDCyxHvug7ih6duQNGxDrj3UE,4447
|
4
4
|
lamindb/_view.py,sha256=cod1RnZoLyzMVJcjWjytg78Sf4qsR8IAdqpwzsi8FTw,4950
|
@@ -12,9 +12,9 @@ lamindb/base/uids.py,sha256=cLBi5mIlsf1ltkTb17r1FLzlOjlGmjvsCygoVJHQ-A8,2116
|
|
12
12
|
lamindb/base/users.py,sha256=8MSmAvCKoUF15YsDE6BGLBXsFWpfoEEg8iDTKZ7kD48,848
|
13
13
|
lamindb/core/__init__.py,sha256=aaBq0UVjNolMynbT1V5hB6UrJm1tK0M6WHu_r6em9_4,604
|
14
14
|
lamindb/core/_compat.py,sha256=NLnKk1qk4xdgMV-QwFDnBnbio02ujjlF86icvhpdv4c,2029
|
15
|
-
lamindb/core/_context.py,sha256=
|
15
|
+
lamindb/core/_context.py,sha256=46jUmSq5JiwRloriMNb3VT3mXCR4vo_KotbVSerBUfA,39606
|
16
16
|
lamindb/core/_mapped_collection.py,sha256=osquwC6ee0wJ_I6O-8AZwnQUa_r9zqa0MN82Q-nBI3Y,25746
|
17
|
-
lamindb/core/_settings.py,sha256=
|
17
|
+
lamindb/core/_settings.py,sha256=Dj44Xh5x3OEpTPvG0twREcIv6X6Er5NzDZO86i8Cq9g,8678
|
18
18
|
lamindb/core/_sync_git.py,sha256=Z7keuyS5X7CAj285sEbZIFExZF9mtjGH8DzKwz3xhHw,5881
|
19
19
|
lamindb/core/_track_environment.py,sha256=fa0-qKEe0BpL79_nsDUDtbg1iA3VpJTh0RCOGdc2XOA,974
|
20
20
|
lamindb/core/exceptions.py,sha256=FMEoSvT3FvtLkxQAt2oDXPeaPem8V5x5UBbTsPFYU5w,53
|
@@ -27,8 +27,8 @@ lamindb/core/datasets/_small.py,sha256=HBzyTporAl-6Cr4DbDDEtzbU2ILKNxxiRM-GeZofq
|
|
27
27
|
lamindb/core/datasets/mini_immuno.py,sha256=eAtRQ3_4cln5IFzTH0jNufbWcyQKrXmzizbSmvfS-FM,5707
|
28
28
|
lamindb/core/storage/__init__.py,sha256=JOIMu_7unbyhndtH1j0Q-9AvY8knSuc1IJO9sQnyBAQ,498
|
29
29
|
lamindb/core/storage/_anndata_accessor.py,sha256=jrKbRylkqgZ3opKcJCwilDhRGEnPcQsKt-X7EA9Isr8,26100
|
30
|
-
lamindb/core/storage/_backed_access.py,sha256=
|
31
|
-
lamindb/core/storage/_polars_lazy_df.py,sha256=
|
30
|
+
lamindb/core/storage/_backed_access.py,sha256=6YczvbmcmwZsOHFyD0ArDrvysUUAEiYcwZ_8TyWi_r8,7674
|
31
|
+
lamindb/core/storage/_polars_lazy_df.py,sha256=jqe06s2idUloi4FnOB1DJ6UMNWM_wltFzxuMrE-C2oU,2941
|
32
32
|
lamindb/core/storage/_pyarrow_dataset.py,sha256=lRYYt7edUtwauhxd7RwFud6YPDbz2PFvYYgqLhfapfk,1398
|
33
33
|
lamindb/core/storage/_tiledbsoma.py,sha256=kwf5zz8byZF5Lm-4Tt2ZE-hjUzwO8l07I9G7l2r68u0,11434
|
34
34
|
lamindb/core/storage/_valid_suffixes.py,sha256=vUSeQ4s01rdhD_vSd6wKmFBsgMJAKkBMnL_T9Y1znMg,501
|
@@ -40,7 +40,7 @@ lamindb/core/subsettings/_annotation_settings.py,sha256=o-yTYw-NmjFmtehbKU8qnf7t
|
|
40
40
|
lamindb/core/subsettings/_creation_settings.py,sha256=NGHWKqCFSzVNBxAr2VnmdYguiFdW29XUK7T9wRsVshg,906
|
41
41
|
lamindb/curators/__init__.py,sha256=rv5Xrhv0jS1NMpuRVUHEMAsu6pXhBdDP8PBlO4FXrsE,662
|
42
42
|
lamindb/curators/_legacy.py,sha256=vWA3CFryIXRG2RDHY7-paMFoG7bpu_gHti8V0sJLuYc,76280
|
43
|
-
lamindb/curators/core.py,sha256=
|
43
|
+
lamindb/curators/core.py,sha256=5Z5TZ8g5s6d7kCdCcyyJPA4Q7063-KhyqKGeYD5C51w,67428
|
44
44
|
lamindb/curators/_cellxgene_schemas/__init__.py,sha256=iw6PrzhBQpAR7aQ4_MXopSAVX2hdderHH3LRWeQy7Hk,7511
|
45
45
|
lamindb/curators/_cellxgene_schemas/schema_versions.csv,sha256=X9rmO88TW1Fht1f5mJs0JdW-VPvyKSajpf8lHNeECj4,1680
|
46
46
|
lamindb/examples/__init__.py,sha256=DGImiuWYDvwxh78p5FCwQWClEwsE3ODLU49i_NqbW0c,533
|
@@ -97,21 +97,22 @@ lamindb/migrations/0111_remove_record__sort_order.py,sha256=m5CC_VXupeUywupKQ74R
|
|
97
97
|
lamindb/migrations/0112_alter_recordartifact_feature_and_more.py,sha256=19AothLLch_iY5W5YhH3G-paNFSlqTeGwVfYX78o8Hc,3458
|
98
98
|
lamindb/migrations/0113_lower_case_branch_and_space_names.py,sha256=Xt2krstx3t30iTi2z0qTCBNteDA5Wy9L-thRXJSeUA8,1734
|
99
99
|
lamindb/migrations/0114_alter_run__status_code.py,sha256=KkGecSBJElA3LBnhSK5_rFpcFridOuv6BhM8DCYqTKw,612
|
100
|
-
lamindb/migrations/
|
100
|
+
lamindb/migrations/0115_alter_space_uid.py,sha256=18fCP8d31Ox1KxSSmfzU-W3lSpS3xtiaBNbPeHQiuTM,1332
|
101
|
+
lamindb/migrations/0115_squashed.py,sha256=gDjKt5S-Uk5NK72JPnsB1zD_kyAVIXR5DFEBHNAUcr4,162935
|
101
102
|
lamindb/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
102
103
|
lamindb/models/__init__.py,sha256=mtjZH0x31aV463YaUef8ZvdQHlGa6SZr_thsrlbdkCg,2419
|
103
104
|
lamindb/models/_describe.py,sha256=kxw7E_a5FqAI_GEH0QDGsBywQtmZ9DqQrJxnFmVJDt8,7717
|
104
|
-
lamindb/models/_django.py,sha256=
|
105
|
+
lamindb/models/_django.py,sha256=49DtZuDtPa_R7FPJ47V6wh77gBDcGdILgotQsFZeRXE,9775
|
105
106
|
lamindb/models/_feature_manager.py,sha256=YLhcNGQHWpKDzXR2SFoj0jAPkz0Iynw51_pcxJUSNCE,54854
|
106
107
|
lamindb/models/_from_values.py,sha256=cCGPMDlAbBrdhFW-XrIQVZ10q1LNg4MycYPLOkF0fTc,13366
|
107
108
|
lamindb/models/_is_versioned.py,sha256=Th2_cBf9UWh27E6ANxg6LGmjBOumXFy7AjH0GG4FoXA,7601
|
108
109
|
lamindb/models/_label_manager.py,sha256=O3KaaTEVId5ky3h0aCGg1kDoFFfrovFYlV82YsZZyIs,12127
|
109
110
|
lamindb/models/_relations.py,sha256=zHYLujy9vkuB9jVq5844TpzLSP__iYNCQcsl-FzK1Jw,3700
|
110
|
-
lamindb/models/artifact.py,sha256=
|
111
|
+
lamindb/models/artifact.py,sha256=UnNFkumf0FI1RlH_0KCR1lq5lTDi3xEA3U9LGGBg2hw,115932
|
111
112
|
lamindb/models/artifact_set.py,sha256=VOZEGDo3m_9Yg_ftx3I2fwdydjHN61X_qV18N6xG4kM,4117
|
112
113
|
lamindb/models/can_curate.py,sha256=ShEva1GGpJcCg7k95t99RzWfz28OFSorPFXLrGoXavE,29266
|
113
114
|
lamindb/models/collection.py,sha256=zNiYzj0K_UgIobWzBY93rekVpZm76p9BJOw7Pz0i8ZE,28356
|
114
|
-
lamindb/models/feature.py,sha256=
|
115
|
+
lamindb/models/feature.py,sha256=csy8NZy10Z-ekPoYvEwT0Il4QS4jceq0To_QVRL41A8,37165
|
115
116
|
lamindb/models/has_parents.py,sha256=NRNshrWCX7G3nnM3lnnHQ3Ho216T3EJfgakY6KlTvt8,20301
|
116
117
|
lamindb/models/project.py,sha256=Za__zEzsShXmfCkKjg1wmlJ_UuGJur-mg6ALNslYJfw,17315
|
117
118
|
lamindb/models/query_manager.py,sha256=EzbyNA5zWUbLYH5yJ7dIC90j1teVoQHrXpRLjCfBEao,11036
|
@@ -120,15 +121,16 @@ lamindb/models/record.py,sha256=syOBBefZhlqZpoVJD32uqzEzbwXiOboAOA3AlGaOkhE,1205
|
|
120
121
|
lamindb/models/run.py,sha256=3xCAJnxK4iNeFlFz1bAxYDnRGU4HnRpDfxq4MwB6cPw,15565
|
121
122
|
lamindb/models/save.py,sha256=jXha2jfY-pWsKuP2dwaEROhUGxhM8fTWQGWAzA_xsM0,16777
|
122
123
|
lamindb/models/schema.py,sha256=oI3_eUYTYrMofOVJTCCKVkGr4L6VWpIxx5L4fauTtn8,48244
|
123
|
-
lamindb/models/sqlrecord.py,sha256=
|
124
|
-
lamindb/models/storage.py,sha256=
|
124
|
+
lamindb/models/sqlrecord.py,sha256=FqtK9epCiFPhqr2DI0W6OmU621wU7FVh9rXouGK3_3w,68136
|
125
|
+
lamindb/models/storage.py,sha256=0jvuQyJcIMdrZ9qq-vmKkI66libb2DqWjCXNFuvinIM,13518
|
125
126
|
lamindb/models/transform.py,sha256=BceBz250AznWf85LefgS2nJNye_xJ0w_jce-mGJDN6Y,12474
|
126
127
|
lamindb/models/ulabel.py,sha256=ocAMSKeQcq2Kr6Dq0mxGupOmW1K0pAs19vjDeTEb6vM,9335
|
127
|
-
lamindb/setup/__init__.py,sha256=
|
128
|
+
lamindb/setup/__init__.py,sha256=QZ-JF8IzO_ckDOU223lsJrdO5ay7cDFgvCbkLeAuxYA,467
|
129
|
+
lamindb/setup/_switch.py,sha256=njZJN__JOhVrBFGClQG1wobdhJJp6l_XzPGKtKSCrfU,434
|
128
130
|
lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
|
129
131
|
lamindb/setup/errors/__init__.py,sha256=bAHTxOUJW1rm4zpF0Pvqkftn8W6iMGnQ-uyNBu13Nfg,171
|
130
132
|
lamindb/setup/types/__init__.py,sha256=ATaosOi6q-cDWB52T69_sRmLMqj8cHfc-vljzZsrJNw,169
|
131
|
-
lamindb-1.
|
132
|
-
lamindb-1.
|
133
|
-
lamindb-1.
|
134
|
-
lamindb-1.
|
133
|
+
lamindb-1.8.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
134
|
+
lamindb-1.8.0.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
|
135
|
+
lamindb-1.8.0.dist-info/METADATA,sha256=rHBLxF0dN8XsUC8W8_qNRg1GoQVyIg5NnbtaiD_gb6A,2669
|
136
|
+
lamindb-1.8.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|