lamindb 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +30 -25
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +2 -3
- lamindb/base/__init__.py +1 -1
- lamindb/base/ids.py +1 -10
- lamindb/core/__init__.py +7 -65
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +43 -20
- lamindb/core/_settings.py +6 -6
- lamindb/core/_sync_git.py +1 -1
- lamindb/core/loaders.py +30 -19
- lamindb/core/storage/_backed_access.py +4 -2
- lamindb/core/storage/_tiledbsoma.py +8 -6
- lamindb/core/storage/_zarr.py +104 -25
- lamindb/core/storage/objects.py +63 -28
- lamindb/core/storage/paths.py +4 -1
- lamindb/core/types.py +10 -0
- lamindb/curators/__init__.py +100 -85
- lamindb/errors.py +1 -1
- lamindb/integrations/_vitessce.py +4 -4
- lamindb/migrations/0089_subsequent_runs.py +159 -0
- lamindb/migrations/0090_runproject_project_runs.py +73 -0
- lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
- lamindb/models/__init__.py +79 -0
- lamindb/{core → models}/_describe.py +3 -3
- lamindb/{core → models}/_django.py +8 -5
- lamindb/{core → models}/_feature_manager.py +103 -87
- lamindb/{_from_values.py → models/_from_values.py} +5 -2
- lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
- lamindb/{core → models}/_label_manager.py +10 -17
- lamindb/{core/relations.py → models/_relations.py} +8 -1
- lamindb/models/artifact.py +2602 -0
- lamindb/{_can_curate.py → models/can_curate.py} +349 -180
- lamindb/models/collection.py +683 -0
- lamindb/models/core.py +135 -0
- lamindb/models/feature.py +643 -0
- lamindb/models/flextable.py +163 -0
- lamindb/{_parents.py → models/has_parents.py} +55 -49
- lamindb/models/project.py +384 -0
- lamindb/{_query_manager.py → models/query_manager.py} +10 -8
- lamindb/{_query_set.py → models/query_set.py} +40 -26
- lamindb/models/record.py +1762 -0
- lamindb/models/run.py +563 -0
- lamindb/{_save.py → models/save.py} +9 -7
- lamindb/models/schema.py +732 -0
- lamindb/models/transform.py +360 -0
- lamindb/models/ulabel.py +249 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/RECORD +51 -51
- lamindb/_artifact.py +0 -1379
- lamindb/_collection.py +0 -440
- lamindb/_feature.py +0 -316
- lamindb/_is_versioned.py +0 -40
- lamindb/_record.py +0 -1064
- lamindb/_run.py +0 -60
- lamindb/_schema.py +0 -347
- lamindb/_storage.py +0 -15
- lamindb/_transform.py +0 -170
- lamindb/_ulabel.py +0 -56
- lamindb/_utils.py +0 -9
- lamindb/base/validation.py +0 -63
- lamindb/core/_data.py +0 -491
- lamindb/core/fields.py +0 -12
- lamindb/models.py +0 -4475
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -16,8 +16,8 @@ Registries.
|
|
16
16
|
|
17
17
|
Artifact
|
18
18
|
Transform
|
19
|
-
ULabel
|
20
19
|
Run
|
20
|
+
ULabel
|
21
21
|
User
|
22
22
|
Storage
|
23
23
|
Feature
|
@@ -25,9 +25,18 @@ Registries.
|
|
25
25
|
Param
|
26
26
|
Collection
|
27
27
|
Project
|
28
|
+
Space
|
28
29
|
Reference
|
29
30
|
Person
|
30
31
|
|
32
|
+
Curators & integrations.
|
33
|
+
|
34
|
+
.. autosummary::
|
35
|
+
:toctree: .
|
36
|
+
|
37
|
+
curators
|
38
|
+
integrations
|
39
|
+
|
31
40
|
Key functionality.
|
32
41
|
|
33
42
|
.. autosummary::
|
@@ -36,20 +45,19 @@ Key functionality.
|
|
36
45
|
connect
|
37
46
|
view
|
38
47
|
save
|
48
|
+
UPath
|
49
|
+
settings
|
39
50
|
|
40
|
-
|
51
|
+
Low-level functionality.
|
41
52
|
|
42
53
|
.. autosummary::
|
43
54
|
:toctree: .
|
44
55
|
|
45
|
-
integrations
|
46
56
|
context
|
47
|
-
curators
|
48
|
-
settings
|
49
57
|
errors
|
50
58
|
setup
|
51
|
-
UPath
|
52
59
|
base
|
60
|
+
models
|
53
61
|
core
|
54
62
|
|
55
63
|
Backward compatibility.
|
@@ -62,8 +70,16 @@ Backward compatibility.
|
|
62
70
|
|
63
71
|
"""
|
64
72
|
|
73
|
+
# ruff: noqa: I001
|
65
74
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
66
|
-
__version__ = "1.1.1"
|
75
|
+
__version__ = "1.2.0"
|
76
|
+
|
77
|
+
import warnings
|
78
|
+
|
79
|
+
# through SpatialData
|
80
|
+
warnings.filterwarnings(
|
81
|
+
"ignore", message="The legacy Dask DataFrame implementation is deprecated"
|
82
|
+
)
|
67
83
|
|
68
84
|
from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
|
69
85
|
from lamindb_setup._check_setup import _check_instance_setup
|
@@ -79,23 +95,7 @@ def __getattr__(name):
|
|
79
95
|
|
80
96
|
if _check_instance_setup(from_module="lamindb"):
|
81
97
|
del __getattr__ # so that imports work out
|
82
|
-
from . import
|
83
|
-
from . import (
|
84
|
-
_artifact,
|
85
|
-
_can_curate,
|
86
|
-
_collection,
|
87
|
-
_feature,
|
88
|
-
_is_versioned,
|
89
|
-
_parents,
|
90
|
-
_record,
|
91
|
-
_run,
|
92
|
-
_schema,
|
93
|
-
_storage,
|
94
|
-
_transform,
|
95
|
-
_ulabel,
|
96
|
-
integrations,
|
97
|
-
)
|
98
|
-
from ._save import save
|
98
|
+
from . import base
|
99
99
|
from ._tracked import tracked
|
100
100
|
from ._view import view
|
101
101
|
from .core._context import context
|
@@ -111,12 +111,17 @@ if _check_instance_setup(from_module="lamindb"):
|
|
111
111
|
Project,
|
112
112
|
Reference,
|
113
113
|
Run,
|
114
|
-
Schema,
|
114
|
+
Schema,
|
115
115
|
Storage,
|
116
116
|
Transform,
|
117
117
|
ULabel,
|
118
118
|
User,
|
119
|
+
Space,
|
119
120
|
)
|
121
|
+
from .models.save import save
|
122
|
+
from . import core
|
123
|
+
from . import integrations
|
124
|
+
from . import curators
|
120
125
|
|
121
126
|
track = context.track # simple access
|
122
127
|
finish = context.finish # simple access
|
lamindb/_tracked.py
CHANGED
@@ -5,8 +5,8 @@ from datetime import datetime, timezone
|
|
5
5
|
from typing import Callable, ParamSpec, TypeVar
|
6
6
|
|
7
7
|
from .core._context import context
|
8
|
-
from .core._feature_manager import infer_feature_type_convert_json
|
9
8
|
from .models import Run, Transform
|
9
|
+
from .models._feature_manager import infer_feature_type_convert_json
|
10
10
|
|
11
11
|
P = ParamSpec("P")
|
12
12
|
R = TypeVar("R")
|
lamindb/_view.py
CHANGED
@@ -9,10 +9,9 @@ from lamin_utils import colors, logger
|
|
9
9
|
from lamindb_setup import settings
|
10
10
|
from lamindb_setup._init_instance import get_schema_module_name
|
11
11
|
|
12
|
-
from lamindb.
|
13
|
-
from lamindb.models import Feature, Record
|
12
|
+
from lamindb.models import Feature, FeatureValue, ParamValue, Record
|
14
13
|
|
15
|
-
from .
|
14
|
+
from .models.feature import convert_pandas_dtype_to_lamin_dtype
|
16
15
|
|
17
16
|
if TYPE_CHECKING:
|
18
17
|
import pandas as pd
|
lamindb/base/__init__.py
CHANGED
lamindb/base/ids.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""IDs.
|
1
|
+
"""Universal IDs.
|
2
2
|
|
3
3
|
Base generators:
|
4
4
|
|
@@ -55,15 +55,6 @@ def base62(n_char: int) -> str:
|
|
55
55
|
return id
|
56
56
|
|
57
57
|
|
58
|
-
# the following cannot be serialized by Django
|
59
|
-
# class Base62:
|
60
|
-
# def __init__(self, n_char: int):
|
61
|
-
# self.n_char = n_char
|
62
|
-
|
63
|
-
# def __call__(self):
|
64
|
-
# return base62(self.n_char)
|
65
|
-
|
66
|
-
|
67
58
|
def base26(n_char: int):
|
68
59
|
"""ASCII lowercase."""
|
69
60
|
alphabet = string.ascii_lowercase
|
lamindb/core/__init__.py
CHANGED
@@ -1,49 +1,20 @@
|
|
1
1
|
"""Core library.
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
.. autosummary::
|
6
|
-
:toctree: .
|
7
|
-
|
8
|
-
BasicRecord
|
9
|
-
Record
|
10
|
-
Registry
|
11
|
-
QuerySet
|
12
|
-
QueryManager
|
13
|
-
RecordList
|
14
|
-
FeatureManager
|
15
|
-
ParamManager
|
16
|
-
LabelManager
|
17
|
-
IsVersioned
|
18
|
-
CanCurate
|
19
|
-
HasParents
|
20
|
-
TracksRun
|
21
|
-
TracksUpdates
|
22
|
-
ParamValue
|
23
|
-
FeatureValue
|
24
|
-
InspectResult
|
25
|
-
ValidateFields
|
26
|
-
fields
|
27
|
-
|
28
|
-
Curators:
|
3
|
+
Settings & context:
|
29
4
|
|
30
5
|
.. autosummary::
|
31
6
|
:toctree: .
|
32
7
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
MuDataCatManager
|
37
|
-
TiledbsomaCatManager
|
38
|
-
CurateLookup
|
8
|
+
Settings
|
9
|
+
subsettings
|
10
|
+
Context
|
39
11
|
|
40
|
-
|
12
|
+
Artifact loaders:
|
41
13
|
|
42
14
|
.. autosummary::
|
43
15
|
:toctree: .
|
44
16
|
|
45
|
-
|
46
|
-
Context
|
17
|
+
loaders
|
47
18
|
|
48
19
|
Data loaders:
|
49
20
|
|
@@ -57,10 +28,8 @@ Modules:
|
|
57
28
|
.. autosummary::
|
58
29
|
:toctree: .
|
59
30
|
|
60
|
-
loaders
|
61
31
|
datasets
|
62
32
|
storage
|
63
|
-
subsettings
|
64
33
|
logger
|
65
34
|
|
66
35
|
"""
|
@@ -68,35 +37,8 @@ Modules:
|
|
68
37
|
from lamin_utils import logger
|
69
38
|
from lamin_utils._inspect import InspectResult
|
70
39
|
|
71
|
-
from lamindb._query_manager import QueryManager
|
72
|
-
from lamindb._query_set import QuerySet, RecordList
|
73
|
-
from lamindb.core._feature_manager import FeatureManager, ParamManager
|
74
|
-
from lamindb.core._label_manager import LabelManager
|
75
|
-
from lamindb.curators import (
|
76
|
-
AnnDataCatManager,
|
77
|
-
CatManager,
|
78
|
-
CurateLookup,
|
79
|
-
Curator,
|
80
|
-
DataFrameCatManager,
|
81
|
-
MuDataCatManager,
|
82
|
-
TiledbsomaCatManager,
|
83
|
-
)
|
84
|
-
from lamindb.models import (
|
85
|
-
BasicRecord,
|
86
|
-
CanCurate,
|
87
|
-
FeatureValue,
|
88
|
-
HasParents,
|
89
|
-
IsVersioned,
|
90
|
-
ParamValue,
|
91
|
-
Record,
|
92
|
-
Registry,
|
93
|
-
TracksRun,
|
94
|
-
TracksUpdates,
|
95
|
-
ValidateFields,
|
96
|
-
)
|
97
|
-
|
98
40
|
from .. import errors as exceptions
|
99
|
-
from . import
|
41
|
+
from . import datasets, loaders, subsettings, types
|
100
42
|
from ._context import Context
|
101
43
|
from ._mapped_collection import MappedCollection
|
102
44
|
from ._settings import Settings
|
lamindb/core/_compat.py
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
import importlib.util
|
2
|
+
from typing import Any, Callable, TypeVar
|
3
|
+
|
4
|
+
T = TypeVar("T")
|
5
|
+
|
6
|
+
|
7
|
+
def is_package_installed(package_name: str) -> bool:
|
8
|
+
spec = importlib.util.find_spec(package_name)
|
9
|
+
return spec is not None
|
10
|
+
|
11
|
+
|
12
|
+
def with_package(package_name: str, operation: Callable[[Any], T]) -> T:
|
13
|
+
"""Execute an operation that requires a specific package.
|
14
|
+
|
15
|
+
Args:
|
16
|
+
package_name: Package name (e.g., "mudata")
|
17
|
+
operation: Function that takes the imported module and returns a result
|
18
|
+
|
19
|
+
Examples:
|
20
|
+
# For direct package functions
|
21
|
+
result = with_package("mudata", lambda mod: mod.read_zarr(path))
|
22
|
+
"""
|
23
|
+
try:
|
24
|
+
module = importlib.import_module(package_name)
|
25
|
+
return operation(module)
|
26
|
+
except ImportError:
|
27
|
+
raise ImportError(
|
28
|
+
f"Package '{package_name}' is required but not installed. "
|
29
|
+
f"Please install with: pip install {package_name}"
|
30
|
+
) from None
|
31
|
+
|
32
|
+
|
33
|
+
def with_package_obj(
|
34
|
+
obj: Any, class_name: str, package_name: str, operation: Callable[[Any], T]
|
35
|
+
) -> tuple[bool, T | None]:
|
36
|
+
"""Handle operations on objects that require specific packages.
|
37
|
+
|
38
|
+
Args:
|
39
|
+
obj: The object to operate on
|
40
|
+
class_name: Expected class name (e.g., "MuData")
|
41
|
+
package_name: Package that provides the class (e.g., "mudata")
|
42
|
+
operation: Function to call with the object if package is available.
|
43
|
+
|
44
|
+
Examples:
|
45
|
+
# For instance methods
|
46
|
+
handled, res = apply_class_func(dmem, "MuData", "mudata",
|
47
|
+
lambda obj: obj.write(filepath))
|
48
|
+
"""
|
49
|
+
if obj.__class__.__name__ == class_name:
|
50
|
+
try:
|
51
|
+
importlib.import_module(package_name)
|
52
|
+
result = operation(obj)
|
53
|
+
return True, result
|
54
|
+
except ImportError:
|
55
|
+
raise ImportError(
|
56
|
+
f"Object appears to be {class_name} but '{package_name}' package is not installed. "
|
57
|
+
f"Please install with: pip install {package_name}"
|
58
|
+
) from None
|
59
|
+
|
60
|
+
return False, None
|
lamindb/core/_context.py
CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
11
11
|
from typing import TYPE_CHECKING
|
12
12
|
|
13
13
|
import lamindb_setup as ln_setup
|
14
|
-
from django.db.models import Func, IntegerField
|
14
|
+
from django.db.models import Func, IntegerField, Q
|
15
15
|
from lamin_utils import logger
|
16
16
|
from lamindb_setup.core import deprecated
|
17
17
|
from lamindb_setup.core.hashing import hash_file
|
@@ -20,21 +20,26 @@ from lamindb.base import ids
|
|
20
20
|
from lamindb.base.ids import base62_12
|
21
21
|
from lamindb.models import Run, Transform, format_field_value
|
22
22
|
|
23
|
+
from ..core._settings import settings
|
23
24
|
from ..errors import (
|
24
25
|
InconsistentKey,
|
26
|
+
InvalidArgument,
|
25
27
|
TrackNotCalled,
|
26
28
|
UpdateContext,
|
27
29
|
)
|
28
|
-
from .
|
30
|
+
from ..models._is_versioned import bump_version as bump_version_function
|
31
|
+
from ..models._is_versioned import (
|
32
|
+
increment_base62,
|
33
|
+
message_update_key_in_version_family,
|
34
|
+
)
|
29
35
|
from ._sync_git import get_transform_reference_from_git_repo
|
30
36
|
from ._track_environment import track_environment
|
31
|
-
from .versioning import bump_version as bump_version_function
|
32
|
-
from .versioning import increment_base62, message_update_key_in_version_family
|
33
37
|
|
34
38
|
if TYPE_CHECKING:
|
35
39
|
from lamindb_setup.core.types import UPathStr
|
36
40
|
|
37
41
|
from lamindb.base.types import TransformType
|
42
|
+
from lamindb.models import Project
|
38
43
|
|
39
44
|
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
40
45
|
|
@@ -198,6 +203,7 @@ class Context:
|
|
198
203
|
self._run: Run | None = None
|
199
204
|
self._path: Path | None = None
|
200
205
|
"""A local path to the script that's running."""
|
206
|
+
self._project: Project | None = None
|
201
207
|
self._logging_message_track: str = ""
|
202
208
|
self._logging_message_imports: str = ""
|
203
209
|
self._stream_tracker: LogStreamTracker = LogStreamTracker()
|
@@ -244,6 +250,11 @@ class Context:
|
|
244
250
|
def version(self, value: str | None):
|
245
251
|
self._version = value
|
246
252
|
|
253
|
+
@property
|
254
|
+
def project(self) -> Project | None:
|
255
|
+
"""Project to label entities created during the run."""
|
256
|
+
return self._project
|
257
|
+
|
247
258
|
@property
|
248
259
|
def run(self) -> Run | None:
|
249
260
|
"""Managed run of context."""
|
@@ -253,10 +264,10 @@ class Context:
|
|
253
264
|
self,
|
254
265
|
transform: str | Transform | None = None,
|
255
266
|
*,
|
267
|
+
project: str | None = None,
|
256
268
|
params: dict | None = None,
|
257
269
|
new_run: bool | None = None,
|
258
270
|
path: str | None = None,
|
259
|
-
log_to_file: bool | None = None,
|
260
271
|
) -> None:
|
261
272
|
"""Track a global run of your Python session.
|
262
273
|
|
@@ -270,14 +281,12 @@ class Context:
|
|
270
281
|
|
271
282
|
Args:
|
272
283
|
transform: A transform `uid` or record. If `None`, creates a `uid`.
|
284
|
+
project: A project `name` or `uid` for labeling entities created during the run.
|
273
285
|
params: A dictionary of parameters to track for the run.
|
274
286
|
new_run: If `False`, loads the latest run of transform
|
275
287
|
(default notebook), if `True`, creates new run (default non-notebook).
|
276
288
|
path: Filepath of notebook or script. Only needed if it can't be
|
277
289
|
automatically detected.
|
278
|
-
log_to_file: If `True`, logs stdout and stderr to a file and
|
279
|
-
saves the file within the current run (default non-notebook),
|
280
|
-
if `False`, does not log the output (default notebook).
|
281
290
|
|
282
291
|
Examples:
|
283
292
|
|
@@ -290,6 +299,17 @@ class Context:
|
|
290
299
|
>>> ln.track("Onv04I53OgtT0000") # example uid, the last four characters encode the version of the transform
|
291
300
|
|
292
301
|
"""
|
302
|
+
from lamindb.models import Project
|
303
|
+
|
304
|
+
if project is not None:
|
305
|
+
project_record = Project.filter(
|
306
|
+
Q(name=project) | Q(uid=project)
|
307
|
+
).one_or_none()
|
308
|
+
if project_record is None:
|
309
|
+
raise InvalidArgument(
|
310
|
+
f"Project '{project}' not found, either create it with `ln.Project(name='...').save()` or fix typos."
|
311
|
+
)
|
312
|
+
self._project = project_record
|
293
313
|
self._logging_message_track = ""
|
294
314
|
self._logging_message_imports = ""
|
295
315
|
if transform is not None and isinstance(transform, str):
|
@@ -375,6 +395,12 @@ class Context:
|
|
375
395
|
)
|
376
396
|
self._run = run
|
377
397
|
track_environment(run)
|
398
|
+
if self.project is not None:
|
399
|
+
# to update a potential project link
|
400
|
+
# is only necessary if transform is loaded rather than newly created
|
401
|
+
# can be optimized by checking whether the transform is loaded, but it typically is
|
402
|
+
self.transform.save()
|
403
|
+
log_to_file = None
|
378
404
|
if log_to_file is None:
|
379
405
|
log_to_file = self.transform.type != "notebook"
|
380
406
|
if log_to_file:
|
@@ -435,26 +461,23 @@ class Context:
|
|
435
461
|
path_str = get_notebook_key_colab()
|
436
462
|
path = Path(path_str)
|
437
463
|
else:
|
438
|
-
import nbproject
|
464
|
+
from nbproject.dev import read_notebook
|
465
|
+
from nbproject.dev._meta_live import get_title
|
466
|
+
from nbproject.dev._pypackage import infer_pypackages
|
439
467
|
|
440
468
|
try:
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
description = nbproject_title
|
447
|
-
# log imported python packages
|
448
|
-
try:
|
449
|
-
from nbproject.dev._pypackage import infer_pypackages
|
469
|
+
nb = read_notebook(path_str)
|
470
|
+
|
471
|
+
nbproject_title = get_title(nb)
|
472
|
+
if nbproject_title is not None:
|
473
|
+
description = nbproject_title
|
450
474
|
|
451
|
-
nb = nbproject.dev.read_notebook(path_str)
|
452
475
|
self._logging_message_imports += (
|
453
476
|
"notebook imports:"
|
454
477
|
f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
|
455
478
|
)
|
456
479
|
except Exception:
|
457
|
-
logger.debug("
|
480
|
+
logger.debug("reading the notebook file failed")
|
458
481
|
pass
|
459
482
|
return path, description
|
460
483
|
|
lamindb/core/_settings.py
CHANGED
@@ -155,12 +155,12 @@ class Settings:
|
|
155
155
|
def verbosity(self) -> str:
|
156
156
|
"""Logger verbosity (default `'warning'`).
|
157
157
|
|
158
|
-
- `'error'`:
|
159
|
-
- `'warning'`:
|
160
|
-
- `'success'`:
|
161
|
-
- `'info'`:
|
162
|
-
- `'hint'`:
|
163
|
-
- `'debug'`:
|
158
|
+
- `'error'`: only show error messages
|
159
|
+
- `'warning'`: also show warning messages
|
160
|
+
- `'success'`: also show success and save messages
|
161
|
+
- `'info'`: also show info messages
|
162
|
+
- `'hint'`: also show hint messages
|
163
|
+
- `'debug'`: also show detailed debug messages
|
164
164
|
"""
|
165
165
|
return VERBOSITY_TO_STR[self._verbosity_int]
|
166
166
|
|
lamindb/core/_sync_git.py
CHANGED
@@ -7,7 +7,7 @@ from lamin_utils import logger
|
|
7
7
|
from lamindb_setup import settings as setup_settings
|
8
8
|
from lamindb_setup.core.hashing import hash_code
|
9
9
|
|
10
|
-
from ._settings import sanitize_git_repo_url, settings
|
10
|
+
from ..core._settings import sanitize_git_repo_url, settings
|
11
11
|
|
12
12
|
|
13
13
|
class BlobHashNotFound(SystemExit):
|
lamindb/core/loaders.py
CHANGED
@@ -20,26 +20,30 @@ from __future__ import annotations
|
|
20
20
|
import builtins
|
21
21
|
import re
|
22
22
|
from pathlib import Path
|
23
|
-
from typing import TYPE_CHECKING
|
23
|
+
from typing import TYPE_CHECKING, Any
|
24
24
|
|
25
|
-
import anndata as ad
|
26
25
|
import pandas as pd
|
26
|
+
from anndata import read_h5ad
|
27
27
|
from lamin_utils import logger
|
28
28
|
from lamindb_setup.core.upath import (
|
29
29
|
create_path,
|
30
30
|
infer_filesystem,
|
31
31
|
)
|
32
32
|
|
33
|
-
from ._settings import settings
|
33
|
+
from ..core._settings import settings
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
|
+
from anndata import AnnData
|
36
37
|
from lamindb_setup.core.types import UPathStr
|
38
|
+
from mudata import MuData
|
39
|
+
|
40
|
+
from lamindb.core.types import ScverseDataStructures
|
37
41
|
|
38
42
|
try:
|
39
|
-
from .storage._zarr import
|
43
|
+
from ..core.storage._zarr import load_zarr
|
40
44
|
except ImportError:
|
41
45
|
|
42
|
-
def
|
46
|
+
def load_zarr(storepath): # type: ignore
|
43
47
|
raise ImportError("Please install zarr: pip install zarr<=2.18.4")
|
44
48
|
|
45
49
|
|
@@ -47,7 +51,7 @@ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
|
47
51
|
|
48
52
|
|
49
53
|
# tested in lamin-usecases
|
50
|
-
def load_fcs(*args, **kwargs) -> ad.AnnData:
|
54
|
+
def load_fcs(*args, **kwargs) -> AnnData:
|
51
55
|
"""Load an `.fcs` file to `AnnData`."""
|
52
56
|
try:
|
53
57
|
import readfcs
|
@@ -62,16 +66,16 @@ def load_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
|
|
62
66
|
return pd.read_csv(path_sanitized, sep="\t", **kwargs)
|
63
67
|
|
64
68
|
|
65
|
-
def load_h5ad(filepath, **kwargs) -> ad.AnnData:
|
69
|
+
def load_h5ad(filepath, **kwargs) -> AnnData:
|
66
70
|
"""Load an `.h5ad` file to `AnnData`."""
|
67
71
|
fs, filepath = infer_filesystem(filepath)
|
68
72
|
compression = kwargs.pop("compression", "infer")
|
69
73
|
with fs.open(filepath, mode="rb", compression=compression) as file:
|
70
|
-
adata = ad.read_h5ad(file, backed=False, **kwargs)
|
74
|
+
adata = read_h5ad(file, backed=False, **kwargs)
|
71
75
|
return adata
|
72
76
|
|
73
77
|
|
74
|
-
def load_h5mu(filepath: UPathStr, **kwargs):
|
78
|
+
def load_h5mu(filepath: UPathStr, **kwargs) -> MuData:
|
75
79
|
"""Load an `.h5mu` file to `MuData`."""
|
76
80
|
import mudata as md
|
77
81
|
|
@@ -100,7 +104,7 @@ def load_html(path: UPathStr) -> None | UPathStr:
|
|
100
104
|
return path
|
101
105
|
|
102
106
|
|
103
|
-
def load_json(path: UPathStr) -> dict:
|
107
|
+
def load_json(path: UPathStr) -> dict[str, Any] | list[Any]:
|
104
108
|
"""Load `.json` to `dict`."""
|
105
109
|
import json
|
106
110
|
|
@@ -109,7 +113,7 @@ def load_json(path: UPathStr) -> dict:
|
|
109
113
|
return data
|
110
114
|
|
111
115
|
|
112
|
-
def load_yaml(path: UPathStr) -> dict:
|
116
|
+
def load_yaml(path: UPathStr) -> dict[str, Any] | list[Any]:
|
113
117
|
"""Load `.yaml` to `dict`."""
|
114
118
|
import yaml # type: ignore
|
115
119
|
|
@@ -156,7 +160,7 @@ FILE_LOADERS = {
|
|
156
160
|
".parquet": pd.read_parquet,
|
157
161
|
".parquet.gz": pd.read_parquet, # this doesn't work for externally gzipped files, REMOVE LATER
|
158
162
|
".fcs": load_fcs,
|
159
|
-
".zarr":
|
163
|
+
".zarr": load_zarr,
|
160
164
|
".html": load_html,
|
161
165
|
".json": load_json,
|
162
166
|
".yaml": load_yaml,
|
@@ -172,15 +176,18 @@ SUPPORTED_SUFFIXES = [sfx for sfx in FILE_LOADERS.keys() if sfx != ".rds"]
|
|
172
176
|
"""Suffixes with defined artifact loaders."""
|
173
177
|
|
174
178
|
|
175
|
-
def load_to_memory(
|
179
|
+
def load_to_memory(
|
180
|
+
filepath: UPathStr, **kwargs
|
181
|
+
) -> (
|
182
|
+
pd.DataFrame | ScverseDataStructures | dict[str, Any] | list[Any] | UPathStr | None
|
183
|
+
):
|
176
184
|
"""Load a file into memory.
|
177
185
|
|
178
186
|
Returns the filepath if no in-memory form is found.
|
187
|
+
May return None in interactive sessions for images.
|
179
188
|
"""
|
180
189
|
filepath = create_path(filepath)
|
181
190
|
|
182
|
-
filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
|
183
|
-
|
184
191
|
# infer the correct suffix when .gz is present
|
185
192
|
suffixes = filepath.suffixes
|
186
193
|
suffix = (
|
@@ -189,8 +196,12 @@ def load_to_memory(filepath: UPathStr, **kwargs):
|
|
189
196
|
else filepath.suffix
|
190
197
|
)
|
191
198
|
|
192
|
-
loader = FILE_LOADERS.get(suffix)
|
199
|
+
loader = FILE_LOADERS.get(suffix, None)
|
193
200
|
if loader is None:
|
194
|
-
|
195
|
-
|
196
|
-
|
201
|
+
raise NotImplementedError(
|
202
|
+
f"There is no loader for {suffix} files. Use .cache() to get the path."
|
203
|
+
)
|
204
|
+
|
205
|
+
filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
|
206
|
+
|
207
|
+
return loader(filepath, **kwargs)
|
@@ -5,8 +5,6 @@ from typing import TYPE_CHECKING, Any, Callable
|
|
5
5
|
|
6
6
|
from anndata._io.specs.registry import get_spec
|
7
7
|
|
8
|
-
from lamindb.models import Artifact
|
9
|
-
|
10
8
|
from ._anndata_accessor import AnnDataAccessor, StorageType, registry
|
11
9
|
from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
|
12
10
|
from ._tiledbsoma import _open_tiledbsoma
|
@@ -19,6 +17,8 @@ if TYPE_CHECKING:
|
|
19
17
|
from tiledbsoma import Experiment as SOMAExperiment
|
20
18
|
from upath import UPath
|
21
19
|
|
20
|
+
from lamindb.models.artifact import Artifact
|
21
|
+
|
22
22
|
|
23
23
|
# this dynamically creates a subclass of a context manager class
|
24
24
|
# and reassigns it to an instance of the superclass
|
@@ -74,6 +74,8 @@ def backed_access(
|
|
74
74
|
) -> (
|
75
75
|
AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
|
76
76
|
):
|
77
|
+
from lamindb.models import Artifact
|
78
|
+
|
77
79
|
if isinstance(artifact_or_filepath, Artifact):
|
78
80
|
objectpath, _ = filepath_from_artifact(
|
79
81
|
artifact_or_filepath, using_key=using_key
|