lamindb 1.1.0__py3-none-any.whl → 1.2a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +31 -26
- lamindb/_finish.py +9 -1
- lamindb/_tracked.py +26 -3
- lamindb/_view.py +2 -3
- lamindb/base/__init__.py +1 -1
- lamindb/base/ids.py +1 -10
- lamindb/base/users.py +1 -4
- lamindb/core/__init__.py +7 -65
- lamindb/core/_context.py +41 -10
- lamindb/core/_mapped_collection.py +4 -2
- lamindb/core/_settings.py +6 -6
- lamindb/core/_sync_git.py +1 -1
- lamindb/core/_track_environment.py +2 -1
- lamindb/core/datasets/_small.py +3 -3
- lamindb/core/loaders.py +22 -9
- lamindb/core/storage/_anndata_accessor.py +8 -3
- lamindb/core/storage/_backed_access.py +14 -7
- lamindb/core/storage/_pyarrow_dataset.py +24 -9
- lamindb/core/storage/_tiledbsoma.py +6 -4
- lamindb/core/storage/_zarr.py +32 -11
- lamindb/core/storage/objects.py +59 -26
- lamindb/core/storage/paths.py +16 -13
- lamindb/curators/__init__.py +173 -145
- lamindb/errors.py +1 -1
- lamindb/integrations/_vitessce.py +4 -4
- lamindb/migrations/0089_subsequent_runs.py +159 -0
- lamindb/migrations/0090_runproject_project_runs.py +73 -0
- lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
- lamindb/models/__init__.py +79 -0
- lamindb/{core → models}/_describe.py +3 -3
- lamindb/{core → models}/_django.py +8 -5
- lamindb/{core → models}/_feature_manager.py +103 -87
- lamindb/{_from_values.py → models/_from_values.py} +5 -2
- lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
- lamindb/{core → models}/_label_manager.py +10 -17
- lamindb/{core/relations.py → models/_relations.py} +8 -1
- lamindb/models/artifact.py +2601 -0
- lamindb/{_can_curate.py → models/can_curate.py} +349 -180
- lamindb/models/collection.py +683 -0
- lamindb/models/core.py +135 -0
- lamindb/models/feature.py +643 -0
- lamindb/models/flextable.py +163 -0
- lamindb/{_parents.py → models/has_parents.py} +55 -49
- lamindb/models/project.py +384 -0
- lamindb/{_query_manager.py → models/query_manager.py} +10 -8
- lamindb/{_query_set.py → models/query_set.py} +52 -30
- lamindb/models/record.py +1757 -0
- lamindb/models/run.py +563 -0
- lamindb/{_save.py → models/save.py} +18 -8
- lamindb/models/schema.py +732 -0
- lamindb/models/transform.py +360 -0
- lamindb/models/ulabel.py +249 -0
- {lamindb-1.1.0.dist-info → lamindb-1.2a2.dist-info}/METADATA +5 -5
- lamindb-1.2a2.dist-info/RECORD +94 -0
- lamindb/_artifact.py +0 -1361
- lamindb/_collection.py +0 -440
- lamindb/_feature.py +0 -316
- lamindb/_is_versioned.py +0 -40
- lamindb/_record.py +0 -1065
- lamindb/_run.py +0 -60
- lamindb/_schema.py +0 -347
- lamindb/_storage.py +0 -15
- lamindb/_transform.py +0 -170
- lamindb/_ulabel.py +0 -56
- lamindb/_utils.py +0 -9
- lamindb/base/validation.py +0 -63
- lamindb/core/_data.py +0 -491
- lamindb/core/fields.py +0 -12
- lamindb/models.py +0 -4435
- lamindb-1.1.0.dist-info/RECORD +0 -95
- {lamindb-1.1.0.dist-info → lamindb-1.2a2.dist-info}/LICENSE +0 -0
- {lamindb-1.1.0.dist-info → lamindb-1.2a2.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
"""A data framework for biology.
|
2
2
|
|
3
|
-
Tracking notebooks &
|
3
|
+
Tracking notebooks, scripts & functions.
|
4
4
|
|
5
5
|
.. autosummary::
|
6
6
|
:toctree: .
|
7
7
|
|
8
8
|
track
|
9
9
|
finish
|
10
|
+
tracked
|
10
11
|
|
11
12
|
Registries.
|
12
13
|
|
@@ -15,8 +16,8 @@ Registries.
|
|
15
16
|
|
16
17
|
Artifact
|
17
18
|
Transform
|
18
|
-
ULabel
|
19
19
|
Run
|
20
|
+
ULabel
|
20
21
|
User
|
21
22
|
Storage
|
22
23
|
Feature
|
@@ -24,9 +25,17 @@ Registries.
|
|
24
25
|
Param
|
25
26
|
Collection
|
26
27
|
Project
|
28
|
+
Space
|
27
29
|
Reference
|
28
30
|
Person
|
29
31
|
|
32
|
+
Curators & integrations.
|
33
|
+
|
34
|
+
.. autosummary::
|
35
|
+
|
36
|
+
curators
|
37
|
+
integrations
|
38
|
+
|
30
39
|
Key functionality.
|
31
40
|
|
32
41
|
.. autosummary::
|
@@ -35,20 +44,19 @@ Key functionality.
|
|
35
44
|
connect
|
36
45
|
view
|
37
46
|
save
|
47
|
+
UPath
|
48
|
+
settings
|
38
49
|
|
39
|
-
|
50
|
+
Low-level functionality.
|
40
51
|
|
41
52
|
.. autosummary::
|
42
53
|
:toctree: .
|
43
54
|
|
44
|
-
integrations
|
45
55
|
context
|
46
|
-
curators
|
47
|
-
settings
|
48
56
|
errors
|
49
57
|
setup
|
50
|
-
UPath
|
51
58
|
base
|
59
|
+
models
|
52
60
|
core
|
53
61
|
|
54
62
|
Backward compatibility.
|
@@ -57,11 +65,20 @@ Backward compatibility.
|
|
57
65
|
:toctree: .
|
58
66
|
|
59
67
|
FeatureSet
|
68
|
+
Curator
|
60
69
|
|
61
70
|
"""
|
62
71
|
|
72
|
+
# ruff: noqa: I001
|
63
73
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
64
|
-
__version__ = "1.
|
74
|
+
__version__ = "1.2a2"
|
75
|
+
|
76
|
+
import warnings
|
77
|
+
|
78
|
+
# through SpatialData
|
79
|
+
warnings.filterwarnings(
|
80
|
+
"ignore", message="The legacy Dask DataFrame implementation is deprecated"
|
81
|
+
)
|
65
82
|
|
66
83
|
from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
|
67
84
|
from lamindb_setup._check_setup import _check_instance_setup
|
@@ -77,23 +94,7 @@ def __getattr__(name):
|
|
77
94
|
|
78
95
|
if _check_instance_setup(from_module="lamindb"):
|
79
96
|
del __getattr__ # so that imports work out
|
80
|
-
from . import
|
81
|
-
from . import (
|
82
|
-
_artifact,
|
83
|
-
_can_curate,
|
84
|
-
_collection,
|
85
|
-
_feature,
|
86
|
-
_is_versioned,
|
87
|
-
_parents,
|
88
|
-
_record,
|
89
|
-
_run,
|
90
|
-
_schema,
|
91
|
-
_storage,
|
92
|
-
_transform,
|
93
|
-
_ulabel,
|
94
|
-
integrations,
|
95
|
-
)
|
96
|
-
from ._save import save
|
97
|
+
from . import base
|
97
98
|
from ._tracked import tracked
|
98
99
|
from ._view import view
|
99
100
|
from .core._context import context
|
@@ -109,12 +110,16 @@ if _check_instance_setup(from_module="lamindb"):
|
|
109
110
|
Project,
|
110
111
|
Reference,
|
111
112
|
Run,
|
112
|
-
Schema,
|
113
|
+
Schema,
|
113
114
|
Storage,
|
114
115
|
Transform,
|
115
116
|
ULabel,
|
116
117
|
User,
|
118
|
+
Space,
|
117
119
|
)
|
120
|
+
from .models.save import save
|
121
|
+
from . import core
|
122
|
+
from . import integrations
|
118
123
|
|
119
124
|
track = context.track # simple access
|
120
125
|
finish = context.finish # simple access
|
lamindb/_finish.py
CHANGED
@@ -436,7 +436,15 @@ def save_context_core(
|
|
436
436
|
# save both run & transform records if we arrive here
|
437
437
|
if run is not None:
|
438
438
|
run.save()
|
439
|
-
transform.
|
439
|
+
transform_id_prior_to_save = transform.id
|
440
|
+
transform.save() # this in-place updates the state of transform upon hash collision
|
441
|
+
if transform.id != transform_id_prior_to_save:
|
442
|
+
# the hash existed and we're actually back to the previous version
|
443
|
+
# hence, this was in fact a run of the previous transform rather than of
|
444
|
+
# the new transform
|
445
|
+
# this can happen in interactive notebooks if the user makes no change to the notebook
|
446
|
+
run.transform = transform
|
447
|
+
run.save()
|
440
448
|
|
441
449
|
# finalize
|
442
450
|
if not from_cli and run is not None:
|
lamindb/_tracked.py
CHANGED
@@ -5,8 +5,8 @@ from datetime import datetime, timezone
|
|
5
5
|
from typing import Callable, ParamSpec, TypeVar
|
6
6
|
|
7
7
|
from .core._context import context
|
8
|
-
from .core._feature_manager import infer_feature_type_convert_json
|
9
8
|
from .models import Run, Transform
|
9
|
+
from .models._feature_manager import infer_feature_type_convert_json
|
10
10
|
|
11
11
|
P = ParamSpec("P")
|
12
12
|
R = TypeVar("R")
|
@@ -26,10 +26,33 @@ def get_current_tracked_run() -> Run | None:
|
|
26
26
|
|
27
27
|
|
28
28
|
def tracked(uid: str | None = None) -> Callable[[Callable[P, R]], Callable[P, R]]:
|
29
|
-
"""
|
29
|
+
"""Mark a function as tracked with this decorator.
|
30
|
+
|
31
|
+
You will be able to see inputs, outputs, and parameters of the function in the data lineage graph.
|
32
|
+
|
33
|
+
Guide: :doc:`/track`
|
34
|
+
|
35
|
+
.. versionadded:: 1.1.0
|
36
|
+
This is still in beta and will be refined in future releases.
|
30
37
|
|
31
38
|
Args:
|
32
|
-
uid:
|
39
|
+
uid: Persist the uid to identify this transform across renames.
|
40
|
+
|
41
|
+
Example::
|
42
|
+
|
43
|
+
import lamindb as ln
|
44
|
+
|
45
|
+
@ln.tracked()
|
46
|
+
def subset_dataframe(
|
47
|
+
input_artifact_key: str, # all arguments tracked as parameters of the function run
|
48
|
+
output_artifact_key: str,
|
49
|
+
subset_rows: int = 2,
|
50
|
+
subset_cols: int = 2,
|
51
|
+
) -> None:
|
52
|
+
artifact = ln.Artifact.get(key=input_artifact_key)
|
53
|
+
df = artifact.load() # auto-tracked as input
|
54
|
+
new_df = df.iloc[:subset_rows, :subset_cols]
|
55
|
+
ln.Artifact.from_df(new_df, key=output_artifact_key).save() # auto-tracked as output
|
33
56
|
"""
|
34
57
|
|
35
58
|
def decorator_tracked(func: Callable[P, R]) -> Callable[P, R]:
|
lamindb/_view.py
CHANGED
@@ -9,10 +9,9 @@ from lamin_utils import colors, logger
|
|
9
9
|
from lamindb_setup import settings
|
10
10
|
from lamindb_setup._init_instance import get_schema_module_name
|
11
11
|
|
12
|
-
from lamindb.
|
13
|
-
from lamindb.models import Feature, Record
|
12
|
+
from lamindb.models import Feature, FeatureValue, ParamValue, Record
|
14
13
|
|
15
|
-
from .
|
14
|
+
from .models.feature import convert_pandas_dtype_to_lamin_dtype
|
16
15
|
|
17
16
|
if TYPE_CHECKING:
|
18
17
|
import pandas as pd
|
lamindb/base/__init__.py
CHANGED
lamindb/base/ids.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""IDs.
|
1
|
+
"""Universal IDs.
|
2
2
|
|
3
3
|
Base generators:
|
4
4
|
|
@@ -55,15 +55,6 @@ def base62(n_char: int) -> str:
|
|
55
55
|
return id
|
56
56
|
|
57
57
|
|
58
|
-
# the following cannot be serialized by Django
|
59
|
-
# class Base62:
|
60
|
-
# def __init__(self, n_char: int):
|
61
|
-
# self.n_char = n_char
|
62
|
-
|
63
|
-
# def __call__(self):
|
64
|
-
# return base62(self.n_char)
|
65
|
-
|
66
|
-
|
67
58
|
def base26(n_char: int):
|
68
59
|
"""ASCII lowercase."""
|
69
60
|
alphabet = string.ascii_lowercase
|
lamindb/base/users.py
CHANGED
@@ -12,12 +12,9 @@ def current_user_id() -> int:
|
|
12
12
|
if ln_setup.core.django.IS_MIGRATING:
|
13
13
|
return 1
|
14
14
|
else:
|
15
|
-
exc_attr = (
|
16
|
-
"DoesNotExist" if hasattr(User, "DoesNotExist") else "_DoesNotExist"
|
17
|
-
)
|
18
15
|
try:
|
19
16
|
user_id = User.objects.get(uid=settings.user.uid).id
|
20
|
-
except
|
17
|
+
except User.DoesNotExist:
|
21
18
|
register_user(settings.user)
|
22
19
|
user_id = User.objects.get(uid=settings.user.uid).id
|
23
20
|
return user_id
|
lamindb/core/__init__.py
CHANGED
@@ -1,49 +1,20 @@
|
|
1
1
|
"""Core library.
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
.. autosummary::
|
6
|
-
:toctree: .
|
7
|
-
|
8
|
-
BasicRecord
|
9
|
-
Record
|
10
|
-
Registry
|
11
|
-
QuerySet
|
12
|
-
QueryManager
|
13
|
-
RecordList
|
14
|
-
FeatureManager
|
15
|
-
ParamManager
|
16
|
-
LabelManager
|
17
|
-
IsVersioned
|
18
|
-
CanCurate
|
19
|
-
HasParents
|
20
|
-
TracksRun
|
21
|
-
TracksUpdates
|
22
|
-
ParamValue
|
23
|
-
FeatureValue
|
24
|
-
InspectResult
|
25
|
-
ValidateFields
|
26
|
-
fields
|
27
|
-
|
28
|
-
Curators:
|
3
|
+
Settings & context:
|
29
4
|
|
30
5
|
.. autosummary::
|
31
6
|
:toctree: .
|
32
7
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
MuDataCatManager
|
37
|
-
TiledbsomaCatManager
|
38
|
-
CurateLookup
|
8
|
+
Settings
|
9
|
+
subsettings
|
10
|
+
Context
|
39
11
|
|
40
|
-
|
12
|
+
Artifact loaders:
|
41
13
|
|
42
14
|
.. autosummary::
|
43
15
|
:toctree: .
|
44
16
|
|
45
|
-
|
46
|
-
Context
|
17
|
+
loaders
|
47
18
|
|
48
19
|
Data loaders:
|
49
20
|
|
@@ -57,10 +28,8 @@ Modules:
|
|
57
28
|
.. autosummary::
|
58
29
|
:toctree: .
|
59
30
|
|
60
|
-
loaders
|
61
31
|
datasets
|
62
32
|
storage
|
63
|
-
subsettings
|
64
33
|
logger
|
65
34
|
|
66
35
|
"""
|
@@ -68,35 +37,8 @@ Modules:
|
|
68
37
|
from lamin_utils import logger
|
69
38
|
from lamin_utils._inspect import InspectResult
|
70
39
|
|
71
|
-
from lamindb._query_manager import QueryManager
|
72
|
-
from lamindb._query_set import QuerySet, RecordList
|
73
|
-
from lamindb.core._feature_manager import FeatureManager, ParamManager
|
74
|
-
from lamindb.core._label_manager import LabelManager
|
75
|
-
from lamindb.curators import (
|
76
|
-
AnnDataCatManager,
|
77
|
-
CatManager,
|
78
|
-
CurateLookup,
|
79
|
-
Curator,
|
80
|
-
DataFrameCatManager,
|
81
|
-
MuDataCatManager,
|
82
|
-
TiledbsomaCatManager,
|
83
|
-
)
|
84
|
-
from lamindb.models import (
|
85
|
-
BasicRecord,
|
86
|
-
CanCurate,
|
87
|
-
FeatureValue,
|
88
|
-
HasParents,
|
89
|
-
IsVersioned,
|
90
|
-
ParamValue,
|
91
|
-
Record,
|
92
|
-
Registry,
|
93
|
-
TracksRun,
|
94
|
-
TracksUpdates,
|
95
|
-
ValidateFields,
|
96
|
-
)
|
97
|
-
|
98
40
|
from .. import errors as exceptions
|
99
|
-
from . import
|
41
|
+
from . import datasets, loaders, subsettings, types
|
100
42
|
from ._context import Context
|
101
43
|
from ._mapped_collection import MappedCollection
|
102
44
|
from ._settings import Settings
|
lamindb/core/_context.py
CHANGED
@@ -11,29 +11,35 @@ from pathlib import Path
|
|
11
11
|
from typing import TYPE_CHECKING
|
12
12
|
|
13
13
|
import lamindb_setup as ln_setup
|
14
|
-
from django.db.models import Func, IntegerField
|
14
|
+
from django.db.models import Func, IntegerField, Q
|
15
15
|
from lamin_utils import logger
|
16
|
+
from lamindb_setup.core import deprecated
|
16
17
|
from lamindb_setup.core.hashing import hash_file
|
17
18
|
|
18
19
|
from lamindb.base import ids
|
19
20
|
from lamindb.base.ids import base62_12
|
20
21
|
from lamindb.models import Run, Transform, format_field_value
|
21
22
|
|
23
|
+
from ..core._settings import settings
|
22
24
|
from ..errors import (
|
23
25
|
InconsistentKey,
|
26
|
+
InvalidArgument,
|
24
27
|
TrackNotCalled,
|
25
28
|
UpdateContext,
|
26
29
|
)
|
27
|
-
from .
|
30
|
+
from ..models._is_versioned import bump_version as bump_version_function
|
31
|
+
from ..models._is_versioned import (
|
32
|
+
increment_base62,
|
33
|
+
message_update_key_in_version_family,
|
34
|
+
)
|
28
35
|
from ._sync_git import get_transform_reference_from_git_repo
|
29
36
|
from ._track_environment import track_environment
|
30
|
-
from .versioning import bump_version as bump_version_function
|
31
|
-
from .versioning import increment_base62, message_update_key_in_version_family
|
32
37
|
|
33
38
|
if TYPE_CHECKING:
|
34
39
|
from lamindb_setup.core.types import UPathStr
|
35
40
|
|
36
41
|
from lamindb.base.types import TransformType
|
42
|
+
from lamindb.models import Project
|
37
43
|
|
38
44
|
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
39
45
|
|
@@ -197,6 +203,7 @@ class Context:
|
|
197
203
|
self._run: Run | None = None
|
198
204
|
self._path: Path | None = None
|
199
205
|
"""A local path to the script that's running."""
|
206
|
+
self._project: Project | None = None
|
200
207
|
self._logging_message_track: str = ""
|
201
208
|
self._logging_message_imports: str = ""
|
202
209
|
self._stream_tracker: LogStreamTracker = LogStreamTracker()
|
@@ -217,8 +224,8 @@ class Context:
|
|
217
224
|
self._description = value
|
218
225
|
|
219
226
|
@property
|
227
|
+
@deprecated(new_name="description")
|
220
228
|
def name(self) -> str | None:
|
221
|
-
"""Deprecated. Populates `description` argument for `context.transform`."""
|
222
229
|
return self._description
|
223
230
|
|
224
231
|
@name.setter
|
@@ -243,6 +250,11 @@ class Context:
|
|
243
250
|
def version(self, value: str | None):
|
244
251
|
self._version = value
|
245
252
|
|
253
|
+
@property
|
254
|
+
def project(self) -> Project | None:
|
255
|
+
"""Project to label entities created during the run."""
|
256
|
+
return self._project
|
257
|
+
|
246
258
|
@property
|
247
259
|
def run(self) -> Run | None:
|
248
260
|
"""Managed run of context."""
|
@@ -252,12 +264,12 @@ class Context:
|
|
252
264
|
self,
|
253
265
|
transform: str | Transform | None = None,
|
254
266
|
*,
|
267
|
+
project: str | None = None,
|
255
268
|
params: dict | None = None,
|
256
269
|
new_run: bool | None = None,
|
257
270
|
path: str | None = None,
|
258
|
-
log_to_file: bool | None = None,
|
259
271
|
) -> None:
|
260
|
-
"""
|
272
|
+
"""Track a global run of your Python session.
|
261
273
|
|
262
274
|
- sets :attr:`~lamindb.core.Context.transform` &
|
263
275
|
:attr:`~lamindb.core.Context.run` by creating or loading `Transform` &
|
@@ -269,14 +281,12 @@ class Context:
|
|
269
281
|
|
270
282
|
Args:
|
271
283
|
transform: A transform `uid` or record. If `None`, creates a `uid`.
|
284
|
+
project: A project `name` or `uid` for labeling entities created during the run.
|
272
285
|
params: A dictionary of parameters to track for the run.
|
273
286
|
new_run: If `False`, loads the latest run of transform
|
274
287
|
(default notebook), if `True`, creates new run (default non-notebook).
|
275
288
|
path: Filepath of notebook or script. Only needed if it can't be
|
276
289
|
automatically detected.
|
277
|
-
log_to_file: If `True`, logs stdout and stderr to a file and
|
278
|
-
saves the file within the current run (default non-notebook),
|
279
|
-
if `False`, does not log the output (default notebook).
|
280
290
|
|
281
291
|
Examples:
|
282
292
|
|
@@ -284,7 +294,22 @@ class Context:
|
|
284
294
|
|
285
295
|
>>> ln.track()
|
286
296
|
|
297
|
+
If you want to ensure a single version history across renames of the notebook or script, pass the auto-generated `uid` that you'll find in the logs:
|
298
|
+
|
299
|
+
>>> ln.track("Onv04I53OgtT0000") # example uid, the last four characters encode the version of the transform
|
300
|
+
|
287
301
|
"""
|
302
|
+
from lamindb.models import Project
|
303
|
+
|
304
|
+
if project is not None:
|
305
|
+
project_record = Project.filter(
|
306
|
+
Q(name=project) | Q(uid=project)
|
307
|
+
).one_or_none()
|
308
|
+
if project_record is None:
|
309
|
+
raise InvalidArgument(
|
310
|
+
f"Project '{project}' not found, either create it with `ln.Project(name='...').save()` or fix typos."
|
311
|
+
)
|
312
|
+
self._project = project_record
|
288
313
|
self._logging_message_track = ""
|
289
314
|
self._logging_message_imports = ""
|
290
315
|
if transform is not None and isinstance(transform, str):
|
@@ -370,6 +395,12 @@ class Context:
|
|
370
395
|
)
|
371
396
|
self._run = run
|
372
397
|
track_environment(run)
|
398
|
+
if self.project is not None:
|
399
|
+
# to update a potential project link
|
400
|
+
# is only necessary if transform is loaded rather than newly created
|
401
|
+
# can be optimized by checking whether the transform is loaded, but it typically is
|
402
|
+
self.transform.save()
|
403
|
+
log_to_file = None
|
373
404
|
if log_to_file is None:
|
374
405
|
log_to_file = self.transform.type != "notebook"
|
375
406
|
if log_to_file:
|
@@ -27,7 +27,8 @@ if TYPE_CHECKING:
|
|
27
27
|
class _Connect:
|
28
28
|
def __init__(self, storage):
|
29
29
|
if isinstance(storage, UPath):
|
30
|
-
|
30
|
+
# force no external compression even for files with .gz extension. REMOVE LATER
|
31
|
+
self.conn, self.store = registry.open("h5py", storage, compression=None)
|
31
32
|
self.to_close = True
|
32
33
|
else:
|
33
34
|
self.conn, self.store = None, storage
|
@@ -246,7 +247,8 @@ class MappedCollection:
|
|
246
247
|
if parallel:
|
247
248
|
conn, storage = None, path
|
248
249
|
else:
|
249
|
-
|
250
|
+
# force no external compression even for files with .gz extension. REMOVE LATER
|
251
|
+
conn, storage = registry.open("h5py", path, compression=None)
|
250
252
|
else:
|
251
253
|
conn, storage = registry.open("zarr", path)
|
252
254
|
self.conns.append(conn)
|
lamindb/core/_settings.py
CHANGED
@@ -155,12 +155,12 @@ class Settings:
|
|
155
155
|
def verbosity(self) -> str:
|
156
156
|
"""Logger verbosity (default `'warning'`).
|
157
157
|
|
158
|
-
- `'error'`:
|
159
|
-
- `'warning'`:
|
160
|
-
- `'success'`:
|
161
|
-
- `'info'`:
|
162
|
-
- `'hint'`:
|
163
|
-
- `'debug'`:
|
158
|
+
- `'error'`: only show error messages
|
159
|
+
- `'warning'`: also show warning messages
|
160
|
+
- `'success'`: also show success and save messages
|
161
|
+
- `'info'`: also show info messages
|
162
|
+
- `'hint'`: also show hint messages
|
163
|
+
- `'debug'`: also show detailed debug messages
|
164
164
|
"""
|
165
165
|
return VERBOSITY_TO_STR[self._verbosity_int]
|
166
166
|
|
lamindb/core/_sync_git.py
CHANGED
@@ -7,7 +7,7 @@ from lamin_utils import logger
|
|
7
7
|
from lamindb_setup import settings as setup_settings
|
8
8
|
from lamindb_setup.core.hashing import hash_code
|
9
9
|
|
10
|
-
from ._settings import sanitize_git_repo_url, settings
|
10
|
+
from ..core._settings import sanitize_git_repo_url, settings
|
11
11
|
|
12
12
|
|
13
13
|
class BlobHashNotFound(SystemExit):
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import subprocess
|
4
|
+
import sys
|
4
5
|
from typing import TYPE_CHECKING
|
5
6
|
|
6
7
|
import lamindb_setup as ln_setup
|
@@ -17,7 +18,7 @@ def track_environment(run: Run) -> None:
|
|
17
18
|
try:
|
18
19
|
with open(filepath, "w") as f:
|
19
20
|
result = subprocess.run(
|
20
|
-
["pip", "freeze"],
|
21
|
+
[sys.executable, "-m", "pip", "freeze"],
|
21
22
|
stdout=f,
|
22
23
|
)
|
23
24
|
except OSError as e:
|
lamindb/core/datasets/_small.py
CHANGED
@@ -23,7 +23,7 @@ def small_dataset1(
|
|
23
23
|
var_ids[0]: [1, 2, 3],
|
24
24
|
var_ids[1]: [3, 4, 5],
|
25
25
|
var_ids[2]: [5, 6, 7],
|
26
|
-
"
|
26
|
+
"perturbation": pd.Categorical(["DMSO", ifng, "DMSO"]),
|
27
27
|
"sample_note": ["was ok", "looks naah", "pretty! 🤩"],
|
28
28
|
"cell_type_by_expert": pd.Categorical(["B cell", "T cell", "T cell"]),
|
29
29
|
"cell_type_by_model": pd.Categorical(["B cell", "T cell", "T cell"]),
|
@@ -60,7 +60,7 @@ def small_dataset2(
|
|
60
60
|
var_ids[0]: [2, 3, 3],
|
61
61
|
var_ids[1]: [3, 4, 5],
|
62
62
|
var_ids[2]: [4, 2, 3],
|
63
|
-
"
|
63
|
+
"perturbation": pd.Categorical(["DMSO", "IFNG", "IFNG"]),
|
64
64
|
"cell_type_by_model": pd.Categorical(["B cell", "T cell", "T cell"]),
|
65
65
|
}
|
66
66
|
metadata = {
|
@@ -74,7 +74,7 @@ def small_dataset2(
|
|
74
74
|
)
|
75
75
|
ad.AnnData(
|
76
76
|
dataset_df[var_ids],
|
77
|
-
obs=dataset_df[["
|
77
|
+
obs=dataset_df[["perturbation", "cell_type_by_model"]],
|
78
78
|
)
|
79
79
|
if otype == "DataFrame":
|
80
80
|
for key, value in metadata.items():
|
lamindb/core/loaders.py
CHANGED
@@ -30,13 +30,13 @@ from lamindb_setup.core.upath import (
|
|
30
30
|
infer_filesystem,
|
31
31
|
)
|
32
32
|
|
33
|
-
from ._settings import settings
|
33
|
+
from ..core._settings import settings
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
36
|
from lamindb_setup.core.types import UPathStr
|
37
37
|
|
38
38
|
try:
|
39
|
-
from .storage._zarr import load_anndata_zarr
|
39
|
+
from ..core.storage._zarr import load_anndata_zarr
|
40
40
|
except ImportError:
|
41
41
|
|
42
42
|
def load_anndata_zarr(storepath): # type: ignore
|
@@ -65,8 +65,8 @@ def load_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
|
|
65
65
|
def load_h5ad(filepath, **kwargs) -> ad.AnnData:
|
66
66
|
"""Load an `.h5ad` file to `AnnData`."""
|
67
67
|
fs, filepath = infer_filesystem(filepath)
|
68
|
-
|
69
|
-
with fs.open(filepath, mode="rb") as file:
|
68
|
+
compression = kwargs.pop("compression", "infer")
|
69
|
+
with fs.open(filepath, mode="rb", compression=compression) as file:
|
70
70
|
adata = ad.read_h5ad(file, backed=False, **kwargs)
|
71
71
|
return adata
|
72
72
|
|
@@ -148,9 +148,13 @@ def load_rds(path: UPathStr) -> UPathStr:
|
|
148
148
|
|
149
149
|
FILE_LOADERS = {
|
150
150
|
".csv": pd.read_csv,
|
151
|
+
".csv.gz": pd.read_csv,
|
151
152
|
".tsv": load_tsv,
|
153
|
+
".tsv.gz": load_tsv,
|
152
154
|
".h5ad": load_h5ad,
|
155
|
+
".h5ad.gz": load_h5ad,
|
153
156
|
".parquet": pd.read_parquet,
|
157
|
+
".parquet.gz": pd.read_parquet, # this doesn't work for externally gzipped files, REMOVE LATER
|
154
158
|
".fcs": load_fcs,
|
155
159
|
".zarr": load_anndata_zarr,
|
156
160
|
".html": load_html,
|
@@ -175,10 +179,19 @@ def load_to_memory(filepath: UPathStr, **kwargs):
|
|
175
179
|
"""
|
176
180
|
filepath = create_path(filepath)
|
177
181
|
|
178
|
-
|
182
|
+
# infer the correct suffix when .gz is present
|
183
|
+
suffixes = filepath.suffixes
|
184
|
+
suffix = (
|
185
|
+
"".join(suffixes[-2:])
|
186
|
+
if len(suffixes) > 1 and ".gz" in suffixes
|
187
|
+
else filepath.suffix
|
188
|
+
)
|
179
189
|
|
180
|
-
loader = FILE_LOADERS.get(
|
190
|
+
loader = FILE_LOADERS.get(suffix, None)
|
181
191
|
if loader is None:
|
182
|
-
|
183
|
-
|
184
|
-
|
192
|
+
raise NotImplementedError(
|
193
|
+
f"There is no loader for {suffix} files. Use .cache() to get the path."
|
194
|
+
)
|
195
|
+
|
196
|
+
filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
|
197
|
+
return loader(filepath, **kwargs)
|
@@ -16,6 +16,7 @@ from anndata._io.h5ad import read_dataframe_legacy as read_dataframe_legacy_h5
|
|
16
16
|
from anndata._io.specs.registry import get_spec, read_elem, read_elem_partial
|
17
17
|
from anndata.compat import _read_attr
|
18
18
|
from fsspec.implementations.local import LocalFileSystem
|
19
|
+
from fsspec.utils import infer_compression
|
19
20
|
from lamin_utils import logger
|
20
21
|
from lamindb_setup.core.upath import create_mapper, infer_filesystem
|
21
22
|
from packaging import version
|
@@ -152,9 +153,13 @@ registry = AccessRegistry()
|
|
152
153
|
|
153
154
|
|
154
155
|
@registry.register_open("h5py")
|
155
|
-
def open(filepath: UPathStr, mode: str = "r"):
|
156
|
+
def open(filepath: UPathStr, mode: str = "r", compression: str | None = "infer"):
|
156
157
|
fs, file_path_str = infer_filesystem(filepath)
|
157
|
-
|
158
|
+
# we don't open compressed files directly because we need fsspec to uncompress on .open
|
159
|
+
compression = (
|
160
|
+
infer_compression(file_path_str) if compression == "infer" else compression
|
161
|
+
)
|
162
|
+
if isinstance(fs, LocalFileSystem) and compression is None:
|
158
163
|
assert mode in {"r", "r+", "a", "w", "w-"}, f"Unknown mode {mode}!" # noqa: S101
|
159
164
|
return None, h5py.File(file_path_str, mode=mode)
|
160
165
|
if mode == "r":
|
@@ -165,7 +170,7 @@ def open(filepath: UPathStr, mode: str = "r"):
|
|
165
170
|
conn_mode = "ab"
|
166
171
|
else:
|
167
172
|
raise ValueError(f"Unknown mode {mode}! Should be 'r', 'w' or 'a'.")
|
168
|
-
conn = fs.open(file_path_str, mode=conn_mode)
|
173
|
+
conn = fs.open(file_path_str, mode=conn_mode, compression=compression)
|
169
174
|
try:
|
170
175
|
storage = h5py.File(conn, mode=mode)
|
171
176
|
except Exception as e:
|