lamindb 0.45a1__py3-none-any.whl → 0.46a1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- lamindb/__init__.py +30 -9
- lamindb/_context.py +11 -12
- lamindb/_dataset.py +142 -0
- lamindb/_delete.py +6 -6
- lamindb/_feature_set.py +138 -0
- lamindb/_file.py +322 -81
- lamindb/_from_values.py +57 -160
- lamindb/_orm.py +398 -0
- lamindb/_save.py +26 -10
- lamindb/_select.py +3 -3
- lamindb/_view.py +2 -2
- lamindb/dev/__init__.py +2 -2
- lamindb/dev/_settings.py +2 -1
- lamindb/dev/datasets/__init__.py +6 -0
- lamindb/dev/datasets/_core.py +30 -0
- lamindb/dev/hashing.py +4 -0
- lamindb/dev/storage/__init__.py +4 -3
- lamindb/dev/storage/_backed_access.py +3 -3
- lamindb/dev/storage/{_file.py → file.py} +48 -3
- lamindb/dev/storage/{_object.py → object.py} +1 -0
- lamindb/dev/utils.py +9 -0
- lamindb/types.py +9 -1
- {lamindb-0.45a1.dist-info → lamindb-0.46a1.dist-info}/METADATA +20 -17
- lamindb-0.46a1.dist-info/RECORD +36 -0
- lamindb/_baseorm_methods.py +0 -535
- lamindb/_featureset_methods.py +0 -73
- lamindb/_file_access.py +0 -48
- lamindb/_file_methods.py +0 -319
- lamindb-0.45a1.dist-info/RECORD +0 -36
- /lamindb/{_transform_methods.py → _transform.py} +0 -0
- {lamindb-0.45a1.dist-info → lamindb-0.46a1.dist-info}/LICENSE +0 -0
- {lamindb-0.45a1.dist-info → lamindb-0.46a1.dist-info}/WHEEL +0 -0
- {lamindb-0.45a1.dist-info → lamindb-0.46a1.dist-info}/entry_points.txt +0 -0
lamindb/__init__.py
CHANGED
@@ -13,12 +13,14 @@ The core schema entities are central to lamindb's API:
 .. autosummary::
    :toctree: .
 
+   Dataset
    File
    Transform
    Run
+   Feature
+   FeatureSet
    User
    Storage
-   FeatureSet
    Tag
    Project
 
@@ -28,7 +30,6 @@ Functional tools:
    :toctree: .
 
    track
-   parse
    view
    select
    save
@@ -54,23 +55,43 @@ Developer API:
 
 """
 
-__version__ = "0.45a1"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.46a1"  # denote a release candidate for 0.1.0 with 0.1rc1
+
+import os as _os
 
 import lamindb_setup as _lamindb_setup
 
 # prints warning of python versions
 from lamin_logger import py_version_warning as _py_version_warning
 from lamindb_setup import _check_instance_setup
+from lamindb_setup._check_instance_setup import _INSTANCE_NOT_SETUP_WARNING
 
-_py_version_warning("3.8", "3.
+_py_version_warning("3.8", "3.11")
 
+_TESTING = _lamindb_setup._TESTING
 _INSTANCE_SETUP = _check_instance_setup(from_lamindb=True)
 # allow the user to call setup
 from . import setup  # noqa
 
+
+class InstanceNotSetupError(Exception):
+    pass
+
+
+def __getattr__(name):
+    raise InstanceNotSetupError(
+        f"{_INSTANCE_NOT_SETUP_WARNING}If you used the CLI to init or load an instance,"
+        " please RESTART the python session (in a notebook, restart kernel)"
+    )
+
+
 # only import all other functionality if setup was successful
 if _INSTANCE_SETUP:
+    del InstanceNotSetupError
+    del __getattr__  # delete so that imports work out
     from lnschema_core import (  # noqa
+        Dataset,
+        Feature,
         FeatureSet,
         File,
         Project,
@@ -95,12 +116,12 @@ if _INSTANCE_SETUP:
         f" {__version__})"
     )
 
-    from . import
-    from . import
-    from . import
-    from . import
+    from . import _dataset  # noqa
+    from . import _feature_set  # noqa
+    from . import _file  # noqa
+    from . import _orm  # noqa
+    from . import _transform  # noqa
     from ._delete import delete  # noqa
-    from ._from_values import parse  # noqa
     from ._save import save  # noqa
     from ._select import select  # noqa
     from ._view import view  # noqa
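The module-level `__getattr__` above is a PEP 562-style lazy guard: importing lamindb without a configured instance now succeeds, and only the first attribute lookup raises. A minimal sketch of the resulting behavior, assuming no instance is set up:

import lamindb as ln  # the import itself succeeds; only attribute access is guarded

try:
    ln.File  # missing attribute -> module-level __getattr__ -> InstanceNotSetupError
except ln.InstanceNotSetupError as e:
    print(e)  # advises to init/load an instance and restart the session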
lamindb/_context.py
CHANGED
@@ -20,10 +20,9 @@ msg_path_failed = (
     " `notebook_path` to ln.track()."
 )
 
-
-
-
-)
+
+class NonInteractiveEditorError(Exception):
+    pass
 
 
 def _write_notebook_meta(metadata):
@@ -178,10 +177,11 @@ class context:
                     " notebook!\nConsider installing nbproject for automatic"
                     " name, title & id tracking."
                 )
-            elif
+            elif isinstance(e, NonInteractiveEditorError):
                 raise e
             else:
                 logger.warning(f"Automatic tracking of notebook failed: {e}")
+                raise e
             is_tracked_notebook = False
 
         if not is_tracked_notebook:
@@ -292,7 +292,7 @@ class context:
         except Exception as e:
             nbproject_failed_msg = (
                 "Auto-retrieval of notebook name & title failed.\n\nFixes: Either"
-                " init on the CLI `lamin track
+                f" init on the CLI `lamin track {notebook_path}` or pass"
                 " transform manually `ln.track(ln.Transform(name='My"
                 " notebook'))`\n\nPlease consider pasting error at:"
                 f" https://github.com/laminlabs/nbproject/issues/new\n\n{e}"
@@ -317,12 +317,11 @@ class context:
         if _env in ("lab", "notebook"):
             cls._notebook_meta = metadata  # type: ignore
         else:
-
-
-
-
-
-            raise RuntimeError(msg_init_noninteractive)
+            msg_init_noninteractive = (
+                "Please attach metadata to the notebook by running the CLI: "
+                f"lamin track {notebook_path}"
+            )
+            raise NonInteractiveEditorError(msg_init_noninteractive)
 
         if _env in ("lab", "notebook"):
             # save the notebook in case that title was updated
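The dedicated NonInteractiveEditorError lets ln.track() distinguish a fixable situation (metadata must be attached via the CLI) from other nbproject failures, which are now re-raised after the warning instead of passing silently. A simplified sketch of that dispatch, with a hypothetical stand-in for the metadata step:

from lamin_logger import logger

from lamindb._context import NonInteractiveEditorError  # added in this release


def attach_metadata_non_interactively():
    # hypothetical stand-in for the notebook-metadata step inside ln.track();
    # in a plain script there is no notebook, so we mimic the failure
    raise NonInteractiveEditorError(
        "Please attach metadata to the notebook by running the CLI: "
        "lamin track path/to/notebook.ipynb"
    )


try:
    attach_metadata_non_interactively()
except NonInteractiveEditorError as e:
    print(e)  # actionable message instead of a generic RuntimeError
except Exception as e:
    logger.warning(f"Automatic tracking of notebook failed: {e}")
    raise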
lamindb/_dataset.py
ADDED
@@ -0,0 +1,142 @@
+from typing import Iterable, List, Optional, Union
+
+import anndata as ad
+import pandas as pd
+from lnschema_core import ids
+from lnschema_core.models import Dataset
+
+from . import Feature, FeatureSet, File, Run
+from .dev.hashing import hash_set
+
+
+def __init__(
+    dataset: Dataset,
+    *args,
+    **kwargs,
+):
+    if len(args) == len(dataset._meta.concrete_fields):
+        super(Dataset, dataset).__init__(*args, **kwargs)
+        return None
+    # now we proceed with the user-facing constructor
+    if len(args) > 1:
+        raise ValueError("Only one non-keyword arg allowed: data")
+    data: Optional[Union[pd.DataFrame, ad.AnnData]] = None
+    if "data" in kwargs or len(args) == 1:
+        data = kwargs.pop("data") if len(args) == 0 else args[0]
+    name: Optional[str] = kwargs.pop("name") if "name" in kwargs else None
+    run: Optional[Run] = kwargs.pop("run") if "run" in kwargs else None
+    files: List[File] = kwargs.pop("files") if "files" in kwargs else []
+    file: Optional[File] = kwargs.pop("file") if "file" in kwargs else None
+    hash: Optional[str] = kwargs.pop("hash") if "hash" in kwargs else None
+    feature_sets: List[FeatureSet] = (
+        kwargs.pop("feature_sets") if "feature_sets" in kwargs else []
+    )
+    assert len(kwargs) == 0
+    if data is not None:
+        if isinstance(data, pd.DataFrame):
+            feature_set = FeatureSet.from_values(data.columns, Feature.name)
+            dataset._feature_sets = [feature_set]
+        elif isinstance(data, ad.AnnData):
+            if len(feature_sets) != 2:
+                raise ValueError(
+                    "Please provide a feature set describing each `.var.index` &"
+                    " `.obs.columns`"
+                )
+            dataset._feature_sets = feature_sets
+        file = File(data=data, run=run, feature_sets=dataset._feature_sets)
+        hash = file.hash
+        id = file.id
+    else:
+        id = ids.base62_20()
+        dataset._feature_sets = feature_sets
+    super(Dataset, dataset).__init__(id=id, name=name, file=file, hash=hash)
+    dataset._files = files
+
+
+@classmethod  # type: ignore
+def from_files(dataset: Dataset, *, name: str, files: Iterable[File]) -> Dataset:
+    # assert all files are already saved
+    # saved = not any([file._state._adding for file in files])
+    # if not saved:
+    #     raise ValueError("Not all files are yet saved, please save them")
+    # query all feature sets of files
+    file_ids = [file.id for file in files]
+    # query all feature sets at the same time rather than making a single query per file
+    feature_set_file_links = File.feature_sets.through.objects.filter(
+        file_id__in=file_ids
+    )
+    feature_set_ids = [link.featureset_id for link in feature_set_file_links]
+    feature_sets = FeatureSet.select(id__in=feature_set_ids)
+    # validate consistency of feature_sets
+    # we only allow one feature set per type
+    feature_set_types = [feature_set.type for feature_set in feature_sets]
+    feature_set_ids_types = [
+        (feature_set.id, feature_set.type) for feature_set in feature_sets
+    ]
+    if len(set(feature_set_ids_types)) != len(set(feature_set_types)):
+        # we can do below in the future!
+        # logger.warning(
+        #     "feature sets are inconsistent across files"
+        #     "computing union! files will be outer-joined"
+        # )
+        raise ValueError(
+            "Currently only supporting datasets from files with same feature sets"
+        )
+    # validate consistency of hashes
+    # we do not allow duplicate hashes
+    file_hashes = [file.hash for file in files]
+    file_hashes_set = set(file_hashes)
+    assert len(file_hashes) == len(file_hashes_set)
+    hash = hash_set(file_hashes_set)
+    # create the dataset
+    dataset = Dataset(name=name, hash=hash, feature_sets=feature_sets, files=files)
+    return dataset
+
+
+def backed(dataset: Dataset):
+    if dataset.file is None:
+        raise RuntimeError("Can only call backed() for datasets with a single file")
+    return dataset.file.backed()
+
+
+def load(dataset: Dataset):
+    """Load the combined dataset."""
+    if dataset.file is not None:
+        return dataset.file.load()
+    else:
+        suffixes = [file.suffix for file in dataset.files.all()]
+        if len(set(suffixes)) != 1:
+            raise RuntimeError(
+                "Can only load datasets where all files have the same suffix"
+            )
+        objects = [file.load() for file in dataset.files.all()]
+        if isinstance(objects[0], pd.DataFrame):
+            return pd.concat(objects)
+        elif isinstance(objects[0], ad.AnnData):
+            return ad.concat(objects)
+
+
+def delete(dataset: Dataset, storage: bool = False):
+    super(Dataset, dataset).delete()
+    if dataset.file is not None:
+        dataset.file.delete(storage=storage)
+
+
+def save(dataset: Dataset):
+    if dataset.file is not None:
+        dataset.file.save()
+    for feature_set in dataset._feature_sets:
+        feature_set.save()
+    super(Dataset, dataset).save()
+    if len(dataset._files) > 0:
+        dataset.files.set(dataset._files)
+    if len(dataset._feature_sets) > 0:
+        dataset.feature_sets.set(dataset._feature_sets)
+
+
+Dataset.__init__ = __init__
+Dataset.from_files = from_files
+Dataset.backed = backed
+Dataset.load = load
+Dataset.delete = delete
+Dataset.save = save
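Taken together, this gives Dataset two construction paths: wrap a single in-memory object in a File, or combine already-saved files whose feature sets agree and whose hashes are distinct. A usage sketch under the API above (the dataframe and names are illustrative and assume a set-up instance):

import lamindb as ln
import pandas as pd

df = pd.DataFrame({"feat1": [1, 2], "feat2": [3.0, 4.0]})

dataset = ln.Dataset(df, name="my-dataset")  # builds a FeatureSet from df.columns
dataset.save()  # saves the backing file and feature sets, then the dataset record
df_again = dataset.load()  # single-file dataset: delegates to dataset.file.load()

# sharded variant: files must share feature sets and have pairwise-distinct hashes
# sharded = ln.Dataset.from_files(name="combined", files=[file1, file2])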
lamindb/_delete.py
CHANGED
@@ -1,31 +1,31 @@
 from typing import List, Optional, Union, overload  # noqa
 
-from lnschema_core import
+from lnschema_core import ORM
 
 from ._logger import colors, logger
 
 
 @overload
 def delete(
-    record:
+    record: ORM,
 ) -> None:
     ...
 
 
 @overload
 def delete(
-    records: List[
+    records: List[ORM],
 ) -> None:  # type: ignore
     ...
 
 
 def delete(  # type: ignore
-    records: Union[
+    records: Union[ORM, List[ORM]],
 ) -> None:
     """Delete metadata records & files.
 
     Args:
-        records: `Union[
+        records: `Union[ORM, List[ORM]]` One or multiple records.
 
     Returns:
         `None`
@@ -49,7 +49,7 @@ def delete(  # type: ignore
     """
    if isinstance(records, list):
        records = records
-    elif isinstance(records,
+    elif isinstance(records, ORM):
        records = [records]
    for record in records:
        record.delete()
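The overloads only retype the accepted records to the new ORM base class; the call patterns are unchanged. For illustration (the Tag record and its name field are assumed here):

import lamindb as ln

tag = ln.Tag(name="scratch")  # hypothetical record for illustration
tag.save()
ln.delete(tag)    # a single ORM record
ln.delete([tag])  # or a list of records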
lamindb/_feature_set.py
ADDED
@@ -0,0 +1,138 @@
+from typing import List, Optional
+
+from django.db.models.query_utils import DeferredAttribute as Field
+from lamin_logger import logger
+from lamindb_setup.dev._docs import doc_args
+from lnschema_core import ORM, Feature, FeatureSet
+from lnschema_core.types import ListLike
+
+from lamindb.dev.hashing import hash_set
+from lamindb.dev.utils import attach_func_to_class_method
+
+from . import _TESTING
+from ._from_values import get_or_create_records, index_iterable
+from ._orm import init_self_from_db
+
+
+def get_related_name(features_type: ORM):
+    candidates = [
+        field.related_name
+        for field in FeatureSet._meta.related_objects
+        if field.related_model == features_type
+    ]
+    if not candidates:
+        raise ValueError(
+            f"Can't create feature sets from {features_type.__name__} because it's not"
+            " related to it!\nYou need to create a link model between FeatureSet and"
+            " your ORM in your custom schema.\nTo do so, add a line:\nfeature_sets ="
+            " models.ManyToMany(FeatureSet, related_name='mythings')\n"
+        )
+    return candidates[0]
+
+
+def validate_features(features: List[ORM]) -> ORM:
+    """Validate and return feature type."""
+    if len(features) == 0:
+        raise ValueError("provide list of features with at least one element")
+    if not hasattr(features, "__getitem__"):
+        raise TypeError("features has to be list-like")
+    if not isinstance(features[0], ORM):
+        raise TypeError(
+            "features has to store feature records! use .from_values() otherwise"
+        )
+    feature_types = set([feature.__class__ for feature in features])
+    if len(feature_types) > 1:
+        raise ValueError("feature_set can only contain a single type")
+    return next(iter(feature_types))  # return value in set of cardinality 1
+
+
+def __init__(self, *args, **kwargs):
+    if len(args) == len(self._meta.concrete_fields):
+        super(FeatureSet, self).__init__(*args, **kwargs)
+        return None
+    # now we proceed with the user-facing constructor
+    if len(args) > 1:
+        raise ValueError("Only one non-keyword arg allowed: features")
+    features: List[ORM] = kwargs.pop("features") if len(args) == 0 else args[0]
+    field: Optional[str] = kwargs.pop("field") if "field" in kwargs else None
+    id: Optional[str] = kwargs.pop("id") if "id" in kwargs else None
+    features_type = validate_features(features)
+    related_name = get_related_name(features_type)
+    if id is None:
+        features_hash = hash_set({feature.id for feature in features})
+        feature_set = FeatureSet.select(id=features_hash).one_or_none()
+        if feature_set is not None:
+            logger.info("Returning an existing feature_set")
+            init_self_from_db(self, feature_set)
+            return None
+        else:
+            id = features_hash
+    self._features = (related_name, features)
+    if field is None:
+        field = "id"
+    super(FeatureSet, self).__init__(
+        id=id, type=features_type.__name_with_type__(), field=field
+    )
+
+
+@doc_args(FeatureSet.save.__doc__)
+def save(self, *args, **kwargs) -> None:
+    """{}"""
+    super(FeatureSet, self).save(*args, **kwargs)
+    if hasattr(self, "_features"):
+        related_name, records = self._features
+        [record.save() for record in records]
+        getattr(self, related_name).set(records)
+
+
+@classmethod  # type:ignore
+@doc_args(FeatureSet.from_values.__doc__)
+def from_values(
+    cls, values: ListLike, field: Field = Feature.name, **kwargs
+) -> "FeatureSet":
+    """{}"""
+    if not isinstance(field, Field):
+        raise TypeError("Argument `field` must be an ORM field, e.g., `Feature.name`")
+    if len(values) == 0:
+        raise ValueError("Provide a list of at least one value")
+    if not isinstance(values[0], (str, int)):
+        raise TypeError("values should be list-like of str or int")
+    orm = field.field.model
+    iterable_idx = index_iterable(values)
+    features_hash = hash_set(set(iterable_idx))
+    feature_set = FeatureSet.select(id=features_hash).one_or_none()
+    if feature_set is not None:
+        logger.info("Returning an existing feature_set")
+    else:
+        from_bionty = orm.__module__.startswith("lnschema_bionty")
+        records = get_or_create_records(
+            iterable=iterable_idx,
+            field=field,
+            from_bionty=from_bionty,
+            **kwargs,
+        )
+        feature_set = FeatureSet(
+            id=features_hash,
+            field=field.field.name,
+            features=records,
+        )
+    return feature_set
+
+
+METHOD_NAMES = [
+    "__init__",
+    "from_values",
+    "save",
+]
+
+if _TESTING:
+    from inspect import signature
+
+    SIGS = {
+        name: signature(getattr(FeatureSet, name))
+        for name in METHOD_NAMES
+        if name != "__init__"
+    }
+
+for name in METHOD_NAMES:
+    attach_func_to_class_method(name, FeatureSet, globals())