lamindb 0.76.7__py3-none-any.whl → 0.76.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1178
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +387 -387
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -295
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +574 -574
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -867
- lamindb/core/_label_manager.py +253 -253
- lamindb/core/_mapped_collection.py +597 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +571 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -77
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -141
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/METADATA +3 -3
- lamindb-0.76.8.dist-info/RECORD +60 -0
- {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
- lamindb-0.76.7.dist-info/RECORD +0 -60
lamindb/_feature.py
CHANGED
@@ -1,155 +1,155 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from typing import TYPE_CHECKING
|
4
|
-
|
5
|
-
import lamindb_setup as ln_setup
|
6
|
-
import pandas as pd
|
7
|
-
from lamindb_setup.core._docs import doc_args
|
8
|
-
from lnschema_core.models import Artifact, Feature
|
9
|
-
from pandas.api.types import CategoricalDtype, is_string_dtype
|
10
|
-
|
11
|
-
from lamindb._utils import attach_func_to_class_method
|
12
|
-
from lamindb.core._settings import settings
|
13
|
-
|
14
|
-
from ._query_set import RecordsList
|
15
|
-
from .core.schema import dict_schema_name_to_model_name
|
16
|
-
|
17
|
-
if TYPE_CHECKING:
|
18
|
-
from lnschema_core.types import FieldAttr
|
19
|
-
|
20
|
-
FEATURE_TYPES = {
|
21
|
-
"number": "number",
|
22
|
-
"int": "int",
|
23
|
-
"float": "float",
|
24
|
-
"bool": "bool",
|
25
|
-
"str": "cat",
|
26
|
-
"object": "cat",
|
27
|
-
}
|
28
|
-
|
29
|
-
|
30
|
-
def convert_numpy_dtype_to_lamin_feature_type(dtype, str_as_cat: bool = True) -> str:
|
31
|
-
orig_type = dtype.name
|
32
|
-
# strip precision qualifiers
|
33
|
-
type = "".join(i for i in orig_type if not i.isdigit())
|
34
|
-
if type == "object" or type == "str":
|
35
|
-
type = "cat" if str_as_cat else "str"
|
36
|
-
return type
|
37
|
-
|
38
|
-
|
39
|
-
def __init__(self, *args, **kwargs):
|
40
|
-
if len(args) == len(self._meta.concrete_fields):
|
41
|
-
super(Feature, self).__init__(*args, **kwargs)
|
42
|
-
return None
|
43
|
-
# now we proceed with the user-facing constructor
|
44
|
-
if len(args) != 0:
|
45
|
-
raise ValueError("Only keyword args allowed")
|
46
|
-
dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
|
47
|
-
# cast type
|
48
|
-
if dtype is None:
|
49
|
-
raise ValueError("Please pass dtype!")
|
50
|
-
elif dtype is not None:
|
51
|
-
if not isinstance(dtype, str):
|
52
|
-
if not isinstance(dtype, list) and dtype.__name__ in FEATURE_TYPES:
|
53
|
-
dtype_str = FEATURE_TYPES[dtype.__name__]
|
54
|
-
else:
|
55
|
-
if not isinstance(dtype, list):
|
56
|
-
raise ValueError("dtype has to be a list of Record types")
|
57
|
-
registries_str = ""
|
58
|
-
for cls in dtype:
|
59
|
-
if not hasattr(cls, "__get_name_with_schema__"):
|
60
|
-
raise ValueError("each element of the list has to be a Record")
|
61
|
-
registries_str += cls.__get_name_with_schema__() + "|"
|
62
|
-
dtype_str = f'cat[{registries_str.rstrip("|")}]'
|
63
|
-
else:
|
64
|
-
dtype_str = dtype
|
65
|
-
# add validation that a registry actually exists
|
66
|
-
if dtype_str not in FEATURE_TYPES.values() and not dtype_str.startswith(
|
67
|
-
"cat"
|
68
|
-
):
|
69
|
-
raise ValueError(
|
70
|
-
f"dtype is {dtype_str} but has to be one of 'number', 'int', 'float', 'cat', 'bool', 'cat[...]'!"
|
71
|
-
)
|
72
|
-
if dtype_str != "cat" and dtype_str.startswith("cat"):
|
73
|
-
registries_str = dtype_str.replace("cat[", "").rstrip("]")
|
74
|
-
if registries_str != "":
|
75
|
-
registry_str_list = registries_str.split("|")
|
76
|
-
for registry_str in registry_str_list:
|
77
|
-
if registry_str not in dict_schema_name_to_model_name(Artifact):
|
78
|
-
raise ValueError(
|
79
|
-
f"'{registry_str}' is an invalid dtype, pass, e.g. `[ln.ULabel, bt.CellType]` or similar"
|
80
|
-
)
|
81
|
-
kwargs["dtype"] = dtype_str
|
82
|
-
super(Feature, self).__init__(*args, **kwargs)
|
83
|
-
|
84
|
-
|
85
|
-
def categoricals_from_df(df: pd.DataFrame) -> dict:
|
86
|
-
"""Returns categorical columns."""
|
87
|
-
string_cols = [col for col in df.columns if is_string_dtype(df[col])]
|
88
|
-
categoricals = {
|
89
|
-
col: df[col]
|
90
|
-
for col in df.columns
|
91
|
-
if isinstance(df[col].dtype, CategoricalDtype)
|
92
|
-
}
|
93
|
-
for key in string_cols:
|
94
|
-
c = pd.Categorical(df[key])
|
95
|
-
if len(c.categories) < len(c):
|
96
|
-
categoricals[key] = c
|
97
|
-
return categoricals
|
98
|
-
|
99
|
-
|
100
|
-
@classmethod # type:ignore
|
101
|
-
@doc_args(Feature.from_df.__doc__)
|
102
|
-
def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsList:
|
103
|
-
"""{}""" # noqa: D415
|
104
|
-
field = Feature.name if field is None else field
|
105
|
-
categoricals = categoricals_from_df(df)
|
106
|
-
|
107
|
-
dtypes = {}
|
108
|
-
# categoricals_with_unmapped_categories = {} # type: ignore
|
109
|
-
for name, col in df.items():
|
110
|
-
if name in categoricals:
|
111
|
-
dtypes[name] = "cat"
|
112
|
-
else:
|
113
|
-
dtypes[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
|
114
|
-
|
115
|
-
# silence the warning "loaded record with exact same name "
|
116
|
-
verbosity = settings.verbosity
|
117
|
-
try:
|
118
|
-
settings.verbosity = "error"
|
119
|
-
|
120
|
-
registry = field.field.model
|
121
|
-
if registry != Feature:
|
122
|
-
raise ValueError("field must be a Feature FieldAttr!")
|
123
|
-
# create records for all features including non-validated
|
124
|
-
features = [Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()]
|
125
|
-
finally:
|
126
|
-
settings.verbosity = verbosity
|
127
|
-
|
128
|
-
assert len(features) == len(df.columns) # noqa: S101
|
129
|
-
return RecordsList(features)
|
130
|
-
|
131
|
-
|
132
|
-
@doc_args(Feature.save.__doc__)
|
133
|
-
def save(self, *args, **kwargs) -> Feature:
|
134
|
-
"""{}""" # noqa: D415
|
135
|
-
super(Feature, self).save(*args, **kwargs)
|
136
|
-
return self
|
137
|
-
|
138
|
-
|
139
|
-
METHOD_NAMES = [
|
140
|
-
"__init__",
|
141
|
-
"from_df",
|
142
|
-
"save",
|
143
|
-
]
|
144
|
-
|
145
|
-
if ln_setup._TESTING:
|
146
|
-
from inspect import signature
|
147
|
-
|
148
|
-
SIGS = {
|
149
|
-
name: signature(getattr(Feature, name))
|
150
|
-
for name in METHOD_NAMES
|
151
|
-
if name != "__init__"
|
152
|
-
}
|
153
|
-
|
154
|
-
for name in METHOD_NAMES:
|
155
|
-
attach_func_to_class_method(name, Feature, globals())
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
import lamindb_setup as ln_setup
|
6
|
+
import pandas as pd
|
7
|
+
from lamindb_setup.core._docs import doc_args
|
8
|
+
from lnschema_core.models import Artifact, Feature
|
9
|
+
from pandas.api.types import CategoricalDtype, is_string_dtype
|
10
|
+
|
11
|
+
from lamindb._utils import attach_func_to_class_method
|
12
|
+
from lamindb.core._settings import settings
|
13
|
+
|
14
|
+
from ._query_set import RecordsList
|
15
|
+
from .core.schema import dict_schema_name_to_model_name
|
16
|
+
|
17
|
+
if TYPE_CHECKING:
|
18
|
+
from lnschema_core.types import FieldAttr
|
19
|
+
|
20
|
+
FEATURE_TYPES = {
|
21
|
+
"number": "number",
|
22
|
+
"int": "int",
|
23
|
+
"float": "float",
|
24
|
+
"bool": "bool",
|
25
|
+
"str": "cat",
|
26
|
+
"object": "cat",
|
27
|
+
}
|
28
|
+
|
29
|
+
|
30
|
+
def convert_numpy_dtype_to_lamin_feature_type(dtype, str_as_cat: bool = True) -> str:
|
31
|
+
orig_type = dtype.name
|
32
|
+
# strip precision qualifiers
|
33
|
+
type = "".join(i for i in orig_type if not i.isdigit())
|
34
|
+
if type == "object" or type == "str":
|
35
|
+
type = "cat" if str_as_cat else "str"
|
36
|
+
return type
|
37
|
+
|
38
|
+
|
39
|
+
def __init__(self, *args, **kwargs):
|
40
|
+
if len(args) == len(self._meta.concrete_fields):
|
41
|
+
super(Feature, self).__init__(*args, **kwargs)
|
42
|
+
return None
|
43
|
+
# now we proceed with the user-facing constructor
|
44
|
+
if len(args) != 0:
|
45
|
+
raise ValueError("Only keyword args allowed")
|
46
|
+
dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
|
47
|
+
# cast type
|
48
|
+
if dtype is None:
|
49
|
+
raise ValueError("Please pass dtype!")
|
50
|
+
elif dtype is not None:
|
51
|
+
if not isinstance(dtype, str):
|
52
|
+
if not isinstance(dtype, list) and dtype.__name__ in FEATURE_TYPES:
|
53
|
+
dtype_str = FEATURE_TYPES[dtype.__name__]
|
54
|
+
else:
|
55
|
+
if not isinstance(dtype, list):
|
56
|
+
raise ValueError("dtype has to be a list of Record types")
|
57
|
+
registries_str = ""
|
58
|
+
for cls in dtype:
|
59
|
+
if not hasattr(cls, "__get_name_with_schema__"):
|
60
|
+
raise ValueError("each element of the list has to be a Record")
|
61
|
+
registries_str += cls.__get_name_with_schema__() + "|"
|
62
|
+
dtype_str = f'cat[{registries_str.rstrip("|")}]'
|
63
|
+
else:
|
64
|
+
dtype_str = dtype
|
65
|
+
# add validation that a registry actually exists
|
66
|
+
if dtype_str not in FEATURE_TYPES.values() and not dtype_str.startswith(
|
67
|
+
"cat"
|
68
|
+
):
|
69
|
+
raise ValueError(
|
70
|
+
f"dtype is {dtype_str} but has to be one of 'number', 'int', 'float', 'cat', 'bool', 'cat[...]'!"
|
71
|
+
)
|
72
|
+
if dtype_str != "cat" and dtype_str.startswith("cat"):
|
73
|
+
registries_str = dtype_str.replace("cat[", "").rstrip("]")
|
74
|
+
if registries_str != "":
|
75
|
+
registry_str_list = registries_str.split("|")
|
76
|
+
for registry_str in registry_str_list:
|
77
|
+
if registry_str not in dict_schema_name_to_model_name(Artifact):
|
78
|
+
raise ValueError(
|
79
|
+
f"'{registry_str}' is an invalid dtype, pass, e.g. `[ln.ULabel, bt.CellType]` or similar"
|
80
|
+
)
|
81
|
+
kwargs["dtype"] = dtype_str
|
82
|
+
super(Feature, self).__init__(*args, **kwargs)
|
83
|
+
|
84
|
+
|
85
|
+
def categoricals_from_df(df: pd.DataFrame) -> dict:
|
86
|
+
"""Returns categorical columns."""
|
87
|
+
string_cols = [col for col in df.columns if is_string_dtype(df[col])]
|
88
|
+
categoricals = {
|
89
|
+
col: df[col]
|
90
|
+
for col in df.columns
|
91
|
+
if isinstance(df[col].dtype, CategoricalDtype)
|
92
|
+
}
|
93
|
+
for key in string_cols:
|
94
|
+
c = pd.Categorical(df[key])
|
95
|
+
if len(c.categories) < len(c):
|
96
|
+
categoricals[key] = c
|
97
|
+
return categoricals
|
98
|
+
|
99
|
+
|
100
|
+
@classmethod # type:ignore
|
101
|
+
@doc_args(Feature.from_df.__doc__)
|
102
|
+
def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsList:
|
103
|
+
"""{}""" # noqa: D415
|
104
|
+
field = Feature.name if field is None else field
|
105
|
+
categoricals = categoricals_from_df(df)
|
106
|
+
|
107
|
+
dtypes = {}
|
108
|
+
# categoricals_with_unmapped_categories = {} # type: ignore
|
109
|
+
for name, col in df.items():
|
110
|
+
if name in categoricals:
|
111
|
+
dtypes[name] = "cat"
|
112
|
+
else:
|
113
|
+
dtypes[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
|
114
|
+
|
115
|
+
# silence the warning "loaded record with exact same name "
|
116
|
+
verbosity = settings.verbosity
|
117
|
+
try:
|
118
|
+
settings.verbosity = "error"
|
119
|
+
|
120
|
+
registry = field.field.model
|
121
|
+
if registry != Feature:
|
122
|
+
raise ValueError("field must be a Feature FieldAttr!")
|
123
|
+
# create records for all features including non-validated
|
124
|
+
features = [Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()]
|
125
|
+
finally:
|
126
|
+
settings.verbosity = verbosity
|
127
|
+
|
128
|
+
assert len(features) == len(df.columns) # noqa: S101
|
129
|
+
return RecordsList(features)
|
130
|
+
|
131
|
+
|
132
|
+
@doc_args(Feature.save.__doc__)
|
133
|
+
def save(self, *args, **kwargs) -> Feature:
|
134
|
+
"""{}""" # noqa: D415
|
135
|
+
super(Feature, self).save(*args, **kwargs)
|
136
|
+
return self
|
137
|
+
|
138
|
+
|
139
|
+
METHOD_NAMES = [
|
140
|
+
"__init__",
|
141
|
+
"from_df",
|
142
|
+
"save",
|
143
|
+
]
|
144
|
+
|
145
|
+
if ln_setup._TESTING:
|
146
|
+
from inspect import signature
|
147
|
+
|
148
|
+
SIGS = {
|
149
|
+
name: signature(getattr(Feature, name))
|
150
|
+
for name in METHOD_NAMES
|
151
|
+
if name != "__init__"
|
152
|
+
}
|
153
|
+
|
154
|
+
for name in METHOD_NAMES:
|
155
|
+
attach_func_to_class_method(name, Feature, globals())
|