lamindb 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +30 -25
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +2 -3
- lamindb/base/__init__.py +1 -1
- lamindb/base/ids.py +1 -10
- lamindb/core/__init__.py +7 -65
- lamindb/core/_compat.py +60 -0
- lamindb/core/_context.py +43 -20
- lamindb/core/_settings.py +6 -6
- lamindb/core/_sync_git.py +1 -1
- lamindb/core/loaders.py +30 -19
- lamindb/core/storage/_backed_access.py +4 -2
- lamindb/core/storage/_tiledbsoma.py +8 -6
- lamindb/core/storage/_zarr.py +104 -25
- lamindb/core/storage/objects.py +63 -28
- lamindb/core/storage/paths.py +4 -1
- lamindb/core/types.py +10 -0
- lamindb/curators/__init__.py +100 -85
- lamindb/errors.py +1 -1
- lamindb/integrations/_vitessce.py +4 -4
- lamindb/migrations/0089_subsequent_runs.py +159 -0
- lamindb/migrations/0090_runproject_project_runs.py +73 -0
- lamindb/migrations/{0088_squashed.py → 0090_squashed.py} +245 -177
- lamindb/models/__init__.py +79 -0
- lamindb/{core → models}/_describe.py +3 -3
- lamindb/{core → models}/_django.py +8 -5
- lamindb/{core → models}/_feature_manager.py +103 -87
- lamindb/{_from_values.py → models/_from_values.py} +5 -2
- lamindb/{core/versioning.py → models/_is_versioned.py} +94 -6
- lamindb/{core → models}/_label_manager.py +10 -17
- lamindb/{core/relations.py → models/_relations.py} +8 -1
- lamindb/models/artifact.py +2602 -0
- lamindb/{_can_curate.py → models/can_curate.py} +349 -180
- lamindb/models/collection.py +683 -0
- lamindb/models/core.py +135 -0
- lamindb/models/feature.py +643 -0
- lamindb/models/flextable.py +163 -0
- lamindb/{_parents.py → models/has_parents.py} +55 -49
- lamindb/models/project.py +384 -0
- lamindb/{_query_manager.py → models/query_manager.py} +10 -8
- lamindb/{_query_set.py → models/query_set.py} +40 -26
- lamindb/models/record.py +1762 -0
- lamindb/models/run.py +563 -0
- lamindb/{_save.py → models/save.py} +9 -7
- lamindb/models/schema.py +732 -0
- lamindb/models/transform.py +360 -0
- lamindb/models/ulabel.py +249 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/METADATA +6 -6
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/RECORD +51 -51
- lamindb/_artifact.py +0 -1379
- lamindb/_collection.py +0 -440
- lamindb/_feature.py +0 -316
- lamindb/_is_versioned.py +0 -40
- lamindb/_record.py +0 -1064
- lamindb/_run.py +0 -60
- lamindb/_schema.py +0 -347
- lamindb/_storage.py +0 -15
- lamindb/_transform.py +0 -170
- lamindb/_ulabel.py +0 -56
- lamindb/_utils.py +0 -9
- lamindb/base/validation.py +0 -63
- lamindb/core/_data.py +0 -491
- lamindb/core/fields.py +0 -12
- lamindb/models.py +0 -4475
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/LICENSE +0 -0
- {lamindb-1.1.1.dist-info → lamindb-1.2.0.dist-info}/WHEEL +0 -0
lamindb/_feature.py
DELETED
@@ -1,316 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import importlib
|
4
|
-
from typing import TYPE_CHECKING, Any, get_args
|
5
|
-
|
6
|
-
import lamindb_setup as ln_setup
|
7
|
-
import pandas as pd
|
8
|
-
from django.db.models.query_utils import DeferredAttribute
|
9
|
-
from lamin_utils import logger
|
10
|
-
from lamindb_setup._init_instance import get_schema_module_name
|
11
|
-
from lamindb_setup.core._docs import doc_args
|
12
|
-
from pandas.api.types import CategoricalDtype, is_string_dtype
|
13
|
-
|
14
|
-
from lamindb._record import _get_record_kwargs
|
15
|
-
from lamindb.base.types import FeatureDtype
|
16
|
-
from lamindb.errors import FieldValidationError, ValidationError
|
17
|
-
from lamindb.models import Artifact, Feature, Record, Registry
|
18
|
-
|
19
|
-
from ._query_set import RecordList
|
20
|
-
from ._utils import attach_func_to_class_method
|
21
|
-
from .core.relations import dict_module_name_to_model_name
|
22
|
-
|
23
|
-
if TYPE_CHECKING:
|
24
|
-
from collections.abc import Iterable
|
25
|
-
|
26
|
-
from pandas.core.dtypes.base import ExtensionDtype
|
27
|
-
|
28
|
-
from lamindb.base.types import FieldAttr
|
29
|
-
|
30
|
-
|
31
|
-
# Set of the type arguments of ``FeatureDtype``; the functions in this module
# check dtype names against it.
FEATURE_DTYPES = set(get_args(FeatureDtype))
|
32
|
-
|
33
|
-
|
34
|
-
def parse_dtype_single_cat(
    dtype_str: str,
    related_registries: dict[str, Record] | None = None,
    is_itype: bool = False,
) -> dict:
    """Parse one categorical dtype string into registry, subtype and field.

    Accepted shapes are ``Registry``, ``Registry.field``, ``module.Registry``,
    ``module.Registry.field``, ``Registry[SubType]`` and
    ``Registry[SubType].field``.

    Args:
        dtype_str: A single registry specification (no surrounding ``cat[...]``).
        related_registries: Mapping from registry string to registry used for
            the lookup when `is_itype` is false. Defaults to
            ``dict_module_name_to_model_name(Artifact)``.
        is_itype: If true, resolve the registry by importing its module instead
            of looking it up in `related_registries`.

    Returns:
        A dict with the keys ``registry``, ``registry_str``, ``subtype_str``,
        ``field_str`` and ``field``.

    Raises:
        ValidationError: If `dtype_str` contains more than one ``[``, or if
            `is_itype` is false and the registry is not in `related_registries`.
    """
    assert isinstance(dtype_str, str)  # noqa: S101
    if related_registries is None:
        related_registries = dict_module_name_to_model_name(Artifact)
    bracket_parts = dtype_str.split("[")
    sub_type_str = ""
    if len(bracket_parts) == 2:
        # has sub type: Registry[SubType] or Registry[SubType].field
        registry_str, remainder = bracket_parts
        assert "]" in remainder  # noqa: S101
        sub_type_field_split = remainder.split("].")
        if len(sub_type_field_split) == 1:
            sub_type_str = sub_type_field_split[0].strip("]")
            field_str = ""
        else:
            sub_type_str = sub_type_field_split[0]
            field_str = sub_type_field_split[1]
    elif len(bracket_parts) == 1:
        registry_field_split = bracket_parts[0].split(".")
        # [:1] instead of [0]: a trailing dot ("bionty.") produces an empty
        # second part, which must not raise an IndexError here
        has_module_prefix = len(registry_field_split) == 3 or (
            len(registry_field_split) == 2
            and registry_field_split[1][:1].isupper()
        )
        if has_module_prefix:
            # bionty.CellType or bionty.CellType.name
            registry_str = f"{registry_field_split[0]}.{registry_field_split[1]}"
            field_str = (
                "" if len(registry_field_split) == 2 else registry_field_split[2]
            )
        else:
            # ULabel or ULabel.name
            registry_str = registry_field_split[0]
            field_str = (
                "" if len(registry_field_split) == 1 else registry_field_split[1]
            )
    else:
        # more than one "[": previously fell through with registry_str unbound
        raise ValidationError(
            f"'{dtype_str}' is an invalid dtype, it may contain at most one sub type in square brackets"
        )
    if not is_itype:
        if registry_str not in related_registries:
            raise ValidationError(
                f"'{registry_str}' is an invalid dtype, has to be registry, e.g. ULabel or bionty.CellType"
            )
        registry = related_registries[registry_str]
    else:
        if "." in registry_str:
            registry_str_split = registry_str.split(".")
            assert len(registry_str_split) == 2, registry_str  # noqa: S101
            module_name, class_name = registry_str_split
            module_name = get_schema_module_name(module_name)
        else:
            module_name, class_name = "lamindb", registry_str
        module = importlib.import_module(module_name)
        registry = getattr(module, class_name)
    # TODO: validate that the subtype is a record in the registry with is_type = True
    # TODO: validate that field_str is an actual field of the module
    if field_str == "":
        # no explicit field: fall back to the registry's name field
        field_str = registry._name_field if hasattr(registry, "_name_field") else "name"
    return {
        "registry": registry,  # should be typed as CanCurate
        "registry_str": registry_str,
        "subtype_str": sub_type_str,
        "field_str": field_str,
        "field": getattr(registry, field_str),
    }
|
102
|
-
|
103
|
-
|
104
|
-
def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, Any]]:
    """Validate a dtype string and parse a composed categorical dtype.

    Args:
        dtype_str: Either one of ``FEATURE_DTYPES`` (plus ``"dict"`` when
            `is_param` is true) or a composed categorical of the form
            ``cat[Registry1|Registry2...]``.
        is_param: Additionally allow the ``"dict"`` dtype.

    Returns:
        One dict per registry of a composed categorical, as returned by
        ``parse_dtype_single_cat``; an empty list for every other dtype,
        including the empty composition ``cat[]``.

    Raises:
        ValueError: If `dtype_str` is neither a composed categorical nor an
            allowed dtype.
    """
    if is_param:
        # build a new set: calling .add() on FEATURE_DTYPES itself would leak
        # the param-only "dict" dtype into every later feature validation
        allowed_dtypes = FEATURE_DTYPES | {"dict"}
    else:
        allowed_dtypes = FEATURE_DTYPES
    is_composed_cat = dtype_str.startswith("cat[") and dtype_str.endswith("]")
    result = []
    if is_composed_cat:
        related_registries = dict_module_name_to_model_name(Artifact)
        # strip only the leading "cat[" and the trailing "]"; str.replace would
        # also remove any "cat[" occurring inside a registry specification
        registries_str = dtype_str[len("cat[") : -1]
        if registries_str != "":
            result = [
                parse_dtype_single_cat(cat_single_dtype_str, related_registries)
                for cat_single_dtype_str in registries_str.split("|")
            ]
    elif dtype_str not in allowed_dtypes:
        raise ValueError(
            f"dtype is '{dtype_str}' but has to be one of {allowed_dtypes}!"
        )
    return result
|
125
|
-
|
126
|
-
|
127
|
-
def get_dtype_str_from_dtype(dtype: Any, is_itype: bool = False) -> str:
    """Convert a dtype object into its string representation.

    A non-list object whose ``__name__`` is one of ``FEATURE_DTYPES`` maps to
    that name. A ``Registry``, a ``DeferredAttribute``, or a list of these maps
    to their ``|``-joined names, wrapped in ``cat[...]`` unless `is_itype` is
    true.

    Raises:
        ValueError: If `dtype`, or an element of a list `dtype`, is neither a
            ``Registry`` nor a ``DeferredAttribute``.
    """
    is_list = isinstance(dtype, list)
    if not is_list and getattr(dtype, "__name__", None) in FEATURE_DTYPES:
        return dtype.__name__

    error_message = (
        "dtype has to be a record, a record field, or a list of records, not {}"
    )
    if isinstance(dtype, (Registry, DeferredAttribute)):
        candidates = [dtype]
    elif is_list:
        candidates = dtype
    else:
        raise ValueError(error_message.format(dtype))

    names = []
    for candidate in candidates:
        if isinstance(candidate, Registry):
            names.append(candidate.__get_name_with_module__())
        elif isinstance(candidate, DeferredAttribute):
            # registry field: qualify the registry name with the field name
            names.append(
                candidate.field.model.__get_name_with_module__()
                + f".{candidate.field.name}"
            )
        else:
            raise ValueError(error_message.format(candidate))
    joined = "|".join(names)
    return joined if is_itype else f"cat[{joined}]"
|
162
|
-
|
163
|
-
|
164
|
-
def convert_pandas_dtype_to_lamin_dtype(pandas_dtype: ExtensionDtype) -> str:
    """Map a pandas dtype to the name of the matching entry of ``FEATURE_DTYPES``.

    Categorical dtypes map to ``"cat"`` and other string dtypes to ``"str"``.
    Any other dtype maps to its ``name`` with digits removed and, for names
    starting with ``datetime``, with everything from the first ``[`` dropped.
    """
    if isinstance(pandas_dtype, CategoricalDtype):
        # covers string-like categoricals as well as "pure" ones (pd.Categorical)
        lamin_dtype = "cat"
    elif is_string_dtype(pandas_dtype):
        lamin_dtype = "str"
    else:
        # strip precision qualifiers
        lamin_dtype = "".join(
            char for char in pandas_dtype.name if not char.isdigit()
        )
        if lamin_dtype.startswith("datetime"):
            lamin_dtype = lamin_dtype.split("[")[0]
    assert lamin_dtype in FEATURE_DTYPES  # noqa: S101
    return lamin_dtype
|
180
|
-
|
181
|
-
|
182
|
-
def process_init_feature_param(args, kwargs, is_param: bool = False):
    """Validate the constructor keyword arguments and normalize ``dtype``.

    Args:
        args: Positional constructor arguments; must be empty.
        kwargs: Keyword constructor arguments. The dict is emptied and refilled
            in place with ``name``, ``type``, ``is_type``, ``description``
            (only when `is_param` is false) and ``dtype``.
        is_param: Passed through to ``parse_dtype``; also drops ``description``.

    Returns:
        The same `kwargs` dict, with ``dtype`` as a string or ``None``.

    Raises:
        ValueError: If positional arguments are passed.
        FieldValidationError: If unknown keyword arguments are passed.
        ValidationError: If no dtype is passed and ``is_type`` is not truthy.
    """
    # now we proceed with the user-facing constructor
    if len(args) != 0:
        raise ValueError("Only keyword args allowed")
    name: str = kwargs.pop("name", None)
    dtype: type | str | None = kwargs.pop("dtype", None)
    is_type: bool = kwargs.pop("is_type", None)
    type_: Feature | str | None = kwargs.pop("type", None)
    description: str | None = kwargs.pop("description", None)
    if kwargs:
        # anything still left over is not a known keyword
        valid_keywords = ", ".join([val[0] for val in _get_record_kwargs(Feature)])
        raise FieldValidationError(f"Only {valid_keywords} are valid keyword arguments")
    kwargs.update(name=name, type=type_, is_type=is_type)
    if not is_param:
        kwargs["description"] = description
    # cast dtype
    if dtype is None:
        if not is_type:
            raise ValidationError(
                f"Please pass dtype, one of {FEATURE_DTYPES} or a composed categorical dtype"
            )
        dtype_str = None
    elif isinstance(dtype, str):
        dtype_str = dtype
        # a user-provided string is validated; the parsed result is discarded
        parse_dtype(dtype_str, is_param=is_param)
    else:
        dtype_str = get_dtype_str_from_dtype(dtype)
    kwargs["dtype"] = dtype_str
    return kwargs
|
213
|
-
|
214
|
-
|
215
|
-
def __init__(self, *args, **kwargs):
    # One positional value per concrete field: pass everything to the parent
    # constructor unchanged and skip the keyword processing below.
    if len(args) == len(self._meta.concrete_fields):
        super(Feature, self).__init__(*args, **kwargs)
        return None
    # remember the dtype exactly as passed, before it is normalized
    dtype = kwargs.get("dtype", None)
    # these keywords are removed before the remaining ones are validated
    cat_filters = kwargs.pop("cat_filters", None)
    nullable = kwargs.pop("nullable", True)  # default value of nullable
    default_value = kwargs.pop("default_value", None)
    kwargs = process_init_feature_param(args, kwargs)
    super(Feature, self).__init__(*args, **kwargs)
    self.default_value = default_value
    self.nullable = nullable
    dtype_str = kwargs.pop("dtype", None)
    if cat_filters:
        # filters are only supported for a single registry without sub type
        assert "|" not in dtype_str  # noqa: S101
        assert "]]" not in dtype_str  # noqa: S101
        fill_in = ", ".join(f"{key}='{value}'" for (key, value) in cat_filters.items())
        dtype_str = dtype_str.replace("]", f"[{fill_in}]]")
        self.dtype = dtype_str
    if self._state.adding:
        return None
    # the record is not being added: the passed dtype has to agree with the
    # dtype the record carries
    if dtype == "cat":
        is_consistent = self.dtype.startswith("cat")
    else:
        is_consistent = self.dtype == dtype_str
    if not is_consistent:
        raise ValidationError(
            f"Feature {self.name} already exists with dtype {self.dtype}, you passed {dtype_str}"
        )
|
241
|
-
|
242
|
-
|
243
|
-
def suggest_categorical_for_str_iterable(
    iterable: Iterable[str], key: str | None = None
) -> str:
    """Return a hint to use dtype 'cat' if `iterable` contains repeated values.

    Args:
        iterable: The values to inspect.
        key: Name of the feature, mentioned in the message when given.

    Returns:
        The hint, or an empty string if there are as many categories as values.
    """
    c = pd.Categorical(iterable)
    if len(c.categories) >= len(c):
        return ""
    # the default key is None, which must not end up in the message as
    # "for feature None"
    key_note = f" for feature {key}" if key is not None and key != "" else ""
    return f"You have few permissible values{key_note}, consider dtype 'cat' instead of 'str'"
|
255
|
-
|
256
|
-
|
257
|
-
def categoricals_from_df(df: pd.DataFrame) -> dict:
    """Returns categorical columns."""
    columns = df.columns
    string_cols = list(filter(lambda col: is_string_dtype(df[col]), columns))
    categoricals = {}
    for col in columns:
        if isinstance(df[col].dtype, CategoricalDtype):
            categoricals[col] = df[col]
    # string columns are not returned; they only trigger a warning when
    # suggest_categorical_for_str_iterable produces a message for them
    for key in string_cols:
        message = suggest_categorical_for_str_iterable(df[key], key)
        if message:
            logger.warning(message)
    return categoricals
|
270
|
-
|
271
|
-
|
272
|
-
@classmethod  # type:ignore
@doc_args(Feature.from_df.__doc__)
def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordList:
    """{}"""  # noqa: D415
    if field is None:
        field = Feature.name
    if field.field.model != Feature:  # type: ignore
        raise ValueError("field must be a Feature FieldAttr!")
    categoricals = categoricals_from_df(df)
    # categorical columns become "cat"; every other column is mapped from its
    # pandas dtype
    dtypes = {
        name: (
            "cat"
            if name in categoricals
            else convert_pandas_dtype_to_lamin_dtype(col.dtype)
        )
        for name, col in df.items()
    }
    features = []
    with logger.mute():  # silence the warning "loaded record with exact same name "
        for name, dtype in dtypes.items():
            features.append(Feature(name=name, dtype=dtype))  # type: ignore
    assert len(features) == len(df.columns)  # noqa: S101
    return RecordList(features)
|
291
|
-
|
292
|
-
|
293
|
-
@doc_args(Feature.save.__doc__)
def save(self, *args, **kwargs) -> Feature:
    """{}"""  # noqa: D415
    # delegate to the parent implementation, then return the record itself
    parent = super(Feature, self)
    parent.save(*args, **kwargs)
    return self
|
298
|
-
|
299
|
-
|
300
|
-
# names of the module-level functions that get attached to ``Feature`` below
METHOD_NAMES = ["__init__", "from_df", "save"]

if ln_setup._TESTING:
    from inspect import signature

    # signatures as found on the class before the attach loop below runs;
    # ``__init__`` is left out
    SIGS = {}
    for name in METHOD_NAMES:
        if name != "__init__":
            SIGS[name] = signature(getattr(Feature, name))

for name in METHOD_NAMES:
    attach_func_to_class_method(name, Feature, globals())
|
lamindb/_is_versioned.py
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import lamindb_setup as ln_setup
|
4
|
-
from lamin_utils import logger
|
5
|
-
from lamindb_setup.core.upath import UPath
|
6
|
-
|
7
|
-
from lamindb.models import IsVersioned
|
8
|
-
|
9
|
-
from ._utils import attach_func_to_class_method
|
10
|
-
from .core.versioning import create_uid, get_new_path_from_uid
|
11
|
-
|
12
|
-
|
13
|
-
# docstring handled through attach_func_to_class_method
|
14
|
-
def _add_to_version_family(self, revises: IsVersioned, version: str | None = None):
    old_uid = self.uid
    new_uid, revises = create_uid(revises=revises, version=version)
    # only artifacts are asked for `_key_is_virtual`; for them the stored
    # object is renamed to the path derived from the new uid
    is_artifact = self.__class__.__name__ == "Artifact"
    if is_artifact and self._key_is_virtual:
        old_path = self.path
        target_path = get_new_path_from_uid(
            old_path=old_path, old_uid=old_uid, new_uid=new_uid
        )
        new_path = UPath(old_path).rename(target_path)
        logger.success(f"updated path from {old_path} to {new_path}!")
    # the record itself is updated and saved after the rename
    self.uid, self.version = new_uid, version
    self.save()
    logger.success(f"updated uid from {old_uid} to {new_uid}!")
|
28
|
-
|
29
|
-
|
30
|
-
# names of the module-level functions that get attached to ``IsVersioned`` below
METHOD_NAMES = ["_add_to_version_family"]

if ln_setup._TESTING:  # type: ignore
    from inspect import signature

    # signatures as found on the class before the attach loop below runs
    SIGS = {}
    for name in METHOD_NAMES:
        SIGS[name] = signature(getattr(IsVersioned, name))

for name in METHOD_NAMES:
    attach_func_to_class_method(name, IsVersioned, globals())
|