lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_feature.py CHANGED
@@ -1,155 +1,155 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING
4
-
5
- import lamindb_setup as ln_setup
6
- import pandas as pd
7
- from lamindb_setup.core._docs import doc_args
8
- from lnschema_core.models import Artifact, Feature
9
- from pandas.api.types import CategoricalDtype, is_string_dtype
10
-
11
- from lamindb._utils import attach_func_to_class_method
12
- from lamindb.core._settings import settings
13
-
14
- from ._query_set import RecordsList
15
- from .core.schema import dict_schema_name_to_model_name
16
-
17
- if TYPE_CHECKING:
18
- from lnschema_core.types import FieldAttr
19
-
20
- FEATURE_TYPES = {
21
- "number": "number",
22
- "int": "int",
23
- "float": "float",
24
- "bool": "bool",
25
- "str": "cat",
26
- "object": "cat",
27
- }
28
-
29
-
30
- def convert_numpy_dtype_to_lamin_feature_type(dtype, str_as_cat: bool = True) -> str:
31
- orig_type = dtype.name
32
- # strip precision qualifiers
33
- type = "".join(i for i in orig_type if not i.isdigit())
34
- if type == "object" or type == "str":
35
- type = "cat" if str_as_cat else "str"
36
- return type
37
-
38
-
39
- def __init__(self, *args, **kwargs):
40
- if len(args) == len(self._meta.concrete_fields):
41
- super(Feature, self).__init__(*args, **kwargs)
42
- return None
43
- # now we proceed with the user-facing constructor
44
- if len(args) != 0:
45
- raise ValueError("Only keyword args allowed")
46
- dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
47
- # cast type
48
- if dtype is None:
49
- raise ValueError("Please pass dtype!")
50
- elif dtype is not None:
51
- if not isinstance(dtype, str):
52
- if not isinstance(dtype, list) and dtype.__name__ in FEATURE_TYPES:
53
- dtype_str = FEATURE_TYPES[dtype.__name__]
54
- else:
55
- if not isinstance(dtype, list):
56
- raise ValueError("dtype has to be a list of Record types")
57
- registries_str = ""
58
- for cls in dtype:
59
- if not hasattr(cls, "__get_name_with_schema__"):
60
- raise ValueError("each element of the list has to be a Record")
61
- registries_str += cls.__get_name_with_schema__() + "|"
62
- dtype_str = f'cat[{registries_str.rstrip("|")}]'
63
- else:
64
- dtype_str = dtype
65
- # add validation that a registry actually exists
66
- if dtype_str not in FEATURE_TYPES.values() and not dtype_str.startswith(
67
- "cat"
68
- ):
69
- raise ValueError(
70
- f"dtype is {dtype_str} but has to be one of 'number', 'int', 'float', 'cat', 'bool', 'cat[...]'!"
71
- )
72
- if dtype_str != "cat" and dtype_str.startswith("cat"):
73
- registries_str = dtype_str.replace("cat[", "").rstrip("]")
74
- if registries_str != "":
75
- registry_str_list = registries_str.split("|")
76
- for registry_str in registry_str_list:
77
- if registry_str not in dict_schema_name_to_model_name(Artifact):
78
- raise ValueError(
79
- f"'{registry_str}' is an invalid dtype, pass, e.g. `[ln.ULabel, bt.CellType]` or similar"
80
- )
81
- kwargs["dtype"] = dtype_str
82
- super(Feature, self).__init__(*args, **kwargs)
83
-
84
-
85
- def categoricals_from_df(df: pd.DataFrame) -> dict:
86
- """Returns categorical columns."""
87
- string_cols = [col for col in df.columns if is_string_dtype(df[col])]
88
- categoricals = {
89
- col: df[col]
90
- for col in df.columns
91
- if isinstance(df[col].dtype, CategoricalDtype)
92
- }
93
- for key in string_cols:
94
- c = pd.Categorical(df[key])
95
- if len(c.categories) < len(c):
96
- categoricals[key] = c
97
- return categoricals
98
-
99
-
100
- @classmethod # type:ignore
101
- @doc_args(Feature.from_df.__doc__)
102
- def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsList:
103
- """{}""" # noqa: D415
104
- field = Feature.name if field is None else field
105
- categoricals = categoricals_from_df(df)
106
-
107
- dtypes = {}
108
- # categoricals_with_unmapped_categories = {} # type: ignore
109
- for name, col in df.items():
110
- if name in categoricals:
111
- dtypes[name] = "cat"
112
- else:
113
- dtypes[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
114
-
115
- # silence the warning "loaded record with exact same name "
116
- verbosity = settings.verbosity
117
- try:
118
- settings.verbosity = "error"
119
-
120
- registry = field.field.model
121
- if registry != Feature:
122
- raise ValueError("field must be a Feature FieldAttr!")
123
- # create records for all features including non-validated
124
- features = [Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()]
125
- finally:
126
- settings.verbosity = verbosity
127
-
128
- assert len(features) == len(df.columns) # noqa: S101
129
- return RecordsList(features)
130
-
131
-
132
- @doc_args(Feature.save.__doc__)
133
- def save(self, *args, **kwargs) -> Feature:
134
- """{}""" # noqa: D415
135
- super(Feature, self).save(*args, **kwargs)
136
- return self
137
-
138
-
139
- METHOD_NAMES = [
140
- "__init__",
141
- "from_df",
142
- "save",
143
- ]
144
-
145
- if ln_setup._TESTING:
146
- from inspect import signature
147
-
148
- SIGS = {
149
- name: signature(getattr(Feature, name))
150
- for name in METHOD_NAMES
151
- if name != "__init__"
152
- }
153
-
154
- for name in METHOD_NAMES:
155
- attach_func_to_class_method(name, Feature, globals())
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ import lamindb_setup as ln_setup
6
+ import pandas as pd
7
+ from lamindb_setup.core._docs import doc_args
8
+ from lnschema_core.models import Artifact, Feature
9
+ from pandas.api.types import CategoricalDtype, is_string_dtype
10
+
11
+ from lamindb._utils import attach_func_to_class_method
12
+ from lamindb.core._settings import settings
13
+
14
+ from ._query_set import RecordsList
15
+ from .core.schema import dict_schema_name_to_model_name
16
+
17
+ if TYPE_CHECKING:
18
+ from lnschema_core.types import FieldAttr
19
+
20
+ FEATURE_TYPES = {
21
+ "number": "number",
22
+ "int": "int",
23
+ "float": "float",
24
+ "bool": "bool",
25
+ "str": "cat",
26
+ "object": "cat",
27
+ }
28
+
29
+
30
+ def convert_numpy_dtype_to_lamin_feature_type(dtype, str_as_cat: bool = True) -> str:
31
+ orig_type = dtype.name
32
+ # strip precision qualifiers
33
+ type = "".join(i for i in orig_type if not i.isdigit())
34
+ if type == "object" or type == "str":
35
+ type = "cat" if str_as_cat else "str"
36
+ return type
37
+
38
+
39
+ def __init__(self, *args, **kwargs):
40
+ if len(args) == len(self._meta.concrete_fields):
41
+ super(Feature, self).__init__(*args, **kwargs)
42
+ return None
43
+ # now we proceed with the user-facing constructor
44
+ if len(args) != 0:
45
+ raise ValueError("Only keyword args allowed")
46
+ dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
47
+ # cast type
48
+ if dtype is None:
49
+ raise ValueError("Please pass dtype!")
50
+ elif dtype is not None:
51
+ if not isinstance(dtype, str):
52
+ if not isinstance(dtype, list) and dtype.__name__ in FEATURE_TYPES:
53
+ dtype_str = FEATURE_TYPES[dtype.__name__]
54
+ else:
55
+ if not isinstance(dtype, list):
56
+ raise ValueError("dtype has to be a list of Record types")
57
+ registries_str = ""
58
+ for cls in dtype:
59
+ if not hasattr(cls, "__get_name_with_schema__"):
60
+ raise ValueError("each element of the list has to be a Record")
61
+ registries_str += cls.__get_name_with_schema__() + "|"
62
+ dtype_str = f'cat[{registries_str.rstrip("|")}]'
63
+ else:
64
+ dtype_str = dtype
65
+ # add validation that a registry actually exists
66
+ if dtype_str not in FEATURE_TYPES.values() and not dtype_str.startswith(
67
+ "cat"
68
+ ):
69
+ raise ValueError(
70
+ f"dtype is {dtype_str} but has to be one of 'number', 'int', 'float', 'cat', 'bool', 'cat[...]'!"
71
+ )
72
+ if dtype_str != "cat" and dtype_str.startswith("cat"):
73
+ registries_str = dtype_str.replace("cat[", "").rstrip("]")
74
+ if registries_str != "":
75
+ registry_str_list = registries_str.split("|")
76
+ for registry_str in registry_str_list:
77
+ if registry_str not in dict_schema_name_to_model_name(Artifact):
78
+ raise ValueError(
79
+ f"'{registry_str}' is an invalid dtype, pass, e.g. `[ln.ULabel, bt.CellType]` or similar"
80
+ )
81
+ kwargs["dtype"] = dtype_str
82
+ super(Feature, self).__init__(*args, **kwargs)
83
+
84
+
85
+ def categoricals_from_df(df: pd.DataFrame) -> dict:
86
+ """Returns categorical columns."""
87
+ string_cols = [col for col in df.columns if is_string_dtype(df[col])]
88
+ categoricals = {
89
+ col: df[col]
90
+ for col in df.columns
91
+ if isinstance(df[col].dtype, CategoricalDtype)
92
+ }
93
+ for key in string_cols:
94
+ c = pd.Categorical(df[key])
95
+ if len(c.categories) < len(c):
96
+ categoricals[key] = c
97
+ return categoricals
98
+
99
+
100
+ @classmethod # type:ignore
101
+ @doc_args(Feature.from_df.__doc__)
102
+ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsList:
103
+ """{}""" # noqa: D415
104
+ field = Feature.name if field is None else field
105
+ categoricals = categoricals_from_df(df)
106
+
107
+ dtypes = {}
108
+ # categoricals_with_unmapped_categories = {} # type: ignore
109
+ for name, col in df.items():
110
+ if name in categoricals:
111
+ dtypes[name] = "cat"
112
+ else:
113
+ dtypes[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
114
+
115
+ # silence the warning "loaded record with exact same name "
116
+ verbosity = settings.verbosity
117
+ try:
118
+ settings.verbosity = "error"
119
+
120
+ registry = field.field.model
121
+ if registry != Feature:
122
+ raise ValueError("field must be a Feature FieldAttr!")
123
+ # create records for all features including non-validated
124
+ features = [Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()]
125
+ finally:
126
+ settings.verbosity = verbosity
127
+
128
+ assert len(features) == len(df.columns) # noqa: S101
129
+ return RecordsList(features)
130
+
131
+
132
+ @doc_args(Feature.save.__doc__)
133
+ def save(self, *args, **kwargs) -> Feature:
134
+ """{}""" # noqa: D415
135
+ super(Feature, self).save(*args, **kwargs)
136
+ return self
137
+
138
+
139
+ METHOD_NAMES = [
140
+ "__init__",
141
+ "from_df",
142
+ "save",
143
+ ]
144
+
145
+ if ln_setup._TESTING:
146
+ from inspect import signature
147
+
148
+ SIGS = {
149
+ name: signature(getattr(Feature, name))
150
+ for name in METHOD_NAMES
151
+ if name != "__init__"
152
+ }
153
+
154
+ for name in METHOD_NAMES:
155
+ attach_func_to_class_method(name, Feature, globals())