lamindb 1.10.2__py3-none-any.whl → 1.11a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +89 -49
- lamindb/_finish.py +14 -12
- lamindb/_tracked.py +2 -4
- lamindb/_view.py +1 -1
- lamindb/base/__init__.py +2 -1
- lamindb/base/dtypes.py +76 -0
- lamindb/core/_settings.py +2 -2
- lamindb/core/storage/_anndata_accessor.py +29 -9
- lamindb/curators/_legacy.py +16 -3
- lamindb/curators/core.py +432 -186
- lamindb/examples/cellxgene/__init__.py +8 -3
- lamindb/examples/cellxgene/_cellxgene.py +127 -13
- lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
- lamindb/examples/croissant/__init__.py +12 -2
- lamindb/examples/datasets/__init__.py +2 -2
- lamindb/examples/datasets/_core.py +1 -1
- lamindb/examples/datasets/_small.py +66 -22
- lamindb/examples/datasets/mini_immuno.py +1 -0
- lamindb/migrations/0119_squashed.py +5 -2
- lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
- lamindb/migrations/0121_recorduser.py +53 -0
- lamindb/models/__init__.py +3 -1
- lamindb/models/_describe.py +2 -2
- lamindb/models/_feature_manager.py +53 -53
- lamindb/models/_from_values.py +2 -2
- lamindb/models/_is_versioned.py +4 -4
- lamindb/models/_label_manager.py +4 -4
- lamindb/models/artifact.py +305 -116
- lamindb/models/artifact_set.py +36 -1
- lamindb/models/can_curate.py +1 -2
- lamindb/models/collection.py +3 -34
- lamindb/models/feature.py +111 -7
- lamindb/models/has_parents.py +11 -11
- lamindb/models/project.py +18 -0
- lamindb/models/query_manager.py +16 -7
- lamindb/models/query_set.py +59 -34
- lamindb/models/record.py +25 -4
- lamindb/models/run.py +8 -6
- lamindb/models/schema.py +54 -26
- lamindb/models/sqlrecord.py +123 -25
- lamindb/models/storage.py +59 -14
- lamindb/models/transform.py +17 -17
- lamindb/models/ulabel.py +6 -1
- {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/METADATA +4 -5
- {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/RECORD +47 -44
- {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/WHEEL +1 -1
- {lamindb-1.10.2.dist-info/licenses → lamindb-1.11a1.dist-info}/LICENSE +0 -0
@@ -496,21 +496,11 @@ def describe_features(
|
|
496
496
|
return tree
|
497
497
|
|
498
498
|
|
499
|
-
def is_valid_datetime_str(date_string: str) -> bool | str:
|
500
|
-
try:
|
501
|
-
dt = datetime.fromisoformat(date_string)
|
502
|
-
return dt.isoformat()
|
503
|
-
except ValueError:
|
504
|
-
return False
|
505
|
-
|
506
|
-
|
507
|
-
def is_iterable_of_sqlrecord(value: Any):
|
508
|
-
return isinstance(value, Iterable) and isinstance(next(iter(value)), SQLRecord)
|
509
|
-
|
510
|
-
|
511
499
|
def infer_feature_type_convert_json(
|
512
|
-
key: str, value: Any, mute: bool = False
|
500
|
+
key: str, value: Any, mute: bool = False
|
513
501
|
) -> tuple[str, Any, str]:
|
502
|
+
from lamindb.base.dtypes import is_valid_datetime_str
|
503
|
+
|
514
504
|
message = ""
|
515
505
|
if isinstance(value, bool):
|
516
506
|
return "bool", value, message
|
@@ -719,15 +709,15 @@ def parse_staged_feature_sets_from_anndata(
|
|
719
709
|
data_parse = backed_access(filepath, using_key=using_key)
|
720
710
|
else:
|
721
711
|
data_parse = ad.read_h5ad(filepath, backed="r")
|
722
|
-
|
712
|
+
dtype = "float"
|
723
713
|
else:
|
724
|
-
|
714
|
+
dtype = "float" if adata.X is None else serialize_pandas_dtype(adata.X.dtype)
|
725
715
|
feature_sets = {}
|
726
716
|
if var_field is not None:
|
727
717
|
schema_var = Schema.from_values(
|
728
718
|
data_parse.var.index,
|
729
719
|
var_field,
|
730
|
-
|
720
|
+
dtype=dtype,
|
731
721
|
mute=mute,
|
732
722
|
organism=organism,
|
733
723
|
raise_validation_error=False,
|
@@ -735,7 +725,7 @@ def parse_staged_feature_sets_from_anndata(
|
|
735
725
|
if schema_var is not None:
|
736
726
|
feature_sets["var"] = schema_var
|
737
727
|
if obs_field is not None and len(data_parse.obs.columns) > 0:
|
738
|
-
schema_obs = Schema.
|
728
|
+
schema_obs = Schema.from_dataframe(
|
739
729
|
df=data_parse.obs,
|
740
730
|
field=obs_field,
|
741
731
|
mute=mute,
|
@@ -851,16 +841,17 @@ class FeatureManager:
|
|
851
841
|
self,
|
852
842
|
values: dict[str, str | int | float | bool],
|
853
843
|
feature_field: FieldAttr = Feature.name,
|
854
|
-
|
844
|
+
schema: Schema = None,
|
855
845
|
) -> None:
|
856
846
|
"""Curate artifact with features & values.
|
857
847
|
|
858
848
|
Args:
|
859
849
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
860
|
-
feature_field: The field of a reference registry to map keys of the
|
861
|
-
|
862
|
-
str_as_ulabel: Whether to interpret string values as ulabels.
|
850
|
+
feature_field: The field of a reference registry to map keys of the dictionary.
|
851
|
+
schema: Schema to validate against.
|
863
852
|
"""
|
853
|
+
from lamindb.base.dtypes import is_iterable_of_sqlrecord
|
854
|
+
|
864
855
|
from .._tracked import get_current_tracked_run
|
865
856
|
|
866
857
|
# rename to distinguish from the values inside the dict
|
@@ -870,39 +861,48 @@ class FeatureManager:
|
|
870
861
|
keys = list(keys) # type: ignore
|
871
862
|
# deal with other cases later
|
872
863
|
assert all(isinstance(key, str) for key in keys) # noqa: S101
|
864
|
+
|
873
865
|
registry = feature_field.field.model
|
874
866
|
value_model = FeatureValue
|
875
867
|
model_name = "Feature"
|
876
|
-
records = registry.from_values(keys, field=feature_field, mute=True)
|
877
|
-
if len(records) != len(keys):
|
878
|
-
not_validated_keys = [
|
879
|
-
key for key in keys if key not in records.list("name")
|
880
|
-
]
|
881
|
-
not_validated_keys_dtype_message = [
|
882
|
-
(key, infer_feature_type_convert_json(key, dictionary[key]))
|
883
|
-
for key in not_validated_keys
|
884
|
-
]
|
885
|
-
run = get_current_tracked_run()
|
886
|
-
if run is not None:
|
887
|
-
name = f"{run.transform.type}[{run.transform.key}]"
|
888
|
-
type_hint = f""" {model_name.lower()}_type = ln.{model_name}(name='{name}', is_type=True).save()"""
|
889
|
-
elements = [type_hint]
|
890
|
-
type_kwarg = f", type={model_name.lower()}_type"
|
891
|
-
else:
|
892
|
-
elements = []
|
893
|
-
type_kwarg = ""
|
894
|
-
elements += [
|
895
|
-
f" ln.{model_name}(name='{key}', dtype='{dtype}'{type_kwarg}).save(){message}"
|
896
|
-
for key, (dtype, _, message) in not_validated_keys_dtype_message
|
897
|
-
]
|
898
|
-
hint = "\n".join(elements)
|
899
|
-
msg = (
|
900
|
-
f"These keys could not be validated: {not_validated_keys}\n"
|
901
|
-
f"Here is how to create a {model_name.lower()}:\n\n{hint}"
|
902
|
-
)
|
903
|
-
raise ValidationError(msg)
|
904
868
|
|
905
|
-
|
869
|
+
if schema is not None:
|
870
|
+
from lamindb.curators import DataFrameCurator
|
871
|
+
|
872
|
+
temp_df = pd.DataFrame([values])
|
873
|
+
curator = DataFrameCurator(temp_df, schema)
|
874
|
+
curator.validate()
|
875
|
+
records = schema.members.filter(name__in=keys)
|
876
|
+
else:
|
877
|
+
records = registry.from_values(keys, field=feature_field, mute=True)
|
878
|
+
if len(records) != len(keys):
|
879
|
+
not_validated_keys = [
|
880
|
+
key for key in keys if key not in records.to_list("name")
|
881
|
+
]
|
882
|
+
not_validated_keys_dtype_message = [
|
883
|
+
(key, infer_feature_type_convert_json(key, dictionary[key]))
|
884
|
+
for key in not_validated_keys
|
885
|
+
]
|
886
|
+
run = get_current_tracked_run()
|
887
|
+
if run is not None:
|
888
|
+
name = f"{run.transform.type}[{run.transform.key}]"
|
889
|
+
type_hint = f""" {model_name.lower()}_type = ln.{model_name}(name='{name}', is_type=True).save()"""
|
890
|
+
elements = [type_hint]
|
891
|
+
type_kwarg = f", type={model_name.lower()}_type"
|
892
|
+
else:
|
893
|
+
elements = []
|
894
|
+
type_kwarg = ""
|
895
|
+
elements += [
|
896
|
+
f" ln.{model_name}(name='{key}', dtype='{dtype}'{type_kwarg}).save(){message}"
|
897
|
+
for key, (dtype, _, message) in not_validated_keys_dtype_message
|
898
|
+
]
|
899
|
+
hint = "\n".join(elements)
|
900
|
+
msg = (
|
901
|
+
f"These keys could not be validated: {not_validated_keys}\n"
|
902
|
+
f"Here is how to create a {model_name.lower()}:\n\n{hint}"
|
903
|
+
)
|
904
|
+
raise ValidationError(msg)
|
905
|
+
|
906
906
|
features_labels = defaultdict(list)
|
907
907
|
_feature_values = []
|
908
908
|
not_validated_values: dict[str, list[str]] = defaultdict(list)
|
@@ -912,7 +912,6 @@ class FeatureManager:
|
|
912
912
|
feature.name,
|
913
913
|
value,
|
914
914
|
mute=True,
|
915
|
-
str_as_ulabel=str_as_ulabel,
|
916
915
|
)
|
917
916
|
if feature.dtype == "num":
|
918
917
|
if inferred_type not in {"int", "float"}:
|
@@ -994,6 +993,7 @@ class FeatureManager:
|
|
994
993
|
f"Here is how to create records for them:\n\n{hint}"
|
995
994
|
)
|
996
995
|
raise ValidationError(msg)
|
996
|
+
|
997
997
|
if features_labels:
|
998
998
|
self._add_label_feature_links(features_labels)
|
999
999
|
if _feature_values:
|
@@ -1039,7 +1039,7 @@ class FeatureManager:
|
|
1039
1039
|
feature: str | Feature,
|
1040
1040
|
*,
|
1041
1041
|
value: Any | None = None,
|
1042
|
-
):
|
1042
|
+
) -> None:
|
1043
1043
|
"""Remove value annotations for a given feature.
|
1044
1044
|
|
1045
1045
|
Args:
|
@@ -1262,7 +1262,7 @@ class FeatureManager:
|
|
1262
1262
|
"""Add feature set corresponding to column names of DataFrame."""
|
1263
1263
|
assert self._host.otype == "DataFrame" # noqa: S101
|
1264
1264
|
df = self._host.load(is_run_input=False)
|
1265
|
-
schema = Schema.
|
1265
|
+
schema = Schema.from_dataframe(
|
1266
1266
|
df=df,
|
1267
1267
|
field=field,
|
1268
1268
|
mute=mute,
|
lamindb/models/_from_values.py
CHANGED
@@ -121,7 +121,7 @@ def get_existing_records(
|
|
121
121
|
# ]
|
122
122
|
# )
|
123
123
|
# order by causes a factor 10 in runtime
|
124
|
-
# records = query_set.order_by(preserved).
|
124
|
+
# records = query_set.order_by(preserved).to_list()
|
125
125
|
|
126
126
|
# log validated terms
|
127
127
|
is_validated = model.validate(
|
@@ -165,7 +165,7 @@ def get_existing_records(
|
|
165
165
|
query = {f"{field.field.name}__in": iterable_idx.values} # type: ignore
|
166
166
|
if organism is not None:
|
167
167
|
query["organism"] = organism
|
168
|
-
records = model.filter(**query).
|
168
|
+
records = model.filter(**query).to_list()
|
169
169
|
|
170
170
|
if len(validated) == len(iterable_idx):
|
171
171
|
return records, pd.Index([]), msg
|
lamindb/models/_is_versioned.py
CHANGED
@@ -108,12 +108,12 @@ def bump_version(
|
|
108
108
|
) -> str:
|
109
109
|
"""Bumps the version number by major or minor depending on the bump_type flag.
|
110
110
|
|
111
|
-
|
112
|
-
|
113
|
-
|
111
|
+
Args:
|
112
|
+
version: The current version in "MAJOR" or "MAJOR.MINOR" format.
|
113
|
+
bump_type: The type of version bump, either 'major' or 'minor'.
|
114
114
|
|
115
115
|
Returns:
|
116
|
-
|
116
|
+
The new version string.
|
117
117
|
"""
|
118
118
|
try:
|
119
119
|
# Split the version into major and minor parts if possible
|
lamindb/models/_label_manager.py
CHANGED
@@ -268,7 +268,7 @@ class LabelManager:
|
|
268
268
|
for link in links:
|
269
269
|
if link.feature is not None:
|
270
270
|
features.add(link.feature)
|
271
|
-
key = link.feature.
|
271
|
+
key = link.feature.uid
|
272
272
|
else:
|
273
273
|
key = None
|
274
274
|
keys.append(key)
|
@@ -299,9 +299,9 @@ class LabelManager:
|
|
299
299
|
)
|
300
300
|
save(new_features) # type: ignore
|
301
301
|
if hasattr(self._host, related_name):
|
302
|
-
for
|
303
|
-
if
|
304
|
-
feature_id = Feature.get(
|
302
|
+
for feature_uid, feature_labels in labels_by_features.items():
|
303
|
+
if feature_uid is not None:
|
304
|
+
feature_id = Feature.get(feature_uid).id
|
305
305
|
else:
|
306
306
|
feature_id = None
|
307
307
|
getattr(self._host, related_name).add(
|