lamindb 0.76.14__py3-none-any.whl → 0.76.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +35 -31
- lamindb/_can_validate.py +6 -6
- lamindb/_collection.py +6 -5
- lamindb/_curate.py +60 -39
- lamindb/_feature.py +2 -3
- lamindb/_feature_set.py +1 -2
- lamindb/_is_versioned.py +1 -2
- lamindb/_parents.py +28 -5
- lamindb/_query_manager.py +1 -2
- lamindb/_query_set.py +1 -1
- lamindb/_record.py +3 -3
- lamindb/_save.py +2 -2
- lamindb/_transform.py +1 -2
- lamindb/_ulabel.py +1 -1
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/storage/_backed_access.py +16 -8
- lamindb/core/storage/_pyarrow_dataset.py +31 -0
- {lamindb-0.76.14.dist-info → lamindb-0.76.15.dist-info}/METADATA +5 -6
- {lamindb-0.76.14.dist-info → lamindb-0.76.15.dist-info}/RECORD +22 -21
- {lamindb-0.76.14.dist-info → lamindb-0.76.15.dist-info}/LICENSE +0 -0
- {lamindb-0.76.14.dist-info → lamindb-0.76.15.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_artifact.py
CHANGED
@@ -28,39 +28,41 @@ from lnschema_core.types import (
|
|
28
28
|
VisibilityChoice,
|
29
29
|
)
|
30
30
|
|
31
|
-
from
|
32
|
-
from
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
from ._utils import attach_func_to_class_method
|
32
|
+
from .core._data import (
|
33
|
+
_track_run_input,
|
34
|
+
add_transform_to_kwargs,
|
35
|
+
describe,
|
36
|
+
get_run,
|
37
|
+
save_feature_set_links,
|
38
|
+
save_feature_sets,
|
39
|
+
view_lineage,
|
40
|
+
)
|
41
|
+
from .core._settings import settings
|
42
|
+
from .core.exceptions import IntegrityError, InvalidArgument
|
43
|
+
from .core.loaders import load_to_memory
|
44
|
+
from .core.storage import (
|
37
45
|
LocalPathClasses,
|
38
46
|
UPath,
|
39
47
|
delete_storage,
|
40
48
|
infer_suffix,
|
41
49
|
write_to_disk,
|
42
50
|
)
|
43
|
-
from
|
51
|
+
from .core.storage._pyarrow_dataset import PYARROW_SUFFIXES
|
52
|
+
from .core.storage.objects import _mudata_is_installed
|
53
|
+
from .core.storage.paths import (
|
54
|
+
AUTO_KEY_PREFIX,
|
44
55
|
auto_storage_key_from_artifact,
|
45
56
|
auto_storage_key_from_artifact_uid,
|
46
57
|
check_path_is_child_of_root,
|
47
58
|
filepath_cache_key_from_artifact,
|
48
59
|
filepath_from_artifact,
|
49
60
|
)
|
50
|
-
from
|
61
|
+
from .core.versioning import (
|
51
62
|
create_uid,
|
52
63
|
message_update_key_in_version_family,
|
53
64
|
)
|
54
65
|
|
55
|
-
from .core._data import (
|
56
|
-
add_transform_to_kwargs,
|
57
|
-
get_run,
|
58
|
-
save_feature_set_links,
|
59
|
-
save_feature_sets,
|
60
|
-
)
|
61
|
-
from .core.storage.objects import _mudata_is_installed
|
62
|
-
from .core.storage.paths import AUTO_KEY_PREFIX
|
63
|
-
|
64
66
|
try:
|
65
67
|
from .core.storage._zarr import zarr_is_adata
|
66
68
|
except ImportError:
|
@@ -72,6 +74,7 @@ except ImportError:
|
|
72
74
|
if TYPE_CHECKING:
|
73
75
|
from lamindb_setup.core.types import UPathStr
|
74
76
|
from mudata import MuData
|
77
|
+
from pyarrow.dataset import Dataset as PyArrowDataset
|
75
78
|
from tiledbsoma import Collection as SOMACollection
|
76
79
|
from tiledbsoma import Experiment as SOMAExperiment
|
77
80
|
|
@@ -772,19 +775,14 @@ def from_dir(
|
|
772
775
|
else:
|
773
776
|
folder_key_path = Path(key)
|
774
777
|
|
775
|
-
|
776
|
-
folder_key = folder_key_path.as_posix().rstrip("/")
|
777
|
-
|
778
|
-
# TODO: (non-local) UPath doesn't list the first level artifacts and dirs with "*"
|
779
|
-
pattern = "" if not isinstance(folderpath, LocalPathClasses) else "*"
|
780
|
-
|
778
|
+
folder_key = folder_key_path.as_posix()
|
781
779
|
# silence fine-grained logging
|
782
780
|
verbosity = settings.verbosity
|
783
781
|
verbosity_int = settings._verbosity_int
|
784
782
|
if verbosity_int >= 1:
|
785
783
|
settings.verbosity = "warning"
|
786
784
|
artifacts_dict = {}
|
787
|
-
for filepath in folderpath.rglob(
|
785
|
+
for filepath in folderpath.rglob("*"):
|
788
786
|
if filepath.is_file():
|
789
787
|
relative_path = get_relative_path_to_directory(filepath, folderpath)
|
790
788
|
artifact_key = folder_key + "/" + relative_path.as_posix()
|
@@ -802,7 +800,8 @@ def from_dir(
|
|
802
800
|
if artifact.hash is not None
|
803
801
|
]
|
804
802
|
uids = artifacts_dict.keys()
|
805
|
-
|
803
|
+
n_unique_hashes = len(set(hashes))
|
804
|
+
if n_unique_hashes == len(hashes):
|
806
805
|
artifacts = list(artifacts_dict.values())
|
807
806
|
else:
|
808
807
|
# consider exact duplicates (same id, same hash)
|
@@ -811,7 +810,7 @@ def from_dir(
|
|
811
810
|
# logger.warning("dropping duplicate records in list of artifact records")
|
812
811
|
# artifacts = list(set(uids))
|
813
812
|
# consider false duplicates (different id, same hash)
|
814
|
-
if not len(set(uids)) ==
|
813
|
+
if not len(set(uids)) == n_unique_hashes:
|
815
814
|
seen_hashes = set()
|
816
815
|
non_unique_artifacts = {
|
817
816
|
hash: artifact
|
@@ -905,14 +904,19 @@ def replace(
|
|
905
904
|
# docstring handled through attach_func_to_class_method
|
906
905
|
def open(
|
907
906
|
self, mode: str = "r", is_run_input: bool | None = None
|
908
|
-
) ->
|
907
|
+
) -> (
|
908
|
+
AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
|
909
|
+
):
|
909
910
|
# ignore empty suffix for now
|
910
|
-
suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma"
|
911
|
+
suffixes = ("", ".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma") + PYARROW_SUFFIXES
|
911
912
|
if self.suffix not in suffixes:
|
912
913
|
raise ValueError(
|
913
|
-
"Artifact should have a zarr, h5
|
914
|
-
"
|
915
|
-
|
914
|
+
"Artifact should have a zarr, h5, tiledbsoma object"
|
915
|
+
" or a compatible `pyarrow.dataset.dataset` directory"
|
916
|
+
" as the underlying data, please use one of the following suffixes"
|
917
|
+
f" for the object name: {', '.join(suffixes[1:])}."
|
918
|
+
f" Or no suffix for a folder with {', '.join(PYARROW_SUFFIXES)} files"
|
919
|
+
" (no mixing allowed)."
|
916
920
|
)
|
917
921
|
if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
|
918
922
|
raise ValueError("Only a tiledbsoma store can be openened with `mode!='r'`.")
|
lamindb/_can_validate.py
CHANGED
@@ -10,10 +10,10 @@ from lamin_utils import colors, logger
|
|
10
10
|
from lamindb_setup.core._docs import doc_args
|
11
11
|
from lnschema_core import CanValidate, Record
|
12
12
|
|
13
|
-
from lamindb._utils import attach_func_to_class_method
|
14
|
-
|
15
13
|
from ._from_values import _has_organism_field, _print_values, get_or_create_records
|
16
14
|
from ._record import _queryset, get_name_field
|
15
|
+
from ._utils import attach_func_to_class_method
|
16
|
+
from .core.exceptions import ValidationError
|
17
17
|
|
18
18
|
if TYPE_CHECKING:
|
19
19
|
from django.db.models import QuerySet
|
@@ -496,9 +496,9 @@ def _add_or_remove_synonyms(
|
|
496
496
|
" with the following records:\n"
|
497
497
|
)
|
498
498
|
display(records_df)
|
499
|
-
raise
|
500
|
-
"
|
501
|
-
"
|
499
|
+
raise ValidationError(
|
500
|
+
f"you are trying to assign a synonym to record: {record}\n"
|
501
|
+
" → consider removing the synonym from existing records or using a different synonym."
|
502
502
|
)
|
503
503
|
|
504
504
|
# passed synonyms
|
@@ -516,7 +516,7 @@ def _add_or_remove_synonyms(
|
|
516
516
|
return
|
517
517
|
# because we use | as the separator
|
518
518
|
if any("|" in i for i in syn_new_set):
|
519
|
-
raise
|
519
|
+
raise ValidationError("a synonym can't contain '|'!")
|
520
520
|
|
521
521
|
# existing synonyms
|
522
522
|
syns_exist = record.synonyms
|
lamindb/_collection.py
CHANGED
@@ -20,20 +20,21 @@ from lnschema_core.models import (
|
|
20
20
|
)
|
21
21
|
from lnschema_core.types import VisibilityChoice
|
22
22
|
|
23
|
-
from lamindb._utils import attach_func_to_class_method
|
24
|
-
from lamindb.core._data import _track_run_input, describe, view_lineage
|
25
|
-
from lamindb.core._mapped_collection import MappedCollection
|
26
|
-
from lamindb.core.versioning import process_revises
|
27
|
-
|
28
23
|
from . import Artifact, Run
|
29
24
|
from ._record import init_self_from_db, update_attributes
|
25
|
+
from ._utils import attach_func_to_class_method
|
30
26
|
from .core._data import (
|
27
|
+
_track_run_input,
|
31
28
|
add_transform_to_kwargs,
|
29
|
+
describe,
|
32
30
|
get_run,
|
33
31
|
save_feature_set_links,
|
34
32
|
save_feature_sets,
|
33
|
+
view_lineage,
|
35
34
|
)
|
35
|
+
from .core._mapped_collection import MappedCollection
|
36
36
|
from .core._settings import settings
|
37
|
+
from .core.versioning import process_revises
|
37
38
|
|
38
39
|
if TYPE_CHECKING:
|
39
40
|
from collections.abc import Iterable
|
lamindb/_curate.py
CHANGED
@@ -184,7 +184,7 @@ class DataFrameCurator(BaseCurator):
|
|
184
184
|
def non_validated(self) -> list:
|
185
185
|
"""Return the non-validated features and labels."""
|
186
186
|
if self._non_validated is None:
|
187
|
-
raise
|
187
|
+
raise ValidationError("Please run validate() first!")
|
188
188
|
return self._non_validated
|
189
189
|
|
190
190
|
@property
|
@@ -222,7 +222,7 @@ class DataFrameCurator(BaseCurator):
|
|
222
222
|
valid_keys = set(self._df.columns) | {"columns"} | extra
|
223
223
|
nonval_keys = [key for key in d.keys() if key not in valid_keys]
|
224
224
|
if len(nonval_keys) > 0:
|
225
|
-
raise
|
225
|
+
raise ValidationError(
|
226
226
|
f"the following keys passed to {name} are not allowed: {nonval_keys}"
|
227
227
|
)
|
228
228
|
|
@@ -288,9 +288,11 @@ class DataFrameCurator(BaseCurator):
|
|
288
288
|
self._save_columns(validated_only=validated_only, **kwargs)
|
289
289
|
else:
|
290
290
|
if categorical not in self.fields:
|
291
|
-
raise
|
291
|
+
raise ValidationError(
|
292
|
+
f"Feature {categorical} is not part of the fields!"
|
293
|
+
)
|
292
294
|
update_registry(
|
293
|
-
values=self._df[categorical]
|
295
|
+
values=flatten_unique(self._df[categorical]),
|
294
296
|
field=self.fields[categorical],
|
295
297
|
key=categorical,
|
296
298
|
using_key=self._using_key,
|
@@ -508,13 +510,13 @@ class AnnDataCurator(DataFrameCurator):
|
|
508
510
|
exclude=self._exclude.get("var_index"),
|
509
511
|
)
|
510
512
|
|
511
|
-
def _update_registry_all(self):
|
513
|
+
def _update_registry_all(self, validated_only: bool = True, **kwargs):
|
512
514
|
"""Save labels for all features."""
|
513
515
|
logger.info("saving validated records of 'var_index'")
|
514
|
-
self._save_from_var_index(validated_only=
|
516
|
+
self._save_from_var_index(validated_only=validated_only, **self._kwargs)
|
515
517
|
for name in self._obs_fields.keys():
|
516
518
|
logger.info(f"saving validated terms of '{name}'")
|
517
|
-
self._update_registry(name, validated_only=
|
519
|
+
self._update_registry(name, validated_only=validated_only, **self._kwargs)
|
518
520
|
|
519
521
|
def add_new_from_var_index(self, organism: str | None = None, **kwargs):
|
520
522
|
"""Update variable records.
|
@@ -704,7 +706,7 @@ class MuDataCurator:
|
|
704
706
|
"""Verify the modality exists."""
|
705
707
|
for modality in modalities:
|
706
708
|
if modality not in self._mdata.mod.keys():
|
707
|
-
raise
|
709
|
+
raise ValidationError(f"modality '{modality}' does not exist!")
|
708
710
|
|
709
711
|
def _save_from_var_index_modality(
|
710
712
|
self, modality: str, validated_only: bool = True, **kwargs
|
@@ -729,7 +731,7 @@ class MuDataCurator:
|
|
729
731
|
obs_fields: dict[str, dict[str, FieldAttr]] = {}
|
730
732
|
for k, v in categoricals.items():
|
731
733
|
if k not in self._mdata.obs.columns:
|
732
|
-
raise
|
734
|
+
raise ValidationError(f"column '{k}' does not exist in mdata.obs!")
|
733
735
|
if any(k.startswith(prefix) for prefix in prefixes):
|
734
736
|
modality, col = k.split(":")[0], k.split(":")[1]
|
735
737
|
if modality not in obs_fields.keys():
|
@@ -1120,7 +1122,7 @@ def check_registry_organism(registry: Record, organism: str | None = None) -> di
|
|
1120
1122
|
import bionty as bt
|
1121
1123
|
|
1122
1124
|
if organism is None and bt.settings.organism is None:
|
1123
|
-
raise
|
1125
|
+
raise ValidationError(
|
1124
1126
|
f"{registry.__name__} registry requires an organism!\n"
|
1125
1127
|
" → please pass an organism name via organism="
|
1126
1128
|
)
|
@@ -1148,8 +1150,8 @@ def validate_categories(
|
|
1148
1150
|
using_key: A reference LaminDB instance.
|
1149
1151
|
organism: The organism name.
|
1150
1152
|
source: The source record.
|
1151
|
-
exclude: Exclude specific values.
|
1152
|
-
standardize:
|
1153
|
+
exclude: Exclude specific values from validation.
|
1154
|
+
standardize: Whether to standardize the values.
|
1153
1155
|
validated_hint_print: The hint to print for validated values.
|
1154
1156
|
"""
|
1155
1157
|
from lamindb._from_values import _print_values
|
@@ -1210,12 +1212,15 @@ def validate_categories(
|
|
1210
1212
|
|
1211
1213
|
validated_hint_print = validated_hint_print or f".add_validated_from('{key}')"
|
1212
1214
|
n_validated = len(values_validated)
|
1215
|
+
|
1213
1216
|
if n_validated > 0:
|
1214
1217
|
_log_mapping_info()
|
1218
|
+
terms_str = f"{', '.join([f'{chr(39)}{v}{chr(39)}' for v in values_validated[:10]])}{', ...' if len(values_validated) > 10 else ''}"
|
1219
|
+
val_numerous = "" if n_validated == 1 else "s"
|
1215
1220
|
logger.warning(
|
1216
|
-
f"found {colors.yellow(n_validated)} validated
|
1217
|
-
f"{colors.yellow(
|
1218
|
-
f"{colors.yellow(validated_hint_print)}"
|
1221
|
+
f"found {colors.yellow(n_validated)} validated term{val_numerous}: "
|
1222
|
+
f"{colors.yellow(terms_str)}\n"
|
1223
|
+
f"→ save term{val_numerous} via {colors.yellow(validated_hint_print)}"
|
1219
1224
|
)
|
1220
1225
|
|
1221
1226
|
non_validated_hint_print = validated_hint_print.replace("_validated_", "_new_")
|
@@ -1230,13 +1235,15 @@ def validate_categories(
|
|
1230
1235
|
# validated values still need to be saved to the current instance
|
1231
1236
|
return False, []
|
1232
1237
|
else:
|
1233
|
-
|
1238
|
+
non_val_numerous = ("", "is") if n_non_validated == 1 else ("s", "are")
|
1234
1239
|
print_values = _print_values(non_validated)
|
1235
1240
|
warning_message = (
|
1236
|
-
f"{colors.red(f'{n_non_validated}
|
1237
|
-
f"{colors.red(print_values)
|
1241
|
+
f"{colors.red(f'{n_non_validated} term{non_val_numerous[0]}')} {non_val_numerous[1]} not validated: "
|
1242
|
+
f"{colors.red(', '.join(print_values.split(', ')[:10]) + ', ...' if len(print_values.split(', ')) > 10 else print_values)}\n"
|
1243
|
+
f"→ fix typo{non_val_numerous[0]}, remove non-existent value{non_val_numerous[0]}, or save term{non_val_numerous[0]} via "
|
1238
1244
|
f"{colors.red(non_validated_hint_print)}"
|
1239
1245
|
)
|
1246
|
+
|
1240
1247
|
if logger.indent == "":
|
1241
1248
|
_log_mapping_info()
|
1242
1249
|
logger.warning(warning_message)
|
@@ -1427,6 +1434,19 @@ def save_artifact(
|
|
1427
1434
|
return artifact
|
1428
1435
|
|
1429
1436
|
|
1437
|
+
def flatten_unique(series):
|
1438
|
+
"""Flatten a pandas series if it contains lists."""
|
1439
|
+
result = set()
|
1440
|
+
|
1441
|
+
for item in series:
|
1442
|
+
if isinstance(item, list):
|
1443
|
+
result.update(item)
|
1444
|
+
else:
|
1445
|
+
result.add(item)
|
1446
|
+
|
1447
|
+
return list(result)
|
1448
|
+
|
1449
|
+
|
1430
1450
|
def update_registry(
|
1431
1451
|
values: list[str],
|
1432
1452
|
field: FieldAttr,
|
@@ -1596,24 +1616,25 @@ def log_saved_labels(
|
|
1596
1616
|
continue
|
1597
1617
|
|
1598
1618
|
if k == "without reference" and validated_only:
|
1599
|
-
|
1600
|
-
|
1601
|
-
)
|
1602
|
-
|
1603
|
-
|
1604
|
-
)
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1608
|
-
msg +=
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
)
|
1613
|
-
|
1614
|
-
|
1615
|
-
|
1616
|
-
|
1619
|
+
continue
|
1620
|
+
# msg = colors.yellow(
|
1621
|
+
# f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
|
1622
|
+
# )
|
1623
|
+
# lookup_print = (
|
1624
|
+
# f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
|
1625
|
+
# )
|
1626
|
+
|
1627
|
+
# hint = f".add_new_from('{key}')"
|
1628
|
+
# msg += f"\n → to lookup values, use {lookup_print}"
|
1629
|
+
# msg += (
|
1630
|
+
# f"\n → to save, run {colors.yellow(hint)}"
|
1631
|
+
# if save_function == "add_new_from"
|
1632
|
+
# else f"\n → to save, run {colors.yellow(save_function)}"
|
1633
|
+
# )
|
1634
|
+
# if warning:
|
1635
|
+
# logger.warning(msg)
|
1636
|
+
# else:
|
1637
|
+
# logger.info(msg)
|
1617
1638
|
else:
|
1618
1639
|
k = "" if k == "without reference" else f"{colors.green(k)} "
|
1619
1640
|
# the term "transferred" stresses that this is always in the context of transferring
|
@@ -1631,8 +1652,8 @@ def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> N
|
|
1631
1652
|
all_records = registry.from_values(list(values), field=field)
|
1632
1653
|
is_feature = registry.filter(name=f"is_{key}").one_or_none()
|
1633
1654
|
if is_feature is None:
|
1634
|
-
is_feature = registry(name=f"is_{key}")
|
1635
|
-
|
1655
|
+
is_feature = registry(name=f"is_{key}").save()
|
1656
|
+
logger.important(f"Created a parent ULabel: {is_feature}")
|
1636
1657
|
is_feature.children.add(*all_records)
|
1637
1658
|
|
1638
1659
|
|
@@ -1689,7 +1710,7 @@ def _save_organism(name: str): # pragma: no cover
|
|
1689
1710
|
if organism is None:
|
1690
1711
|
organism = bt.Organism.from_source(name=name)
|
1691
1712
|
if organism is None:
|
1692
|
-
raise
|
1713
|
+
raise ValidationError(
|
1693
1714
|
f"Organism '{name}' not found\n"
|
1694
1715
|
f" → please save it: bt.Organism(name='{name}').save()"
|
1695
1716
|
)
|
lamindb/_feature.py
CHANGED
@@ -8,10 +8,9 @@ from lamindb_setup.core._docs import doc_args
|
|
8
8
|
from lnschema_core.models import Artifact, Feature
|
9
9
|
from pandas.api.types import CategoricalDtype, is_string_dtype
|
10
10
|
|
11
|
-
from lamindb._utils import attach_func_to_class_method
|
12
|
-
from lamindb.core._settings import settings
|
13
|
-
|
14
11
|
from ._query_set import RecordsList
|
12
|
+
from ._utils import attach_func_to_class_method
|
13
|
+
from .core._settings import settings
|
15
14
|
from .core.schema import dict_schema_name_to_model_name
|
16
15
|
|
17
16
|
if TYPE_CHECKING:
|
lamindb/_feature_set.py
CHANGED
@@ -10,10 +10,9 @@ from lamindb_setup.core.hashing import hash_set
|
|
10
10
|
from lnschema_core import Feature, FeatureSet, Record, ids
|
11
11
|
from lnschema_core.types import FieldAttr, ListLike
|
12
12
|
|
13
|
-
from lamindb._utils import attach_func_to_class_method
|
14
|
-
|
15
13
|
from ._feature import convert_numpy_dtype_to_lamin_feature_type
|
16
14
|
from ._record import init_self_from_db
|
15
|
+
from ._utils import attach_func_to_class_method
|
17
16
|
from .core.exceptions import ValidationError
|
18
17
|
from .core.schema import (
|
19
18
|
dict_related_model_to_related_name,
|
lamindb/_is_versioned.py
CHANGED
@@ -5,8 +5,7 @@ from lamin_utils import logger
|
|
5
5
|
from lamindb_setup.core.upath import UPath
|
6
6
|
from lnschema_core.models import IsVersioned
|
7
7
|
|
8
|
-
from
|
9
|
-
|
8
|
+
from ._utils import attach_func_to_class_method
|
10
9
|
from .core.versioning import create_uid, get_new_path_from_uid
|
11
10
|
|
12
11
|
|
lamindb/_parents.py
CHANGED
@@ -8,13 +8,14 @@ from lamin_utils import logger
|
|
8
8
|
from lnschema_core import Artifact, Collection, Record, Run, Transform
|
9
9
|
from lnschema_core.models import HasParents, format_field_value
|
10
10
|
|
11
|
-
from lamindb._utils import attach_func_to_class_method
|
12
|
-
|
13
11
|
from ._record import get_name_field
|
12
|
+
from ._utils import attach_func_to_class_method
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
16
15
|
from lnschema_core.types import StrField
|
17
16
|
|
17
|
+
from lamindb.core import QuerySet
|
18
|
+
|
18
19
|
LAMIN_GREEN_LIGHTER = "#10b981"
|
19
20
|
LAMIN_GREEN_DARKER = "#065f46"
|
20
21
|
GREEN_FILL = "honeydew"
|
@@ -22,6 +23,30 @@ TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}
|
|
22
23
|
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
23
24
|
|
24
25
|
|
26
|
+
# this is optimized to have fewer recursive calls
|
27
|
+
# also len of QuerySet can be costly at times
|
28
|
+
def _query_relatives(
|
29
|
+
records: QuerySet | list[Record],
|
30
|
+
kind: Literal["parents", "children"],
|
31
|
+
cls: type[HasParents],
|
32
|
+
) -> QuerySet:
|
33
|
+
relatives = cls.objects.none()
|
34
|
+
if len(records) == 0:
|
35
|
+
return relatives
|
36
|
+
for record in records:
|
37
|
+
relatives = relatives.union(getattr(record, kind).all())
|
38
|
+
relatives = relatives.union(_query_relatives(relatives, kind, cls))
|
39
|
+
return relatives
|
40
|
+
|
41
|
+
|
42
|
+
def query_parents(self) -> QuerySet:
|
43
|
+
return _query_relatives([self], "parents", self.__class__)
|
44
|
+
|
45
|
+
|
46
|
+
def query_children(self) -> QuerySet:
|
47
|
+
return _query_relatives([self], "children", self.__class__)
|
48
|
+
|
49
|
+
|
25
50
|
def _transform_emoji(transform: Transform):
|
26
51
|
if transform is not None:
|
27
52
|
return TRANSFORM_EMOJIS.get(transform.type, "💫")
|
@@ -474,9 +499,7 @@ def _df_edges_from_runs(df_values: list):
|
|
474
499
|
return df
|
475
500
|
|
476
501
|
|
477
|
-
METHOD_NAMES = [
|
478
|
-
"view_parents",
|
479
|
-
]
|
502
|
+
METHOD_NAMES = ["view_parents", "query_parents", "query_children"]
|
480
503
|
|
481
504
|
if ln_setup._TESTING: # type: ignore
|
482
505
|
from inspect import signature
|
lamindb/_query_manager.py
CHANGED
@@ -7,9 +7,8 @@ from lamin_utils import logger
|
|
7
7
|
from lamindb_setup.core._docs import doc_args
|
8
8
|
from lnschema_core.models import Record
|
9
9
|
|
10
|
-
from lamindb.core._settings import settings
|
11
|
-
|
12
10
|
from .core._feature_manager import get_feature_set_by_slot_
|
11
|
+
from .core._settings import settings
|
13
12
|
|
14
13
|
if TYPE_CHECKING:
|
15
14
|
from lnschema_core.types import StrField
|
lamindb/_query_set.py
CHANGED
lamindb/_record.py
CHANGED
@@ -19,9 +19,9 @@ from lamindb_setup.core._hub_core import connect_instance_hub
|
|
19
19
|
from lamindb_setup.core._settings_store import instance_settings_file
|
20
20
|
from lnschema_core.models import Artifact, Feature, IsVersioned, Record, Run, Transform
|
21
21
|
|
22
|
-
from
|
23
|
-
from
|
24
|
-
from
|
22
|
+
from ._utils import attach_func_to_class_method
|
23
|
+
from .core._settings import settings
|
24
|
+
from .core.exceptions import RecordNameChangeIntegrityError
|
25
25
|
|
26
26
|
if TYPE_CHECKING:
|
27
27
|
import pandas as pd
|
lamindb/_save.py
CHANGED
@@ -15,8 +15,8 @@ from lamin_utils import logger
|
|
15
15
|
from lamindb_setup.core.upath import LocalPathClasses
|
16
16
|
from lnschema_core.models import Artifact, Record
|
17
17
|
|
18
|
-
from
|
19
|
-
from
|
18
|
+
from .core._settings import settings
|
19
|
+
from .core.storage.paths import (
|
20
20
|
_cache_key_from_artifact_storage,
|
21
21
|
attempt_accessing_path,
|
22
22
|
auto_storage_key_from_artifact,
|
lamindb/_transform.py
CHANGED
@@ -6,10 +6,9 @@ from lamin_utils import logger
|
|
6
6
|
from lamindb_setup.core._docs import doc_args
|
7
7
|
from lnschema_core.models import Run, Transform
|
8
8
|
|
9
|
-
from lamindb.core.exceptions import InconsistentKey
|
10
|
-
|
11
9
|
from ._parents import _view_parents
|
12
10
|
from ._run import delete_run_artifacts
|
11
|
+
from .core.exceptions import InconsistentKey
|
13
12
|
from .core.versioning import message_update_key_in_version_family, process_revises
|
14
13
|
|
15
14
|
if TYPE_CHECKING:
|
lamindb/_ulabel.py
CHANGED
@@ -6,7 +6,7 @@ import lamindb_setup as ln_setup
|
|
6
6
|
from lamindb_setup.core._docs import doc_args
|
7
7
|
from lnschema_core import ULabel
|
8
8
|
|
9
|
-
from
|
9
|
+
from ._utils import attach_func_to_class_method
|
10
10
|
|
11
11
|
if TYPE_CHECKING:
|
12
12
|
from lnschema_core.types import ListLike
|
@@ -7,11 +7,13 @@ from anndata._io.specs.registry import get_spec
|
|
7
7
|
from lnschema_core import Artifact
|
8
8
|
|
9
9
|
from ._anndata_accessor import AnnDataAccessor, StorageType, registry
|
10
|
+
from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
|
10
11
|
from ._tiledbsoma import _open_tiledbsoma
|
11
12
|
from .paths import filepath_from_artifact
|
12
13
|
|
13
14
|
if TYPE_CHECKING:
|
14
15
|
from fsspec.core import OpenFile
|
16
|
+
from pyarrow.dataset import Dataset as PyArrowDataset
|
15
17
|
from tiledbsoma import Collection as SOMACollection
|
16
18
|
from tiledbsoma import Experiment as SOMAExperiment
|
17
19
|
from upath import UPath
|
@@ -67,22 +69,28 @@ def backed_access(
|
|
67
69
|
artifact_or_filepath: Artifact | UPath,
|
68
70
|
mode: str = "r",
|
69
71
|
using_key: str | None = None,
|
70
|
-
) ->
|
72
|
+
) -> (
|
73
|
+
AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
|
74
|
+
):
|
71
75
|
if isinstance(artifact_or_filepath, Artifact):
|
72
|
-
|
76
|
+
objectpath, _ = filepath_from_artifact(
|
77
|
+
artifact_or_filepath, using_key=using_key
|
78
|
+
)
|
73
79
|
else:
|
74
|
-
|
75
|
-
name =
|
76
|
-
suffix =
|
80
|
+
objectpath = artifact_or_filepath
|
81
|
+
name = objectpath.name
|
82
|
+
suffix = objectpath.suffix
|
77
83
|
|
78
84
|
if name == "soma" or suffix == ".tiledbsoma":
|
79
85
|
if mode not in {"r", "w"}:
|
80
86
|
raise ValueError("`mode` should be either 'r' or 'w' for tiledbsoma.")
|
81
|
-
return _open_tiledbsoma(
|
87
|
+
return _open_tiledbsoma(objectpath, mode=mode) # type: ignore
|
82
88
|
elif suffix in {".h5", ".hdf5", ".h5ad"}:
|
83
|
-
conn, storage = registry.open("h5py",
|
89
|
+
conn, storage = registry.open("h5py", objectpath, mode=mode)
|
84
90
|
elif suffix == ".zarr":
|
85
|
-
conn, storage = registry.open("zarr",
|
91
|
+
conn, storage = registry.open("zarr", objectpath, mode=mode)
|
92
|
+
elif _is_pyarrow_dataset(objectpath):
|
93
|
+
return _open_pyarrow_dataset(objectpath)
|
86
94
|
else:
|
87
95
|
raise ValueError(
|
88
96
|
"object should have .h5, .hdf5, .h5ad, .zarr, .tiledbsoma suffix, not"
|
@@ -0,0 +1,31 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
import pyarrow.dataset
|
6
|
+
from lamindb_setup.core.upath import LocalPathClasses
|
7
|
+
|
8
|
+
if TYPE_CHECKING:
|
9
|
+
from upath import UPath
|
10
|
+
|
11
|
+
|
12
|
+
PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather")
|
13
|
+
|
14
|
+
|
15
|
+
def _is_pyarrow_dataset(path: UPath) -> bool:
|
16
|
+
# it is assumed here that path exists
|
17
|
+
if path.is_file():
|
18
|
+
return path.suffix in PYARROW_SUFFIXES
|
19
|
+
else:
|
20
|
+
objects = path.rglob("*")
|
21
|
+
suffixes = {object.suffix for object in objects if object.suffix != ""}
|
22
|
+
return len(suffixes) == 1 and suffixes.pop() in PYARROW_SUFFIXES
|
23
|
+
|
24
|
+
|
25
|
+
def _open_pyarrow_dataset(path: UPath) -> pyarrow.dataset.Dataset:
|
26
|
+
if isinstance(path, LocalPathClasses):
|
27
|
+
path_str, filesystem = path.as_posix(), None
|
28
|
+
else:
|
29
|
+
path_str, filesystem = path.path, path.fs
|
30
|
+
|
31
|
+
return pyarrow.dataset.dataset(path_str, filesystem=filesystem)
|
@@ -1,14 +1,15 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.76.
|
3
|
+
Version: 0.76.15
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
|
-
Requires-Python: >=3.9
|
6
|
+
Requires-Python: >=3.9,<3.13
|
7
7
|
Description-Content-Type: text/markdown
|
8
8
|
Classifier: Programming Language :: Python :: 3.9
|
9
9
|
Classifier: Programming Language :: Python :: 3.10
|
10
10
|
Classifier: Programming Language :: Python :: 3.11
|
11
|
-
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
12
|
+
Requires-Dist: lnschema_core==0.76.1
|
12
13
|
Requires-Dist: lamin_utils==0.13.7
|
13
14
|
Requires-Dist: lamin_cli==0.20.1
|
14
15
|
Requires-Dist: lamindb_setup
|
@@ -17,9 +18,7 @@ Requires-Dist: pyarrow
|
|
17
18
|
Requires-Dist: typing_extensions!=4.6.0
|
18
19
|
Requires-Dist: python-dateutil
|
19
20
|
Requires-Dist: anndata>=0.8.0,<=0.10.9
|
20
|
-
Requires-Dist: scipy<1.13.0rc1
|
21
21
|
Requires-Dist: fsspec
|
22
|
-
Requires-Dist: pandas
|
23
22
|
Requires-Dist: graphviz
|
24
23
|
Requires-Dist: psycopg2-binary
|
25
24
|
Requires-Dist: lamindb_setup[aws] ; extra == "aws"
|
@@ -35,7 +34,7 @@ Requires-Dist: mudata ; extra == "dev"
|
|
35
34
|
Requires-Dist: nbproject_test>=0.5.1 ; extra == "dev"
|
36
35
|
Requires-Dist: faker-biology ; extra == "dev"
|
37
36
|
Requires-Dist: django-schema-graph ; extra == "erdiagram"
|
38
|
-
Requires-Dist: readfcs>=1.1.
|
37
|
+
Requires-Dist: readfcs>=1.1.9 ; extra == "fcs"
|
39
38
|
Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
|
40
39
|
Requires-Dist: nbproject==0.10.5 ; extra == "jupyter"
|
41
40
|
Requires-Dist: jupytext ; extra == "jupyter"
|
@@ -1,23 +1,23 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
2
|
-
lamindb/_artifact.py,sha256=
|
3
|
-
lamindb/_can_validate.py,sha256=
|
4
|
-
lamindb/_collection.py,sha256=
|
5
|
-
lamindb/_curate.py,sha256=
|
6
|
-
lamindb/_feature.py,sha256=
|
7
|
-
lamindb/_feature_set.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=ZoRboX4PQc0sbVajeZ1fd4GoLN3YDj5U74zVUpcSd9I,2278
|
2
|
+
lamindb/_artifact.py,sha256=WNdKAJFu3sFgQ_Qe1JflDHiTP4EGhAIkzaHFwTthAjY,44903
|
3
|
+
lamindb/_can_validate.py,sha256=TKfkHgkPl1bwuJrQCp6pHgQbi7m2pc_zjFhZfUAUU20,19573
|
4
|
+
lamindb/_collection.py,sha256=MLOEoOgTu7rTlRD7zkm1k0YIk_gVhQDO17JbmZCptOs,14573
|
5
|
+
lamindb/_curate.py,sha256=pLsdtnnTn0qQbsUB4hrU3yuoVnVNh-BQe10EpMNZ4Ns,64083
|
6
|
+
lamindb/_feature.py,sha256=9cgrcHoyOa1jpON-9KiUfFSHcxiGECiefUAqAx4cVvU,5325
|
7
|
+
lamindb/_feature_set.py,sha256=WdXw_YGlMXCs8l0WVHOrqvvrH2hsQLqCiho8LFDYwhI,8161
|
8
8
|
lamindb/_filter.py,sha256=Pf9NHV4gm7NOC0Frtvx4W7nvwt2EowOP74DwppyXAZs,635
|
9
9
|
lamindb/_finish.py,sha256=VMAmxCUFmTKIMSCx7LEh4QAnWDeue6MeUAAzkMVEYMU,9546
|
10
10
|
lamindb/_from_values.py,sha256=uRtZLaMWKoANMMXm1hrADHfckRCTiK8_d02Yp07nLkw,14119
|
11
|
-
lamindb/_is_versioned.py,sha256=
|
12
|
-
lamindb/_parents.py,sha256=
|
13
|
-
lamindb/_query_manager.py,sha256=
|
14
|
-
lamindb/_query_set.py,sha256=
|
15
|
-
lamindb/_record.py,sha256=
|
11
|
+
lamindb/_is_versioned.py,sha256=GWZk-usV6aB33Cl9AlrnEGE5nxUkZic7QJzOW_DrwQA,1298
|
12
|
+
lamindb/_parents.py,sha256=INhbqh6IaUjuYVUOp-6rnOGN-8kGZirNqqW9XQ1qz_M,17119
|
13
|
+
lamindb/_query_manager.py,sha256=noc05Ad-aADxckOVBVDAiErFB7gL8XTgckELvI4rGmM,3702
|
14
|
+
lamindb/_query_set.py,sha256=6vHOvB_uXzKVVIw8AAVw7EYOIAGuw3TYcUzkpNlFLdE,12973
|
15
|
+
lamindb/_record.py,sha256=isR9GMQFwUUwVSmPNABvEzcJS38TbjhD7Cc6kygPsTA,26819
|
16
16
|
lamindb/_run.py,sha256=K_5drpLn3D7y3XtZ3vtAw35rG5RCSvB4bXQZx4ESSI0,1964
|
17
|
-
lamindb/_save.py,sha256=
|
17
|
+
lamindb/_save.py,sha256=OD052Qr_hiMyAonHTktKETe_Bhnp1RY810y0rwZqpBQ,11352
|
18
18
|
lamindb/_storage.py,sha256=GBVChv-DHVMNEBJL5l_JT6B4RDhZ6NnwgzmUICphYKk,413
|
19
|
-
lamindb/_transform.py,sha256=
|
20
|
-
lamindb/_ulabel.py,sha256=
|
19
|
+
lamindb/_transform.py,sha256=HpqRCk0ZTmqxSV4nRbyvDq8fAQEE9wTj31d-CusiL6A,4720
|
20
|
+
lamindb/_ulabel.py,sha256=DQQzAPkrOg8W9I77BJ5umajR8MQcFSvXYUy53YNN2HA,1604
|
21
21
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
22
22
|
lamindb/_view.py,sha256=4Ln2ItTb3857PAI-70O8eJYqoTJ_NNFc7E_wds6OGns,2412
|
23
23
|
lamindb/core/__init__.py,sha256=y87MCP1BEC2qHNVDIOwqVteIP_2hPCdIoa9JXr0EG8U,1524
|
@@ -26,7 +26,7 @@ lamindb/core/_data.py,sha256=BVZkxK8aloSecH25LivbwnjcG1fz7Gs2UDceO5pWd3I,20049
|
|
26
26
|
lamindb/core/_django.py,sha256=yeMPp1n9WrFmEjVRdavfpVqAolPLd24RseTQlvsK67w,7157
|
27
27
|
lamindb/core/_feature_manager.py,sha256=q4WmzJvFLL_fAs-vNRgV2klanAoU6Wu8_g0O2dyIjVg,40027
|
28
28
|
lamindb/core/_label_manager.py,sha256=yh-r4KbtOArMUKPJL75yOxJc8HUKqsik8pExBVKyDlA,10949
|
29
|
-
lamindb/core/_mapped_collection.py,sha256=
|
29
|
+
lamindb/core/_mapped_collection.py,sha256=EDS0xzOdCc_iGE_Iqv5COTVHNm4jWue7Jtcd8DdXkJU,24591
|
30
30
|
lamindb/core/_settings.py,sha256=6jNadlQdimxCsKR2ZyUD0YJYzOdubTnKktki-MqEWqQ,6137
|
31
31
|
lamindb/core/_sync_git.py,sha256=lIgl6YfpH4rCFT1WILAp7zlemZfxog1d0zp3cX0KIZw,4531
|
32
32
|
lamindb/core/_track_environment.py,sha256=Ywzg_sJ7guI1dcsN7h5orce9VdYl8VGVE3OLITlHBXQ,820
|
@@ -42,7 +42,8 @@ lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh
|
|
42
42
|
lamindb/core/storage/__init__.py,sha256=JOIMu_7unbyhndtH1j0Q-9AvY8knSuc1IJO9sQnyBAQ,498
|
43
43
|
lamindb/core/storage/_anndata_accessor.py,sha256=2p1HjoatmZjQ1u94tjgmXgiv8MKowrQH5xInDmiLCw4,24191
|
44
44
|
lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
|
45
|
-
lamindb/core/storage/_backed_access.py,sha256=
|
45
|
+
lamindb/core/storage/_backed_access.py,sha256=t9iS9mlZQBy1FfIS-Twt-96npYiShbPwEo2y9_3b6jY,3517
|
46
|
+
lamindb/core/storage/_pyarrow_dataset.py,sha256=wuLsEvdblqMdUdDfMtis8AWrE3igzvFWTSTbxuD1Oc8,926
|
46
47
|
lamindb/core/storage/_tiledbsoma.py,sha256=0NPLS5m1icEhzWPfXAv4U2SNiLGqGQd7FM6xCm5wYEc,7269
|
47
48
|
lamindb/core/storage/_valid_suffixes.py,sha256=vUSeQ4s01rdhD_vSd6wKmFBsgMJAKkBMnL_T9Y1znMg,501
|
48
49
|
lamindb/core/storage/_zarr.py,sha256=TODQD3p1eykoPwP-c-YRP_UDmsbMeBGMGvkBxxOMeYc,3663
|
@@ -55,7 +56,7 @@ lamindb/integrations/__init__.py,sha256=RWGMYYIzr8zvmNPyVB4m-p4gMDhxdRbjES2Ed23O
|
|
55
56
|
lamindb/integrations/_vitessce.py,sha256=uPl45_w4Uu9_BhpBDDVonC1nDOuAnB7DAnzi5w5bZAE,4032
|
56
57
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
57
58
|
lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
|
58
|
-
lamindb-0.76.
|
59
|
-
lamindb-0.76.
|
60
|
-
lamindb-0.76.
|
61
|
-
lamindb-0.76.
|
59
|
+
lamindb-0.76.15.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
60
|
+
lamindb-0.76.15.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
61
|
+
lamindb-0.76.15.dist-info/METADATA,sha256=31K0SoWfAxBpWR3x_7PLFfwpu-mnsQ8IHFqsvjdg_cw,2365
|
62
|
+
lamindb-0.76.15.dist-info/RECORD,,
|
File without changes
|
File without changes
|