lamindb 0.76.14__py3-none-any.whl → 0.76.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -43,7 +43,7 @@ Modules and settings.
43
43
  """
44
44
 
45
45
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
46
- __version__ = "0.76.14"
46
+ __version__ = "0.76.15"
47
47
 
48
48
  import os as _os
49
49
 
lamindb/_artifact.py CHANGED
@@ -28,39 +28,41 @@ from lnschema_core.types import (
28
28
  VisibilityChoice,
29
29
  )
30
30
 
31
- from lamindb._utils import attach_func_to_class_method
32
- from lamindb.core._data import _track_run_input, describe, view_lineage
33
- from lamindb.core._settings import settings
34
- from lamindb.core.exceptions import IntegrityError, InvalidArgument
35
- from lamindb.core.loaders import load_to_memory
36
- from lamindb.core.storage import (
31
+ from ._utils import attach_func_to_class_method
32
+ from .core._data import (
33
+ _track_run_input,
34
+ add_transform_to_kwargs,
35
+ describe,
36
+ get_run,
37
+ save_feature_set_links,
38
+ save_feature_sets,
39
+ view_lineage,
40
+ )
41
+ from .core._settings import settings
42
+ from .core.exceptions import IntegrityError, InvalidArgument
43
+ from .core.loaders import load_to_memory
44
+ from .core.storage import (
37
45
  LocalPathClasses,
38
46
  UPath,
39
47
  delete_storage,
40
48
  infer_suffix,
41
49
  write_to_disk,
42
50
  )
43
- from lamindb.core.storage.paths import (
51
+ from .core.storage._pyarrow_dataset import PYARROW_SUFFIXES
52
+ from .core.storage.objects import _mudata_is_installed
53
+ from .core.storage.paths import (
54
+ AUTO_KEY_PREFIX,
44
55
  auto_storage_key_from_artifact,
45
56
  auto_storage_key_from_artifact_uid,
46
57
  check_path_is_child_of_root,
47
58
  filepath_cache_key_from_artifact,
48
59
  filepath_from_artifact,
49
60
  )
50
- from lamindb.core.versioning import (
61
+ from .core.versioning import (
51
62
  create_uid,
52
63
  message_update_key_in_version_family,
53
64
  )
54
65
 
55
- from .core._data import (
56
- add_transform_to_kwargs,
57
- get_run,
58
- save_feature_set_links,
59
- save_feature_sets,
60
- )
61
- from .core.storage.objects import _mudata_is_installed
62
- from .core.storage.paths import AUTO_KEY_PREFIX
63
-
64
66
  try:
65
67
  from .core.storage._zarr import zarr_is_adata
66
68
  except ImportError:
@@ -72,6 +74,7 @@ except ImportError:
72
74
  if TYPE_CHECKING:
73
75
  from lamindb_setup.core.types import UPathStr
74
76
  from mudata import MuData
77
+ from pyarrow.dataset import Dataset as PyArrowDataset
75
78
  from tiledbsoma import Collection as SOMACollection
76
79
  from tiledbsoma import Experiment as SOMAExperiment
77
80
 
@@ -772,19 +775,14 @@ def from_dir(
772
775
  else:
773
776
  folder_key_path = Path(key)
774
777
 
775
- # always sanitize by stripping a trailing slash
776
- folder_key = folder_key_path.as_posix().rstrip("/")
777
-
778
- # TODO: (non-local) UPath doesn't list the first level artifacts and dirs with "*"
779
- pattern = "" if not isinstance(folderpath, LocalPathClasses) else "*"
780
-
778
+ folder_key = folder_key_path.as_posix()
781
779
  # silence fine-grained logging
782
780
  verbosity = settings.verbosity
783
781
  verbosity_int = settings._verbosity_int
784
782
  if verbosity_int >= 1:
785
783
  settings.verbosity = "warning"
786
784
  artifacts_dict = {}
787
- for filepath in folderpath.rglob(pattern):
785
+ for filepath in folderpath.rglob("*"):
788
786
  if filepath.is_file():
789
787
  relative_path = get_relative_path_to_directory(filepath, folderpath)
790
788
  artifact_key = folder_key + "/" + relative_path.as_posix()
@@ -802,7 +800,8 @@ def from_dir(
802
800
  if artifact.hash is not None
803
801
  ]
804
802
  uids = artifacts_dict.keys()
805
- if len(set(hashes)) == len(hashes):
803
+ n_unique_hashes = len(set(hashes))
804
+ if n_unique_hashes == len(hashes):
806
805
  artifacts = list(artifacts_dict.values())
807
806
  else:
808
807
  # consider exact duplicates (same id, same hash)
@@ -811,7 +810,7 @@ def from_dir(
811
810
  # logger.warning("dropping duplicate records in list of artifact records")
812
811
  # artifacts = list(set(uids))
813
812
  # consider false duplicates (different id, same hash)
814
- if not len(set(uids)) == len(set(hashes)):
813
+ if not len(set(uids)) == n_unique_hashes:
815
814
  seen_hashes = set()
816
815
  non_unique_artifacts = {
817
816
  hash: artifact
@@ -905,14 +904,19 @@ def replace(
905
904
  # docstring handled through attach_func_to_class_method
906
905
  def open(
907
906
  self, mode: str = "r", is_run_input: bool | None = None
908
- ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
907
+ ) -> (
908
+ AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
909
+ ):
909
910
  # ignore empty suffix for now
910
- suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
911
+ suffixes = ("", ".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma") + PYARROW_SUFFIXES
911
912
  if self.suffix not in suffixes:
912
913
  raise ValueError(
913
- "Artifact should have a zarr, h5 or tiledbsoma object as the underlying data, please"
914
- " use one of the following suffixes for the object name:"
915
- f" {', '.join(suffixes[:-1])}."
914
+ "Artifact should have a zarr, h5, tiledbsoma object"
915
+ " or a compatible `pyarrow.dataset.dataset` directory"
916
+ " as the underlying data, please use one of the following suffixes"
917
+ f" for the object name: {', '.join(suffixes[1:])}."
918
+ f" Or no suffix for a folder with {', '.join(PYARROW_SUFFIXES)} files"
919
+ " (no mixing allowed)."
916
920
  )
917
921
  if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
918
922
  raise ValueError("Only a tiledbsoma store can be openened with `mode!='r'`.")
lamindb/_can_validate.py CHANGED
@@ -10,10 +10,10 @@ from lamin_utils import colors, logger
10
10
  from lamindb_setup.core._docs import doc_args
11
11
  from lnschema_core import CanValidate, Record
12
12
 
13
- from lamindb._utils import attach_func_to_class_method
14
-
15
13
  from ._from_values import _has_organism_field, _print_values, get_or_create_records
16
14
  from ._record import _queryset, get_name_field
15
+ from ._utils import attach_func_to_class_method
16
+ from .core.exceptions import ValidationError
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from django.db.models import QuerySet
@@ -496,9 +496,9 @@ def _add_or_remove_synonyms(
496
496
  " with the following records:\n"
497
497
  )
498
498
  display(records_df)
499
- raise ValueError(
500
- "cannot assigned a synonym that is already associated with a record to a different record.\n"
501
- "Consider removing the synonym from existing records or using a different synonym."
499
+ raise ValidationError(
500
+ f"you are trying to assign a synonym to record: {record}\n"
501
+ " consider removing the synonym from existing records or using a different synonym."
502
502
  )
503
503
 
504
504
  # passed synonyms
@@ -516,7 +516,7 @@ def _add_or_remove_synonyms(
516
516
  return
517
517
  # because we use | as the separator
518
518
  if any("|" in i for i in syn_new_set):
519
- raise ValueError("a synonym can't contain '|'!")
519
+ raise ValidationError("a synonym can't contain '|'!")
520
520
 
521
521
  # existing synonyms
522
522
  syns_exist = record.synonyms
lamindb/_collection.py CHANGED
@@ -20,20 +20,21 @@ from lnschema_core.models import (
20
20
  )
21
21
  from lnschema_core.types import VisibilityChoice
22
22
 
23
- from lamindb._utils import attach_func_to_class_method
24
- from lamindb.core._data import _track_run_input, describe, view_lineage
25
- from lamindb.core._mapped_collection import MappedCollection
26
- from lamindb.core.versioning import process_revises
27
-
28
23
  from . import Artifact, Run
29
24
  from ._record import init_self_from_db, update_attributes
25
+ from ._utils import attach_func_to_class_method
30
26
  from .core._data import (
27
+ _track_run_input,
31
28
  add_transform_to_kwargs,
29
+ describe,
32
30
  get_run,
33
31
  save_feature_set_links,
34
32
  save_feature_sets,
33
+ view_lineage,
35
34
  )
35
+ from .core._mapped_collection import MappedCollection
36
36
  from .core._settings import settings
37
+ from .core.versioning import process_revises
37
38
 
38
39
  if TYPE_CHECKING:
39
40
  from collections.abc import Iterable
lamindb/_curate.py CHANGED
@@ -184,7 +184,7 @@ class DataFrameCurator(BaseCurator):
184
184
  def non_validated(self) -> list:
185
185
  """Return the non-validated features and labels."""
186
186
  if self._non_validated is None:
187
- raise ValueError("Please run validate() first!")
187
+ raise ValidationError("Please run validate() first!")
188
188
  return self._non_validated
189
189
 
190
190
  @property
@@ -222,7 +222,7 @@ class DataFrameCurator(BaseCurator):
222
222
  valid_keys = set(self._df.columns) | {"columns"} | extra
223
223
  nonval_keys = [key for key in d.keys() if key not in valid_keys]
224
224
  if len(nonval_keys) > 0:
225
- raise ValueError(
225
+ raise ValidationError(
226
226
  f"the following keys passed to {name} are not allowed: {nonval_keys}"
227
227
  )
228
228
 
@@ -288,9 +288,11 @@ class DataFrameCurator(BaseCurator):
288
288
  self._save_columns(validated_only=validated_only, **kwargs)
289
289
  else:
290
290
  if categorical not in self.fields:
291
- raise ValueError(f"Feature {categorical} is not part of the fields!")
291
+ raise ValidationError(
292
+ f"Feature {categorical} is not part of the fields!"
293
+ )
292
294
  update_registry(
293
- values=self._df[categorical].unique().tolist(),
295
+ values=flatten_unique(self._df[categorical]),
294
296
  field=self.fields[categorical],
295
297
  key=categorical,
296
298
  using_key=self._using_key,
@@ -508,13 +510,13 @@ class AnnDataCurator(DataFrameCurator):
508
510
  exclude=self._exclude.get("var_index"),
509
511
  )
510
512
 
511
- def _update_registry_all(self):
513
+ def _update_registry_all(self, validated_only: bool = True, **kwargs):
512
514
  """Save labels for all features."""
513
515
  logger.info("saving validated records of 'var_index'")
514
- self._save_from_var_index(validated_only=True, **self._kwargs)
516
+ self._save_from_var_index(validated_only=validated_only, **self._kwargs)
515
517
  for name in self._obs_fields.keys():
516
518
  logger.info(f"saving validated terms of '{name}'")
517
- self._update_registry(name, validated_only=True, **self._kwargs)
519
+ self._update_registry(name, validated_only=validated_only, **self._kwargs)
518
520
 
519
521
  def add_new_from_var_index(self, organism: str | None = None, **kwargs):
520
522
  """Update variable records.
@@ -704,7 +706,7 @@ class MuDataCurator:
704
706
  """Verify the modality exists."""
705
707
  for modality in modalities:
706
708
  if modality not in self._mdata.mod.keys():
707
- raise ValueError(f"modality '{modality}' does not exist!")
709
+ raise ValidationError(f"modality '{modality}' does not exist!")
708
710
 
709
711
  def _save_from_var_index_modality(
710
712
  self, modality: str, validated_only: bool = True, **kwargs
@@ -729,7 +731,7 @@ class MuDataCurator:
729
731
  obs_fields: dict[str, dict[str, FieldAttr]] = {}
730
732
  for k, v in categoricals.items():
731
733
  if k not in self._mdata.obs.columns:
732
- raise ValueError(f"column '{k}' does not exist in mdata.obs!")
734
+ raise ValidationError(f"column '{k}' does not exist in mdata.obs!")
733
735
  if any(k.startswith(prefix) for prefix in prefixes):
734
736
  modality, col = k.split(":")[0], k.split(":")[1]
735
737
  if modality not in obs_fields.keys():
@@ -1120,7 +1122,7 @@ def check_registry_organism(registry: Record, organism: str | None = None) -> di
1120
1122
  import bionty as bt
1121
1123
 
1122
1124
  if organism is None and bt.settings.organism is None:
1123
- raise ValueError(
1125
+ raise ValidationError(
1124
1126
  f"{registry.__name__} registry requires an organism!\n"
1125
1127
  " → please pass an organism name via organism="
1126
1128
  )
@@ -1148,8 +1150,8 @@ def validate_categories(
1148
1150
  using_key: A reference LaminDB instance.
1149
1151
  organism: The organism name.
1150
1152
  source: The source record.
1151
- exclude: Exclude specific values.
1152
- standardize: Standardize the values.
1153
+ exclude: Exclude specific values from validation.
1154
+ standardize: Whether to standardize the values.
1153
1155
  validated_hint_print: The hint to print for validated values.
1154
1156
  """
1155
1157
  from lamindb._from_values import _print_values
@@ -1210,12 +1212,15 @@ def validate_categories(
1210
1212
 
1211
1213
  validated_hint_print = validated_hint_print or f".add_validated_from('{key}')"
1212
1214
  n_validated = len(values_validated)
1215
+
1213
1216
  if n_validated > 0:
1214
1217
  _log_mapping_info()
1218
+ terms_str = f"{', '.join([f'{chr(39)}{v}{chr(39)}' for v in values_validated[:10]])}{', ...' if len(values_validated) > 10 else ''}"
1219
+ val_numerous = "" if n_validated == 1 else "s"
1215
1220
  logger.warning(
1216
- f"found {colors.yellow(n_validated)} validated terms: "
1217
- f"{colors.yellow(values_validated)}\n → save terms via "
1218
- f"{colors.yellow(validated_hint_print)}"
1221
+ f"found {colors.yellow(n_validated)} validated term{val_numerous}: "
1222
+ f"{colors.yellow(terms_str)}\n"
1223
+ f"→ save term{val_numerous} via {colors.yellow(validated_hint_print)}"
1219
1224
  )
1220
1225
 
1221
1226
  non_validated_hint_print = validated_hint_print.replace("_validated_", "_new_")
@@ -1230,13 +1235,15 @@ def validate_categories(
1230
1235
  # validated values still need to be saved to the current instance
1231
1236
  return False, []
1232
1237
  else:
1233
- are = "are" if n_non_validated > 1 else "is"
1238
+ non_val_numerous = ("", "is") if n_non_validated == 1 else ("s", "are")
1234
1239
  print_values = _print_values(non_validated)
1235
1240
  warning_message = (
1236
- f"{colors.red(f'{n_non_validated} terms')} {are} not validated: "
1237
- f"{colors.red(print_values)}\n → fix typos, remove non-existent values, or save terms via "
1241
+ f"{colors.red(f'{n_non_validated} term{non_val_numerous[0]}')} {non_val_numerous[1]} not validated: "
1242
+ f"{colors.red(', '.join(print_values.split(', ')[:10]) + ', ...' if len(print_values.split(', ')) > 10 else print_values)}\n"
1243
+ f"→ fix typo{non_val_numerous[0]}, remove non-existent value{non_val_numerous[0]}, or save term{non_val_numerous[0]} via "
1238
1244
  f"{colors.red(non_validated_hint_print)}"
1239
1245
  )
1246
+
1240
1247
  if logger.indent == "":
1241
1248
  _log_mapping_info()
1242
1249
  logger.warning(warning_message)
@@ -1427,6 +1434,19 @@ def save_artifact(
1427
1434
  return artifact
1428
1435
 
1429
1436
 
1437
+ def flatten_unique(series):
1438
+ """Flatten a pandas series if it contains lists."""
1439
+ result = set()
1440
+
1441
+ for item in series:
1442
+ if isinstance(item, list):
1443
+ result.update(item)
1444
+ else:
1445
+ result.add(item)
1446
+
1447
+ return list(result)
1448
+
1449
+
1430
1450
  def update_registry(
1431
1451
  values: list[str],
1432
1452
  field: FieldAttr,
@@ -1596,24 +1616,25 @@ def log_saved_labels(
1596
1616
  continue
1597
1617
 
1598
1618
  if k == "without reference" and validated_only:
1599
- msg = colors.yellow(
1600
- f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
1601
- )
1602
- lookup_print = (
1603
- f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
1604
- )
1605
-
1606
- hint = f".add_new_from('{key}')"
1607
- msg += f"\n → to lookup values, use {lookup_print}"
1608
- msg += (
1609
- f"\n → to save, run {colors.yellow(hint)}"
1610
- if save_function == "add_new_from"
1611
- else f"\n → to save, run {colors.yellow(save_function)}"
1612
- )
1613
- if warning:
1614
- logger.warning(msg)
1615
- else:
1616
- logger.info(msg)
1619
+ continue
1620
+ # msg = colors.yellow(
1621
+ # f"{len(labels)} non-validated values are not saved in {model_field}: {labels}!"
1622
+ # )
1623
+ # lookup_print = (
1624
+ # f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
1625
+ # )
1626
+
1627
+ # hint = f".add_new_from('{key}')"
1628
+ # msg += f"\n → to lookup values, use {lookup_print}"
1629
+ # msg += (
1630
+ # f"\n → to save, run {colors.yellow(hint)}"
1631
+ # if save_function == "add_new_from"
1632
+ # else f"\n → to save, run {colors.yellow(save_function)}"
1633
+ # )
1634
+ # if warning:
1635
+ # logger.warning(msg)
1636
+ # else:
1637
+ # logger.info(msg)
1617
1638
  else:
1618
1639
  k = "" if k == "without reference" else f"{colors.green(k)} "
1619
1640
  # the term "transferred" stresses that this is always in the context of transferring
@@ -1631,8 +1652,8 @@ def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> N
1631
1652
  all_records = registry.from_values(list(values), field=field)
1632
1653
  is_feature = registry.filter(name=f"is_{key}").one_or_none()
1633
1654
  if is_feature is None:
1634
- is_feature = registry(name=f"is_{key}")
1635
- is_feature.save()
1655
+ is_feature = registry(name=f"is_{key}").save()
1656
+ logger.important(f"Created a parent ULabel: {is_feature}")
1636
1657
  is_feature.children.add(*all_records)
1637
1658
 
1638
1659
 
@@ -1689,7 +1710,7 @@ def _save_organism(name: str): # pragma: no cover
1689
1710
  if organism is None:
1690
1711
  organism = bt.Organism.from_source(name=name)
1691
1712
  if organism is None:
1692
- raise ValueError(
1713
+ raise ValidationError(
1693
1714
  f"Organism '{name}' not found\n"
1694
1715
  f" → please save it: bt.Organism(name='{name}').save()"
1695
1716
  )
lamindb/_feature.py CHANGED
@@ -8,10 +8,9 @@ from lamindb_setup.core._docs import doc_args
8
8
  from lnschema_core.models import Artifact, Feature
9
9
  from pandas.api.types import CategoricalDtype, is_string_dtype
10
10
 
11
- from lamindb._utils import attach_func_to_class_method
12
- from lamindb.core._settings import settings
13
-
14
11
  from ._query_set import RecordsList
12
+ from ._utils import attach_func_to_class_method
13
+ from .core._settings import settings
15
14
  from .core.schema import dict_schema_name_to_model_name
16
15
 
17
16
  if TYPE_CHECKING:
lamindb/_feature_set.py CHANGED
@@ -10,10 +10,9 @@ from lamindb_setup.core.hashing import hash_set
10
10
  from lnschema_core import Feature, FeatureSet, Record, ids
11
11
  from lnschema_core.types import FieldAttr, ListLike
12
12
 
13
- from lamindb._utils import attach_func_to_class_method
14
-
15
13
  from ._feature import convert_numpy_dtype_to_lamin_feature_type
16
14
  from ._record import init_self_from_db
15
+ from ._utils import attach_func_to_class_method
17
16
  from .core.exceptions import ValidationError
18
17
  from .core.schema import (
19
18
  dict_related_model_to_related_name,
lamindb/_is_versioned.py CHANGED
@@ -5,8 +5,7 @@ from lamin_utils import logger
5
5
  from lamindb_setup.core.upath import UPath
6
6
  from lnschema_core.models import IsVersioned
7
7
 
8
- from lamindb._utils import attach_func_to_class_method
9
-
8
+ from ._utils import attach_func_to_class_method
10
9
  from .core.versioning import create_uid, get_new_path_from_uid
11
10
 
12
11
 
lamindb/_parents.py CHANGED
@@ -8,13 +8,14 @@ from lamin_utils import logger
8
8
  from lnschema_core import Artifact, Collection, Record, Run, Transform
9
9
  from lnschema_core.models import HasParents, format_field_value
10
10
 
11
- from lamindb._utils import attach_func_to_class_method
12
-
13
11
  from ._record import get_name_field
12
+ from ._utils import attach_func_to_class_method
14
13
 
15
14
  if TYPE_CHECKING:
16
15
  from lnschema_core.types import StrField
17
16
 
17
+ from lamindb.core import QuerySet
18
+
18
19
  LAMIN_GREEN_LIGHTER = "#10b981"
19
20
  LAMIN_GREEN_DARKER = "#065f46"
20
21
  GREEN_FILL = "honeydew"
@@ -22,6 +23,30 @@ TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}
22
23
  is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
23
24
 
24
25
 
26
+ # this is optimized to have fewer recursive calls
27
+ # also len of QuerySet can be costly at times
28
+ def _query_relatives(
29
+ records: QuerySet | list[Record],
30
+ kind: Literal["parents", "children"],
31
+ cls: type[HasParents],
32
+ ) -> QuerySet:
33
+ relatives = cls.objects.none()
34
+ if len(records) == 0:
35
+ return relatives
36
+ for record in records:
37
+ relatives = relatives.union(getattr(record, kind).all())
38
+ relatives = relatives.union(_query_relatives(relatives, kind, cls))
39
+ return relatives
40
+
41
+
42
+ def query_parents(self) -> QuerySet:
43
+ return _query_relatives([self], "parents", self.__class__)
44
+
45
+
46
+ def query_children(self) -> QuerySet:
47
+ return _query_relatives([self], "children", self.__class__)
48
+
49
+
25
50
  def _transform_emoji(transform: Transform):
26
51
  if transform is not None:
27
52
  return TRANSFORM_EMOJIS.get(transform.type, "💫")
@@ -474,9 +499,7 @@ def _df_edges_from_runs(df_values: list):
474
499
  return df
475
500
 
476
501
 
477
- METHOD_NAMES = [
478
- "view_parents",
479
- ]
502
+ METHOD_NAMES = ["view_parents", "query_parents", "query_children"]
480
503
 
481
504
  if ln_setup._TESTING: # type: ignore
482
505
  from inspect import signature
lamindb/_query_manager.py CHANGED
@@ -7,9 +7,8 @@ from lamin_utils import logger
7
7
  from lamindb_setup.core._docs import doc_args
8
8
  from lnschema_core.models import Record
9
9
 
10
- from lamindb.core._settings import settings
11
-
12
10
  from .core._feature_manager import get_feature_set_by_slot_
11
+ from .core._settings import settings
13
12
 
14
13
  if TYPE_CHECKING:
15
14
  from lnschema_core.types import StrField
lamindb/_query_set.py CHANGED
@@ -20,7 +20,7 @@ from lnschema_core.models import (
20
20
  VisibilityChoice,
21
21
  )
22
22
 
23
- from lamindb.core.exceptions import DoesNotExist
23
+ from .core.exceptions import DoesNotExist
24
24
 
25
25
  if TYPE_CHECKING:
26
26
  from collections.abc import Iterable
lamindb/_record.py CHANGED
@@ -19,9 +19,9 @@ from lamindb_setup.core._hub_core import connect_instance_hub
19
19
  from lamindb_setup.core._settings_store import instance_settings_file
20
20
  from lnschema_core.models import Artifact, Feature, IsVersioned, Record, Run, Transform
21
21
 
22
- from lamindb._utils import attach_func_to_class_method
23
- from lamindb.core._settings import settings
24
- from lamindb.core.exceptions import RecordNameChangeIntegrityError
22
+ from ._utils import attach_func_to_class_method
23
+ from .core._settings import settings
24
+ from .core.exceptions import RecordNameChangeIntegrityError
25
25
 
26
26
  if TYPE_CHECKING:
27
27
  import pandas as pd
lamindb/_save.py CHANGED
@@ -15,8 +15,8 @@ from lamin_utils import logger
15
15
  from lamindb_setup.core.upath import LocalPathClasses
16
16
  from lnschema_core.models import Artifact, Record
17
17
 
18
- from lamindb.core._settings import settings
19
- from lamindb.core.storage.paths import (
18
+ from .core._settings import settings
19
+ from .core.storage.paths import (
20
20
  _cache_key_from_artifact_storage,
21
21
  attempt_accessing_path,
22
22
  auto_storage_key_from_artifact,
lamindb/_transform.py CHANGED
@@ -6,10 +6,9 @@ from lamin_utils import logger
6
6
  from lamindb_setup.core._docs import doc_args
7
7
  from lnschema_core.models import Run, Transform
8
8
 
9
- from lamindb.core.exceptions import InconsistentKey
10
-
11
9
  from ._parents import _view_parents
12
10
  from ._run import delete_run_artifacts
11
+ from .core.exceptions import InconsistentKey
13
12
  from .core.versioning import message_update_key_in_version_family, process_revises
14
13
 
15
14
  if TYPE_CHECKING:
lamindb/_ulabel.py CHANGED
@@ -6,7 +6,7 @@ import lamindb_setup as ln_setup
6
6
  from lamindb_setup.core._docs import doc_args
7
7
  from lnschema_core import ULabel
8
8
 
9
- from lamindb._utils import attach_func_to_class_method
9
+ from ._utils import attach_func_to_class_method
10
10
 
11
11
  if TYPE_CHECKING:
12
12
  from lnschema_core.types import ListLike
@@ -69,7 +69,7 @@ class MappedCollection:
69
69
 
70
70
  .. note::
71
71
 
72
- For a guide, see :doc:`docs:scrna5`.
72
+ For a guide, see :doc:`docs:scrna-mappedcollection`.
73
73
 
74
74
  For more convenient use within :class:`~lamindb.core.MappedCollection`,
75
75
  see :meth:`~lamindb.Collection.mapped`.
@@ -7,11 +7,13 @@ from anndata._io.specs.registry import get_spec
7
7
  from lnschema_core import Artifact
8
8
 
9
9
  from ._anndata_accessor import AnnDataAccessor, StorageType, registry
10
+ from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
10
11
  from ._tiledbsoma import _open_tiledbsoma
11
12
  from .paths import filepath_from_artifact
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from fsspec.core import OpenFile
16
+ from pyarrow.dataset import Dataset as PyArrowDataset
15
17
  from tiledbsoma import Collection as SOMACollection
16
18
  from tiledbsoma import Experiment as SOMAExperiment
17
19
  from upath import UPath
@@ -67,22 +69,28 @@ def backed_access(
67
69
  artifact_or_filepath: Artifact | UPath,
68
70
  mode: str = "r",
69
71
  using_key: str | None = None,
70
- ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
72
+ ) -> (
73
+ AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment | PyArrowDataset
74
+ ):
71
75
  if isinstance(artifact_or_filepath, Artifact):
72
- filepath, _ = filepath_from_artifact(artifact_or_filepath, using_key=using_key)
76
+ objectpath, _ = filepath_from_artifact(
77
+ artifact_or_filepath, using_key=using_key
78
+ )
73
79
  else:
74
- filepath = artifact_or_filepath
75
- name = filepath.name
76
- suffix = filepath.suffix
80
+ objectpath = artifact_or_filepath
81
+ name = objectpath.name
82
+ suffix = objectpath.suffix
77
83
 
78
84
  if name == "soma" or suffix == ".tiledbsoma":
79
85
  if mode not in {"r", "w"}:
80
86
  raise ValueError("`mode` should be either 'r' or 'w' for tiledbsoma.")
81
- return _open_tiledbsoma(filepath, mode=mode) # type: ignore
87
+ return _open_tiledbsoma(objectpath, mode=mode) # type: ignore
82
88
  elif suffix in {".h5", ".hdf5", ".h5ad"}:
83
- conn, storage = registry.open("h5py", filepath, mode=mode)
89
+ conn, storage = registry.open("h5py", objectpath, mode=mode)
84
90
  elif suffix == ".zarr":
85
- conn, storage = registry.open("zarr", filepath, mode=mode)
91
+ conn, storage = registry.open("zarr", objectpath, mode=mode)
92
+ elif _is_pyarrow_dataset(objectpath):
93
+ return _open_pyarrow_dataset(objectpath)
86
94
  else:
87
95
  raise ValueError(
88
96
  "object should have .h5, .hdf5, .h5ad, .zarr, .tiledbsoma suffix, not"
@@ -0,0 +1,31 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ import pyarrow.dataset
6
+ from lamindb_setup.core.upath import LocalPathClasses
7
+
8
+ if TYPE_CHECKING:
9
+ from upath import UPath
10
+
11
+
12
+ PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather")
13
+
14
+
15
+ def _is_pyarrow_dataset(path: UPath) -> bool:
16
+ # it is assumed here that path exists
17
+ if path.is_file():
18
+ return path.suffix in PYARROW_SUFFIXES
19
+ else:
20
+ objects = path.rglob("*")
21
+ suffixes = {object.suffix for object in objects if object.suffix != ""}
22
+ return len(suffixes) == 1 and suffixes.pop() in PYARROW_SUFFIXES
23
+
24
+
25
+ def _open_pyarrow_dataset(path: UPath) -> pyarrow.dataset.Dataset:
26
+ if isinstance(path, LocalPathClasses):
27
+ path_str, filesystem = path.as_posix(), None
28
+ else:
29
+ path_str, filesystem = path.path, path.fs
30
+
31
+ return pyarrow.dataset.dataset(path_str, filesystem=filesystem)
@@ -1,14 +1,15 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.76.14
3
+ Version: 0.76.15
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
- Requires-Python: >=3.9
6
+ Requires-Python: >=3.9,<3.13
7
7
  Description-Content-Type: text/markdown
8
8
  Classifier: Programming Language :: Python :: 3.9
9
9
  Classifier: Programming Language :: Python :: 3.10
10
10
  Classifier: Programming Language :: Python :: 3.11
11
- Requires-Dist: lnschema_core==0.76.0
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: lnschema_core==0.76.1
12
13
  Requires-Dist: lamin_utils==0.13.7
13
14
  Requires-Dist: lamin_cli==0.20.1
14
15
  Requires-Dist: lamindb_setup
@@ -17,9 +18,7 @@ Requires-Dist: pyarrow
17
18
  Requires-Dist: typing_extensions!=4.6.0
18
19
  Requires-Dist: python-dateutil
19
20
  Requires-Dist: anndata>=0.8.0,<=0.10.9
20
- Requires-Dist: scipy<1.13.0rc1
21
21
  Requires-Dist: fsspec
22
- Requires-Dist: pandas
23
22
  Requires-Dist: graphviz
24
23
  Requires-Dist: psycopg2-binary
25
24
  Requires-Dist: lamindb_setup[aws] ; extra == "aws"
@@ -35,7 +34,7 @@ Requires-Dist: mudata ; extra == "dev"
35
34
  Requires-Dist: nbproject_test>=0.5.1 ; extra == "dev"
36
35
  Requires-Dist: faker-biology ; extra == "dev"
37
36
  Requires-Dist: django-schema-graph ; extra == "erdiagram"
38
- Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
37
+ Requires-Dist: readfcs>=1.1.9 ; extra == "fcs"
39
38
  Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
40
39
  Requires-Dist: nbproject==0.10.5 ; extra == "jupyter"
41
40
  Requires-Dist: jupytext ; extra == "jupyter"
@@ -1,23 +1,23 @@
1
- lamindb/__init__.py,sha256=PelFCbUaNuz_1rnpOqj-mDCMqCfT2hJ27M3C-VkGJl0,2278
2
- lamindb/_artifact.py,sha256=9soXXT3g9tG1BTwX4DCRPurbFFcpHKOT8W7SsoLiNbo,44847
3
- lamindb/_can_validate.py,sha256=1pUavLwZ_yPAtbVYKOGYUHaPxlJGZ246qZ0e-4ZUDSc,19552
4
- lamindb/_collection.py,sha256=vt604fUjkmOYCGR4Sq_NTwnPywATfjUAdkQjuJJ17y0,14613
5
- lamindb/_curate.py,sha256=rFbPEoD-E-5s3QPIcUuedUO6a2c8QfpTrBfVX9gUVpE,63120
6
- lamindb/_feature.py,sha256=nZhtrH0ssoNls-hV-dkwfK9sKypg2El59R9qfarxfUE,5340
7
- lamindb/_feature_set.py,sha256=JQSP-YLam1KW-rDzly5Dm4IYVL2A6ec7ufIf6iCc2W8,8169
1
+ lamindb/__init__.py,sha256=ZoRboX4PQc0sbVajeZ1fd4GoLN3YDj5U74zVUpcSd9I,2278
2
+ lamindb/_artifact.py,sha256=WNdKAJFu3sFgQ_Qe1JflDHiTP4EGhAIkzaHFwTthAjY,44903
3
+ lamindb/_can_validate.py,sha256=TKfkHgkPl1bwuJrQCp6pHgQbi7m2pc_zjFhZfUAUU20,19573
4
+ lamindb/_collection.py,sha256=MLOEoOgTu7rTlRD7zkm1k0YIk_gVhQDO17JbmZCptOs,14573
5
+ lamindb/_curate.py,sha256=pLsdtnnTn0qQbsUB4hrU3yuoVnVNh-BQe10EpMNZ4Ns,64083
6
+ lamindb/_feature.py,sha256=9cgrcHoyOa1jpON-9KiUfFSHcxiGECiefUAqAx4cVvU,5325
7
+ lamindb/_feature_set.py,sha256=WdXw_YGlMXCs8l0WVHOrqvvrH2hsQLqCiho8LFDYwhI,8161
8
8
  lamindb/_filter.py,sha256=Pf9NHV4gm7NOC0Frtvx4W7nvwt2EowOP74DwppyXAZs,635
9
9
  lamindb/_finish.py,sha256=VMAmxCUFmTKIMSCx7LEh4QAnWDeue6MeUAAzkMVEYMU,9546
10
10
  lamindb/_from_values.py,sha256=uRtZLaMWKoANMMXm1hrADHfckRCTiK8_d02Yp07nLkw,14119
11
- lamindb/_is_versioned.py,sha256=5lAnhTboltFkZCKVRV1uxkm0OCjJz_HKi3yQq_vEuMs,1306
12
- lamindb/_parents.py,sha256=KMBUfCLNqjmFzOdZIXaUFqDPeEpWP28MCkHHPq887h8,16341
13
- lamindb/_query_manager.py,sha256=pmPhJQ85-7XeAU9TFv6LPGi9F7dBgztZgZcXz33HYJM,3710
14
- lamindb/_query_set.py,sha256=AyWvFZ-Vnd_1dhbDLkiyEh2-2XiIR_OpEk72xoQ2JVg,12980
15
- lamindb/_record.py,sha256=FkU7G1OUl0HPQO6wh8EkPh4T_ogxcy6QGkrVz_I4WUw,26840
11
+ lamindb/_is_versioned.py,sha256=GWZk-usV6aB33Cl9AlrnEGE5nxUkZic7QJzOW_DrwQA,1298
12
+ lamindb/_parents.py,sha256=INhbqh6IaUjuYVUOp-6rnOGN-8kGZirNqqW9XQ1qz_M,17119
13
+ lamindb/_query_manager.py,sha256=noc05Ad-aADxckOVBVDAiErFB7gL8XTgckELvI4rGmM,3702
14
+ lamindb/_query_set.py,sha256=6vHOvB_uXzKVVIw8AAVw7EYOIAGuw3TYcUzkpNlFLdE,12973
15
+ lamindb/_record.py,sha256=isR9GMQFwUUwVSmPNABvEzcJS38TbjhD7Cc6kygPsTA,26819
16
16
  lamindb/_run.py,sha256=K_5drpLn3D7y3XtZ3vtAw35rG5RCSvB4bXQZx4ESSI0,1964
17
- lamindb/_save.py,sha256=BCaSFnANYPxTqL5gw7Hrh_9kz7SDyOxrJV2KW6rXqts,11366
17
+ lamindb/_save.py,sha256=OD052Qr_hiMyAonHTktKETe_Bhnp1RY810y0rwZqpBQ,11352
18
18
  lamindb/_storage.py,sha256=GBVChv-DHVMNEBJL5l_JT6B4RDhZ6NnwgzmUICphYKk,413
19
- lamindb/_transform.py,sha256=wZDkY8lp4d_OsO5a7rLs1RamkDzBXZSLaWJU34zRnmA,4728
20
- lamindb/_ulabel.py,sha256=XDSdZBXX_ki5s1vOths3MjF2x5DPggBR_PV_KF4SGyg,1611
19
+ lamindb/_transform.py,sha256=HpqRCk0ZTmqxSV4nRbyvDq8fAQEE9wTj31d-CusiL6A,4720
20
+ lamindb/_ulabel.py,sha256=DQQzAPkrOg8W9I77BJ5umajR8MQcFSvXYUy53YNN2HA,1604
21
21
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
22
22
  lamindb/_view.py,sha256=4Ln2ItTb3857PAI-70O8eJYqoTJ_NNFc7E_wds6OGns,2412
23
23
  lamindb/core/__init__.py,sha256=y87MCP1BEC2qHNVDIOwqVteIP_2hPCdIoa9JXr0EG8U,1524
@@ -26,7 +26,7 @@ lamindb/core/_data.py,sha256=BVZkxK8aloSecH25LivbwnjcG1fz7Gs2UDceO5pWd3I,20049
26
26
  lamindb/core/_django.py,sha256=yeMPp1n9WrFmEjVRdavfpVqAolPLd24RseTQlvsK67w,7157
27
27
  lamindb/core/_feature_manager.py,sha256=q4WmzJvFLL_fAs-vNRgV2klanAoU6Wu8_g0O2dyIjVg,40027
28
28
  lamindb/core/_label_manager.py,sha256=yh-r4KbtOArMUKPJL75yOxJc8HUKqsik8pExBVKyDlA,10949
29
- lamindb/core/_mapped_collection.py,sha256=M50haewVAFONeF71QQbzD09L8lVZCL1hyf0W87jKE5U,24575
29
+ lamindb/core/_mapped_collection.py,sha256=EDS0xzOdCc_iGE_Iqv5COTVHNm4jWue7Jtcd8DdXkJU,24591
30
30
  lamindb/core/_settings.py,sha256=6jNadlQdimxCsKR2ZyUD0YJYzOdubTnKktki-MqEWqQ,6137
31
31
  lamindb/core/_sync_git.py,sha256=lIgl6YfpH4rCFT1WILAp7zlemZfxog1d0zp3cX0KIZw,4531
32
32
  lamindb/core/_track_environment.py,sha256=Ywzg_sJ7guI1dcsN7h5orce9VdYl8VGVE3OLITlHBXQ,820
@@ -42,7 +42,8 @@ lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh
42
42
  lamindb/core/storage/__init__.py,sha256=JOIMu_7unbyhndtH1j0Q-9AvY8knSuc1IJO9sQnyBAQ,498
43
43
  lamindb/core/storage/_anndata_accessor.py,sha256=2p1HjoatmZjQ1u94tjgmXgiv8MKowrQH5xInDmiLCw4,24191
44
44
  lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
45
- lamindb/core/storage/_backed_access.py,sha256=O0zazsDlW0PKa52WHV5HooHGGI81FxLT6VEvNONdiEc,3234
45
+ lamindb/core/storage/_backed_access.py,sha256=t9iS9mlZQBy1FfIS-Twt-96npYiShbPwEo2y9_3b6jY,3517
46
+ lamindb/core/storage/_pyarrow_dataset.py,sha256=wuLsEvdblqMdUdDfMtis8AWrE3igzvFWTSTbxuD1Oc8,926
46
47
  lamindb/core/storage/_tiledbsoma.py,sha256=0NPLS5m1icEhzWPfXAv4U2SNiLGqGQd7FM6xCm5wYEc,7269
47
48
  lamindb/core/storage/_valid_suffixes.py,sha256=vUSeQ4s01rdhD_vSd6wKmFBsgMJAKkBMnL_T9Y1znMg,501
48
49
  lamindb/core/storage/_zarr.py,sha256=TODQD3p1eykoPwP-c-YRP_UDmsbMeBGMGvkBxxOMeYc,3663
@@ -55,7 +56,7 @@ lamindb/integrations/__init__.py,sha256=RWGMYYIzr8zvmNPyVB4m-p4gMDhxdRbjES2Ed23O
55
56
  lamindb/integrations/_vitessce.py,sha256=uPl45_w4Uu9_BhpBDDVonC1nDOuAnB7DAnzi5w5bZAE,4032
56
57
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
57
58
  lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
58
- lamindb-0.76.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
59
- lamindb-0.76.14.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
60
- lamindb-0.76.14.dist-info/METADATA,sha256=j7r4goc9s3ANslQ7-pT7WIct1uDLJK3DGuN3kPsixYs,2361
61
- lamindb-0.76.14.dist-info/RECORD,,
59
+ lamindb-0.76.15.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
+ lamindb-0.76.15.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
61
+ lamindb-0.76.15.dist-info/METADATA,sha256=31K0SoWfAxBpWR3x_7PLFfwpu-mnsQ8IHFqsvjdg_cw,2365
62
+ lamindb-0.76.15.dist-info/RECORD,,