PyPI - lamindb - Versions diffs - 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

lamindb/__init__.py +52 -36
lamindb/_finish.py +17 -10
lamindb/_tracked.py +1 -1
lamindb/base/__init__.py +3 -1
lamindb/base/fields.py +40 -22
lamindb/base/ids.py +1 -94
lamindb/base/types.py +2 -0
lamindb/base/uids.py +117 -0
lamindb/core/_context.py +216 -133
lamindb/core/_settings.py +38 -25
lamindb/core/datasets/__init__.py +11 -4
lamindb/core/datasets/_core.py +5 -5
lamindb/core/datasets/_small.py +0 -93
lamindb/core/datasets/mini_immuno.py +172 -0
lamindb/core/loaders.py +1 -1
lamindb/core/storage/_backed_access.py +100 -6
lamindb/core/storage/_polars_lazy_df.py +51 -0
lamindb/core/storage/_pyarrow_dataset.py +15 -30
lamindb/core/storage/objects.py +6 -0
lamindb/core/subsettings/__init__.py +2 -0
lamindb/core/subsettings/_annotation_settings.py +11 -0
lamindb/curators/__init__.py +7 -3559
lamindb/curators/_legacy.py +2056 -0
lamindb/curators/core.py +1546 -0
lamindb/errors.py +11 -0
lamindb/examples/__init__.py +27 -0
lamindb/examples/schemas/__init__.py +12 -0
lamindb/examples/schemas/_anndata.py +25 -0
lamindb/examples/schemas/_simple.py +19 -0
lamindb/integrations/_vitessce.py +8 -5
lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
lamindb/models/__init__.py +12 -2
lamindb/models/_describe.py +21 -4
lamindb/models/_feature_manager.py +384 -301
lamindb/models/_from_values.py +1 -1
lamindb/models/_is_versioned.py +5 -15
lamindb/models/_label_manager.py +8 -2
lamindb/models/artifact.py +354 -177
lamindb/models/artifact_set.py +122 -0
lamindb/models/can_curate.py +4 -1
lamindb/models/collection.py +79 -56
lamindb/models/core.py +1 -1
lamindb/models/feature.py +78 -47
lamindb/models/has_parents.py +24 -9
lamindb/models/project.py +3 -3
lamindb/models/query_manager.py +221 -22
lamindb/models/query_set.py +251 -206
lamindb/models/record.py +211 -344
lamindb/models/run.py +59 -5
lamindb/models/save.py +9 -5
lamindb/models/schema.py +673 -196
lamindb/models/transform.py +5 -14
lamindb/models/ulabel.py +8 -5
{lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
lamindb-1.5.0.dist-info/RECORD +108 -0
lamindb-1.3.2.dist-info/RECORD +0 -95
{lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
{lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0

lamindb/core/storage/_pyarrow_dataset.py CHANGED Viewed

@@ -13,41 +13,26 @@ if TYPE_CHECKING:
 PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather", ".ipc")
-def _is_pyarrow_dataset(paths: UPath | list[UPath]) -> bool:
-    # it is assumed here that the paths exist
-    # we don't check here that the filesystem is the same
-    # but this is a requirement for pyarrow.dataset.dataset
-    if isinstance(paths, list):
-        path_list = paths
-    elif paths.is_dir():
-        path_list = [path for path in paths.rglob("*") if path.suffix != ""]
-    else:
-        path_list = [paths]
-    suffix = None
-    for path in path_list:
-        path_suffixes = path.suffixes
-        # this doesn't work for externally gzipped files, REMOVE LATER
-        path_suffix = (
-            path_suffixes[-2]
-            if len(path_suffixes) > 1 and ".gz" in path_suffixes
-            else path.suffix
-        )
-        if path_suffix not in PYARROW_SUFFIXES:
-            return False
-        elif suffix is None:
-            suffix = path_suffix
-        elif path_suffix != suffix:
-            return False
-    return True
 def _open_pyarrow_dataset(paths: UPath | list[UPath], **kwargs) -> PyArrowDataset:
     if isinstance(paths, list):
+        # a single path can be a directory, but a list of paths
+        # has to be a flat list of files
+        paths_str = []
         path0 = paths[0]
         if isinstance(path0, LocalPathClasses):
-            paths_str, filesystem = [path.as_posix() for path in paths], None
+            path_to_str = lambda p: p.as_posix()
+            filesystem = None
         else:
-            paths_str, filesystem = [path.path for path in paths], path0.fs
+            path_to_str = lambda p: p.path
+            filesystem = path0.fs
+        for path in paths:
+            if (
+                getattr(path, "protocol", None) not in {"http", "https"}
+                and path.is_dir()
+            ):
+                paths_str += [path_to_str(p) for p in path.rglob("*") if p.suffix != ""]
+            else:
+                paths_str.append(path_to_str(path))
     elif isinstance(paths, LocalPathClasses):
         paths_str, filesystem = paths.as_posix(), None
     else:

lamindb/core/storage/objects.py CHANGED Viewed

@@ -21,6 +21,7 @@ def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
     """Infer LaminDB storage file suffix from a data object."""
     if isinstance(dmem, AnnData):
         if format is not None:
+            # should be `.h5ad`, `.`zarr`, or `.anndata.zarr`
             if format not in {"h5ad", "zarr", "anndata.zarr"}:
                 raise ValueError(
                     "Error when specifying AnnData storage format, it should be"
@@ -31,6 +32,8 @@ def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
         return ".h5ad"
     if isinstance(dmem, DataFrame):
+        if format == ".csv":
+            return ".csv"
         return ".parquet"
     if with_package_obj(
@@ -79,6 +82,9 @@ def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None:
             raise NotImplementedError
     if isinstance(dmem, DataFrame):
+        if filepath.suffix == ".csv":
+            dmem.to_csv(filepath)
+            return
         dmem.to_parquet(filepath)
         return

lamindb/core/subsettings/__init__.py CHANGED Viewed

@@ -4,7 +4,9 @@
    :toctree: .
    CreationSettings
+   AnnotationSettings
 """
+from ._annotation_settings import AnnotationSettings
 from ._creation_settings import CreationSettings

lamindb/core/subsettings/_annotation_settings.py ADDED Viewed

@@ -0,0 +1,11 @@
+class AnnotationSettings:
+    n_max_records: int = 1000
+    """Maximal number of records to annotate with during automated annotation.
+    If the number of records to annotate exceeds this limit, print a warning and do not annotate.
+    The number is calculated per feature for labels, and per schema for features.
+    """
+annotation_settings = AnnotationSettings()

lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl