lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +52 -36
- lamindb/_finish.py +17 -10
- lamindb/_tracked.py +1 -1
- lamindb/base/__init__.py +3 -1
- lamindb/base/fields.py +40 -22
- lamindb/base/ids.py +1 -94
- lamindb/base/types.py +2 -0
- lamindb/base/uids.py +117 -0
- lamindb/core/_context.py +216 -133
- lamindb/core/_settings.py +38 -25
- lamindb/core/datasets/__init__.py +11 -4
- lamindb/core/datasets/_core.py +5 -5
- lamindb/core/datasets/_small.py +0 -93
- lamindb/core/datasets/mini_immuno.py +172 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_backed_access.py +100 -6
- lamindb/core/storage/_polars_lazy_df.py +51 -0
- lamindb/core/storage/_pyarrow_dataset.py +15 -30
- lamindb/core/storage/objects.py +6 -0
- lamindb/core/subsettings/__init__.py +2 -0
- lamindb/core/subsettings/_annotation_settings.py +11 -0
- lamindb/curators/__init__.py +7 -3559
- lamindb/curators/_legacy.py +2056 -0
- lamindb/curators/core.py +1546 -0
- lamindb/errors.py +11 -0
- lamindb/examples/__init__.py +27 -0
- lamindb/examples/schemas/__init__.py +12 -0
- lamindb/examples/schemas/_anndata.py +25 -0
- lamindb/examples/schemas/_simple.py +19 -0
- lamindb/integrations/_vitessce.py +8 -5
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
- lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
- lamindb/models/__init__.py +12 -2
- lamindb/models/_describe.py +21 -4
- lamindb/models/_feature_manager.py +384 -301
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/_label_manager.py +8 -2
- lamindb/models/artifact.py +354 -177
- lamindb/models/artifact_set.py +122 -0
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +79 -56
- lamindb/models/core.py +1 -1
- lamindb/models/feature.py +78 -47
- lamindb/models/has_parents.py +24 -9
- lamindb/models/project.py +3 -3
- lamindb/models/query_manager.py +221 -22
- lamindb/models/query_set.py +251 -206
- lamindb/models/record.py +211 -344
- lamindb/models/run.py +59 -5
- lamindb/models/save.py +9 -5
- lamindb/models/schema.py +673 -196
- lamindb/models/transform.py +5 -14
- lamindb/models/ulabel.py +8 -5
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
- lamindb-1.5.0.dist-info/RECORD +108 -0
- lamindb-1.3.2.dist-info/RECORD +0 -95
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0
@@ -13,41 +13,26 @@ if TYPE_CHECKING:
|
|
13
13
|
PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather", ".ipc")
|
14
14
|
|
15
15
|
|
16
|
-
def _is_pyarrow_dataset(paths: UPath | list[UPath]) -> bool:
|
17
|
-
# it is assumed here that the paths exist
|
18
|
-
# we don't check here that the filesystem is the same
|
19
|
-
# but this is a requirement for pyarrow.dataset.dataset
|
20
|
-
if isinstance(paths, list):
|
21
|
-
path_list = paths
|
22
|
-
elif paths.is_dir():
|
23
|
-
path_list = [path for path in paths.rglob("*") if path.suffix != ""]
|
24
|
-
else:
|
25
|
-
path_list = [paths]
|
26
|
-
suffix = None
|
27
|
-
for path in path_list:
|
28
|
-
path_suffixes = path.suffixes
|
29
|
-
# this doesn't work for externally gzipped files, REMOVE LATER
|
30
|
-
path_suffix = (
|
31
|
-
path_suffixes[-2]
|
32
|
-
if len(path_suffixes) > 1 and ".gz" in path_suffixes
|
33
|
-
else path.suffix
|
34
|
-
)
|
35
|
-
if path_suffix not in PYARROW_SUFFIXES:
|
36
|
-
return False
|
37
|
-
elif suffix is None:
|
38
|
-
suffix = path_suffix
|
39
|
-
elif path_suffix != suffix:
|
40
|
-
return False
|
41
|
-
return True
|
42
|
-
|
43
|
-
|
44
16
|
def _open_pyarrow_dataset(paths: UPath | list[UPath], **kwargs) -> PyArrowDataset:
|
45
17
|
if isinstance(paths, list):
|
18
|
+
# a single path can be a directory, but a list of paths
|
19
|
+
# has to be a flat list of files
|
20
|
+
paths_str = []
|
46
21
|
path0 = paths[0]
|
47
22
|
if isinstance(path0, LocalPathClasses):
|
48
|
-
|
23
|
+
path_to_str = lambda p: p.as_posix()
|
24
|
+
filesystem = None
|
49
25
|
else:
|
50
|
-
|
26
|
+
path_to_str = lambda p: p.path
|
27
|
+
filesystem = path0.fs
|
28
|
+
for path in paths:
|
29
|
+
if (
|
30
|
+
getattr(path, "protocol", None) not in {"http", "https"}
|
31
|
+
and path.is_dir()
|
32
|
+
):
|
33
|
+
paths_str += [path_to_str(p) for p in path.rglob("*") if p.suffix != ""]
|
34
|
+
else:
|
35
|
+
paths_str.append(path_to_str(path))
|
51
36
|
elif isinstance(paths, LocalPathClasses):
|
52
37
|
paths_str, filesystem = paths.as_posix(), None
|
53
38
|
else:
|
lamindb/core/storage/objects.py
CHANGED
@@ -21,6 +21,7 @@ def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
|
|
21
21
|
"""Infer LaminDB storage file suffix from a data object."""
|
22
22
|
if isinstance(dmem, AnnData):
|
23
23
|
if format is not None:
|
24
|
+
# should be `.h5ad`, `.zarr`, or `.anndata.zarr`
|
24
25
|
if format not in {"h5ad", "zarr", "anndata.zarr"}:
|
25
26
|
raise ValueError(
|
26
27
|
"Error when specifying AnnData storage format, it should be"
|
@@ -31,6 +32,8 @@ def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
|
|
31
32
|
return ".h5ad"
|
32
33
|
|
33
34
|
if isinstance(dmem, DataFrame):
|
35
|
+
if format == ".csv":
|
36
|
+
return ".csv"
|
34
37
|
return ".parquet"
|
35
38
|
|
36
39
|
if with_package_obj(
|
@@ -79,6 +82,9 @@ def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None:
|
|
79
82
|
raise NotImplementedError
|
80
83
|
|
81
84
|
if isinstance(dmem, DataFrame):
|
85
|
+
if filepath.suffix == ".csv":
|
86
|
+
dmem.to_csv(filepath)
|
87
|
+
return
|
82
88
|
dmem.to_parquet(filepath)
|
83
89
|
return
|
84
90
|
|
@@ -0,0 +1,11 @@
|
|
1
|
+
class AnnotationSettings:
|
2
|
+
n_max_records: int = 1000
|
3
|
+
"""Maximal number of records to annotate with during automated annotation.
|
4
|
+
|
5
|
+
If the number of records to annotate exceeds this limit, print a warning and do not annotate.
|
6
|
+
|
7
|
+
The number is calculated per feature for labels, and per schema for features.
|
8
|
+
"""
|
9
|
+
|
10
|
+
|
11
|
+
annotation_settings = AnnotationSettings()
|