lamindb 1.3.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +216 -133
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/objects.py +6 -0
  20. lamindb/core/subsettings/__init__.py +2 -0
  21. lamindb/core/subsettings/_annotation_settings.py +11 -0
  22. lamindb/curators/__init__.py +7 -3559
  23. lamindb/curators/_legacy.py +2056 -0
  24. lamindb/curators/core.py +1546 -0
  25. lamindb/errors.py +11 -0
  26. lamindb/examples/__init__.py +27 -0
  27. lamindb/examples/schemas/__init__.py +12 -0
  28. lamindb/examples/schemas/_anndata.py +25 -0
  29. lamindb/examples/schemas/_simple.py +19 -0
  30. lamindb/integrations/_vitessce.py +8 -5
  31. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  32. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  33. lamindb/models/__init__.py +12 -2
  34. lamindb/models/_describe.py +21 -4
  35. lamindb/models/_feature_manager.py +384 -301
  36. lamindb/models/_from_values.py +1 -1
  37. lamindb/models/_is_versioned.py +5 -15
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +354 -177
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/can_curate.py +4 -1
  42. lamindb/models/collection.py +79 -56
  43. lamindb/models/core.py +1 -1
  44. lamindb/models/feature.py +78 -47
  45. lamindb/models/has_parents.py +24 -9
  46. lamindb/models/project.py +3 -3
  47. lamindb/models/query_manager.py +221 -22
  48. lamindb/models/query_set.py +251 -206
  49. lamindb/models/record.py +211 -344
  50. lamindb/models/run.py +59 -5
  51. lamindb/models/save.py +9 -5
  52. lamindb/models/schema.py +673 -196
  53. lamindb/models/transform.py +5 -14
  54. lamindb/models/ulabel.py +8 -5
  55. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/METADATA +8 -7
  56. lamindb-1.5.0.dist-info/RECORD +108 -0
  57. lamindb-1.3.2.dist-info/RECORD +0 -95
  58. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/LICENSE +0 -0
  59. {lamindb-1.3.2.dist-info → lamindb-1.5.0.dist-info}/WHEEL +0 -0
@@ -13,41 +13,26 @@ if TYPE_CHECKING:
13
13
  PYARROW_SUFFIXES = (".parquet", ".csv", ".json", ".orc", ".arrow", ".feather", ".ipc")
14
14
 
15
15
 
16
- def _is_pyarrow_dataset(paths: UPath | list[UPath]) -> bool:
17
- # it is assumed here that the paths exist
18
- # we don't check here that the filesystem is the same
19
- # but this is a requirement for pyarrow.dataset.dataset
20
- if isinstance(paths, list):
21
- path_list = paths
22
- elif paths.is_dir():
23
- path_list = [path for path in paths.rglob("*") if path.suffix != ""]
24
- else:
25
- path_list = [paths]
26
- suffix = None
27
- for path in path_list:
28
- path_suffixes = path.suffixes
29
- # this doesn't work for externally gzipped files, REMOVE LATER
30
- path_suffix = (
31
- path_suffixes[-2]
32
- if len(path_suffixes) > 1 and ".gz" in path_suffixes
33
- else path.suffix
34
- )
35
- if path_suffix not in PYARROW_SUFFIXES:
36
- return False
37
- elif suffix is None:
38
- suffix = path_suffix
39
- elif path_suffix != suffix:
40
- return False
41
- return True
42
-
43
-
44
16
  def _open_pyarrow_dataset(paths: UPath | list[UPath], **kwargs) -> PyArrowDataset:
45
17
  if isinstance(paths, list):
18
+ # a single path can be a directory, but a list of paths
19
+ # has to be a flat list of files
20
+ paths_str = []
46
21
  path0 = paths[0]
47
22
  if isinstance(path0, LocalPathClasses):
48
- paths_str, filesystem = [path.as_posix() for path in paths], None
23
+ path_to_str = lambda p: p.as_posix()
24
+ filesystem = None
49
25
  else:
50
- paths_str, filesystem = [path.path for path in paths], path0.fs
26
+ path_to_str = lambda p: p.path
27
+ filesystem = path0.fs
28
+ for path in paths:
29
+ if (
30
+ getattr(path, "protocol", None) not in {"http", "https"}
31
+ and path.is_dir()
32
+ ):
33
+ paths_str += [path_to_str(p) for p in path.rglob("*") if p.suffix != ""]
34
+ else:
35
+ paths_str.append(path_to_str(path))
51
36
  elif isinstance(paths, LocalPathClasses):
52
37
  paths_str, filesystem = paths.as_posix(), None
53
38
  else:
@@ -21,6 +21,7 @@ def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
21
21
  """Infer LaminDB storage file suffix from a data object."""
22
22
  if isinstance(dmem, AnnData):
23
23
  if format is not None:
24
+ # should be `.h5ad`, `.zarr`, or `.anndata.zarr`
24
25
  if format not in {"h5ad", "zarr", "anndata.zarr"}:
25
26
  raise ValueError(
26
27
  "Error when specifying AnnData storage format, it should be"
@@ -31,6 +32,8 @@ def infer_suffix(dmem: SupportedDataTypes, format: str | None = None):
31
32
  return ".h5ad"
32
33
 
33
34
  if isinstance(dmem, DataFrame):
35
+ if format == ".csv":
36
+ return ".csv"
34
37
  return ".parquet"
35
38
 
36
39
  if with_package_obj(
@@ -79,6 +82,9 @@ def write_to_disk(dmem: SupportedDataTypes, filepath: UPathStr) -> None:
79
82
  raise NotImplementedError
80
83
 
81
84
  if isinstance(dmem, DataFrame):
85
+ if filepath.suffix == ".csv":
86
+ dmem.to_csv(filepath)
87
+ return
82
88
  dmem.to_parquet(filepath)
83
89
  return
84
90
 
@@ -4,7 +4,9 @@
4
4
  :toctree: .
5
5
 
6
6
  CreationSettings
7
+ AnnotationSettings
7
8
 
8
9
  """
9
10
 
11
+ from ._annotation_settings import AnnotationSettings
10
12
  from ._creation_settings import CreationSettings
@@ -0,0 +1,11 @@
1
+ class AnnotationSettings:
2
+ n_max_records: int = 1000
3
+ """Maximal number of records to annotate with during automated annotation.
4
+
5
+ If the number of records to annotate exceeds this limit, print a warning and do not annotate.
6
+
7
+ The number is calculated per feature for labels, and per schema for features.
8
+ """
9
+
10
+
11
+ annotation_settings = AnnotationSettings()