lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
@@ -1,36 +1,36 @@
1
- from __future__ import annotations
2
-
3
-
4
- def fake_bio_notebook_titles(n=100) -> list[str]:
5
- """A fake collection of study titles."""
6
- from faker import Faker
7
-
8
- fake = Faker()
9
-
10
- from faker_biology.mol_biol import Antibody
11
- from faker_biology.physiology import CellType, Organ, Organelle
12
-
13
- fake.add_provider(CellType)
14
- fake.add_provider(Organ)
15
- fake.add_provider(Organelle)
16
- fake.add_provider(Antibody)
17
-
18
- my_words = [
19
- "study",
20
- "investigate",
21
- "research",
22
- "result",
23
- "cluster",
24
- "rank",
25
- "candidate",
26
- "visualize",
27
- "efficiency",
28
- "classify",
29
- ]
30
- my_words += [fake.organ() for i in range(5)] + ["intestine", "intestinal"]
31
- my_words += [fake.celltype() for i in range(10)]
32
- my_words += [fake.antibody_isotype() for i in range(20)]
33
-
34
- my_notebook_titles = [fake.sentence(ext_word_list=my_words) for i in range(n)]
35
-
36
- return my_notebook_titles
1
+ from __future__ import annotations
2
+
3
+
4
+ def fake_bio_notebook_titles(n=100) -> list[str]:
5
+ """A fake collection of study titles."""
6
+ from faker import Faker
7
+
8
+ fake = Faker()
9
+
10
+ from faker_biology.mol_biol import Antibody
11
+ from faker_biology.physiology import CellType, Organ, Organelle
12
+
13
+ fake.add_provider(CellType)
14
+ fake.add_provider(Organ)
15
+ fake.add_provider(Organelle)
16
+ fake.add_provider(Antibody)
17
+
18
+ my_words = [
19
+ "study",
20
+ "investigate",
21
+ "research",
22
+ "result",
23
+ "cluster",
24
+ "rank",
25
+ "candidate",
26
+ "visualize",
27
+ "efficiency",
28
+ "classify",
29
+ ]
30
+ my_words += [fake.organ() for i in range(5)] + ["intestine", "intestinal"]
31
+ my_words += [fake.celltype() for i in range(10)]
32
+ my_words += [fake.antibody_isotype() for i in range(20)]
33
+
34
+ my_notebook_titles = [fake.sentence(ext_word_list=my_words) for i in range(n)]
35
+
36
+ return my_notebook_titles
@@ -1,90 +1,90 @@
1
- """Exceptions.
2
-
3
- .. autosummary::
4
- :toctree: .
5
-
6
- InvalidArgument
7
- DoesNotExist
8
- ValidationError
9
- NotebookNotSavedError
10
- NoTitleError
11
- MissingContextUID
12
- UpdateContext
13
- IntegrityError
14
-
15
- """
16
-
17
- # inheriting from SystemExit has the sole purpose of suppressing
18
- # the traceback - this isn't optimal but the current best solution
19
- # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1726856875597489
20
-
21
-
22
- class InvalidArgument(SystemExit):
23
- """Invalid method or function argument."""
24
-
25
- pass
26
-
27
-
28
- class TrackNotCalled(SystemExit):
29
- """ln.context.track() wasn't called."""
30
-
31
- pass
32
-
33
-
34
- class NotebookNotSaved(SystemExit):
35
- """Notebook wasn't saved."""
36
-
37
- pass
38
-
39
-
40
- class ValidationError(SystemExit):
41
- """Validation error: not mapped in registry."""
42
-
43
- pass
44
-
45
-
46
- # inspired by Django's DoesNotExist
47
- # equivalent to SQLAlchemy's NoResultFound
48
- class DoesNotExist(Exception):
49
- """No record found."""
50
-
51
- pass
52
-
53
-
54
- # -------------------------------------------------------------------------------------
55
- # ln.context.track() AKA context
56
- # -------------------------------------------------------------------------------------
57
-
58
-
59
- class IntegrityError(Exception):
60
- """Integrity error.
61
-
62
- For instance, it's not allowed to delete artifacts outside managed storage
63
- locations.
64
- """
65
-
66
- pass
67
-
68
-
69
- class NotebookNotSavedError(Exception):
70
- """Notebook wasn't saved."""
71
-
72
- pass
73
-
74
-
75
- class NoTitleError(Exception):
76
- """Notebook has no title."""
77
-
78
- pass
79
-
80
-
81
- class MissingContextUID(SystemExit):
82
- """User didn't define transform settings."""
83
-
84
- pass
85
-
86
-
87
- class UpdateContext(SystemExit):
88
- """Transform settings require update."""
89
-
90
- pass
1
+ """Exceptions.
2
+
3
+ .. autosummary::
4
+ :toctree: .
5
+
6
+ InvalidArgument
7
+ DoesNotExist
8
+ ValidationError
9
+ NotebookNotSaved
10
+ NoTitleError
11
+ MissingContextUID
12
+ UpdateContext
13
+ IntegrityError
14
+
15
+ """
16
+
17
+ # inheriting from SystemExit has the sole purpose of suppressing
18
+ # the traceback - this isn't optimal but the current best solution
19
+ # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1726856875597489
20
+
21
+
22
+ class InvalidArgument(SystemExit):
23
+ """Invalid method or function argument."""
24
+
25
+ pass
26
+
27
+
28
+ class TrackNotCalled(SystemExit):
29
+ """`ln.track()` wasn't called."""
30
+
31
+ pass
32
+
33
+
34
+ class NotebookNotSaved(SystemExit):
35
+ """Notebook wasn't saved."""
36
+
37
+ pass
38
+
39
+
40
+ class ValidationError(SystemExit):
41
+ """Validation error: not mapped in registry."""
42
+
43
+ pass
44
+
45
+
46
+ # inspired by Django's DoesNotExist
47
+ # equivalent to SQLAlchemy's NoResultFound
48
+ class DoesNotExist(Exception):
49
+ """No record found."""
50
+
51
+ pass
52
+
53
+
54
+ class InconsistentKey(Exception):
55
+ """Inconsistent transform or artifact `key`."""
56
+
57
+ pass
58
+
59
+
60
+ # -------------------------------------------------------------------------------------
61
+ # run context
62
+ # -------------------------------------------------------------------------------------
63
+
64
+
65
+ class IntegrityError(Exception):
66
+ """Integrity error.
67
+
68
+ For instance, it's not allowed to delete artifacts outside managed storage
69
+ locations.
70
+ """
71
+
72
+ pass
73
+
74
+
75
+ class NoTitleError(Exception):
76
+ """Notebook has no title."""
77
+
78
+ pass
79
+
80
+
81
+ class MissingContextUID(SystemExit):
82
+ """User didn't define transform settings."""
83
+
84
+ pass
85
+
86
+
87
+ class UpdateContext(SystemExit):
88
+ """Transform settings require update."""
89
+
90
+ pass
lamindb/core/fields.py CHANGED
@@ -1,12 +1,12 @@
1
- """Fields.
2
-
3
- The field accessor of a :class:`~lamindb.core.Record`:
4
-
5
- .. autosummary::
6
- :toctree: .
7
-
8
- FieldAttr
9
-
10
- """
11
-
12
- from lnschema_core.types import FieldAttr
1
+ """Fields.
2
+
3
+ The field accessor of a :class:`~lamindb.core.Record`:
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ FieldAttr
9
+
10
+ """
11
+
12
+ from lnschema_core.types import FieldAttr
lamindb/core/loaders.py CHANGED
@@ -1,164 +1,164 @@
1
- """Loaders in :class:`lamindb.Artifact.load`.
2
-
3
- .. autosummary::
4
- :toctree: .
5
-
6
- SUPPORTED_SUFFIXES
7
- load_fcs
8
- load_tsv
9
- load_h5ad
10
- load_h5mu
11
- load_html
12
- load_json
13
- load_image
14
- load_svg
15
-
16
- """
17
-
18
- from __future__ import annotations
19
-
20
- import builtins
21
- import re
22
- from pathlib import Path
23
- from typing import TYPE_CHECKING
24
-
25
- import anndata as ad
26
- import pandas as pd
27
- from lamindb_setup.core.upath import (
28
- create_path,
29
- infer_filesystem,
30
- )
31
-
32
- from lamindb.core._settings import settings
33
-
34
- if TYPE_CHECKING:
35
- import mudata as md
36
- from lamindb_setup.core.types import UPathStr
37
-
38
- try:
39
- from .storage._zarr import load_anndata_zarr
40
- except ImportError:
41
-
42
- def load_anndata_zarr(storepath): # type: ignore
43
- raise ImportError("Please install zarr: pip install zarr")
44
-
45
-
46
- is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
47
-
48
-
49
- # tested in lamin-usecases
50
- def load_fcs(*args, **kwargs) -> ad.AnnData:
51
- """Load an `.fcs` file to `AnnData`."""
52
- try:
53
- import readfcs
54
- except ImportError: # pragma: no cover
55
- raise ImportError("Please install readfcs: pip install readfcs") from None
56
- return readfcs.read(*args, **kwargs)
57
-
58
-
59
- def load_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
60
- """Load `.tsv` file to `DataFrame`."""
61
- path_sanitized = Path(path)
62
- return pd.read_csv(path_sanitized, sep="\t", **kwargs)
63
-
64
-
65
- def load_h5ad(filepath, **kwargs) -> ad.AnnData:
66
- """Load an `.h5ad` file to `AnnData`."""
67
- fs, filepath = infer_filesystem(filepath)
68
-
69
- with fs.open(filepath, mode="rb") as file:
70
- adata = ad.read_h5ad(file, backed=False, **kwargs)
71
- return adata
72
-
73
-
74
- def load_h5mu(filepath: UPathStr, **kwargs):
75
- """Load an `.h5mu` file to `MuData`."""
76
- import mudata as md
77
-
78
- path_sanitized = Path(filepath)
79
- return md.read_h5mu(path_sanitized, **kwargs)
80
-
81
-
82
- def load_html(path: UPathStr):
83
- """Display `.html` in ipython, otherwise return path."""
84
- if is_run_from_ipython:
85
- with open(path, encoding="utf-8") as f:
86
- html_content = f.read()
87
- # Extract the body content using regular expressions
88
- body_content = re.findall(
89
- r"<body(?:.*?)>(?:.*?)</body>", html_content, re.DOTALL
90
- )
91
- # Remove any empty body tags
92
- if body_content:
93
- body_content = body_content[0]
94
- body_content = body_content.strip() # type: ignore
95
- from IPython.display import HTML, display
96
-
97
- display(HTML(data=body_content))
98
- else:
99
- return path
100
-
101
-
102
- def load_json(path: UPathStr) -> dict:
103
- """Load `.json` to `dict`."""
104
- import json
105
-
106
- with open(path) as f:
107
- data = json.load(f)
108
- return data
109
-
110
-
111
- def load_image(path: UPathStr):
112
- """Display `.svg` in ipython, otherwise return path."""
113
- if is_run_from_ipython:
114
- from IPython.display import Image, display
115
-
116
- display(Image(filename=path))
117
- else:
118
- return path
119
-
120
-
121
- def load_svg(path: UPathStr) -> None | Path:
122
- """Display `.svg` in ipython, otherwise return path."""
123
- if is_run_from_ipython:
124
- from IPython.display import SVG, display
125
-
126
- display(SVG(filename=path))
127
- return None
128
- else:
129
- return path
130
-
131
-
132
- FILE_LOADERS = {
133
- ".csv": pd.read_csv,
134
- ".tsv": load_tsv,
135
- ".h5ad": load_h5ad,
136
- ".parquet": pd.read_parquet,
137
- ".fcs": load_fcs,
138
- ".zarr": load_anndata_zarr,
139
- ".html": load_html,
140
- ".json": load_json,
141
- ".h5mu": load_h5mu,
142
- ".jpg": load_image,
143
- ".png": load_image,
144
- ".svg": load_svg,
145
- }
146
-
147
- SUPPORTED_SUFFIXES = list(FILE_LOADERS.keys())
148
- """Suffixes with defined artifact loaders."""
149
-
150
-
151
- def load_to_memory(filepath: UPathStr, **kwargs):
152
- """Load a file into memory.
153
-
154
- Returns the filepath if no in-memory form is found.
155
- """
156
- filepath = create_path(filepath)
157
-
158
- filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
159
-
160
- loader = FILE_LOADERS.get(filepath.suffix)
161
- if loader is None:
162
- return filepath
163
- else:
164
- return loader(filepath, **kwargs)
1
+ """Loaders in :class:`lamindb.Artifact.load`.
2
+
3
+ .. autosummary::
4
+ :toctree: .
5
+
6
+ SUPPORTED_SUFFIXES
7
+ load_fcs
8
+ load_tsv
9
+ load_h5ad
10
+ load_h5mu
11
+ load_html
12
+ load_json
13
+ load_image
14
+ load_svg
15
+
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import builtins
21
+ import re
22
+ from pathlib import Path
23
+ from typing import TYPE_CHECKING
24
+
25
+ import anndata as ad
26
+ import pandas as pd
27
+ from lamindb_setup.core.upath import (
28
+ create_path,
29
+ infer_filesystem,
30
+ )
31
+
32
+ from lamindb.core._settings import settings
33
+
34
+ if TYPE_CHECKING:
35
+ import mudata as md
36
+ from lamindb_setup.core.types import UPathStr
37
+
38
+ try:
39
+ from .storage._zarr import load_anndata_zarr
40
+ except ImportError:
41
+
42
+ def load_anndata_zarr(storepath): # type: ignore
43
+ raise ImportError("Please install zarr: pip install zarr")
44
+
45
+
46
+ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
47
+
48
+
49
+ # tested in lamin-usecases
50
+ def load_fcs(*args, **kwargs) -> ad.AnnData:
51
+ """Load an `.fcs` file to `AnnData`."""
52
+ try:
53
+ import readfcs
54
+ except ImportError: # pragma: no cover
55
+ raise ImportError("Please install readfcs: pip install readfcs") from None
56
+ return readfcs.read(*args, **kwargs)
57
+
58
+
59
+ def load_tsv(path: UPathStr, **kwargs) -> pd.DataFrame:
60
+ """Load `.tsv` file to `DataFrame`."""
61
+ path_sanitized = Path(path)
62
+ return pd.read_csv(path_sanitized, sep="\t", **kwargs)
63
+
64
+
65
+ def load_h5ad(filepath, **kwargs) -> ad.AnnData:
66
+ """Load an `.h5ad` file to `AnnData`."""
67
+ fs, filepath = infer_filesystem(filepath)
68
+
69
+ with fs.open(filepath, mode="rb") as file:
70
+ adata = ad.read_h5ad(file, backed=False, **kwargs)
71
+ return adata
72
+
73
+
74
+ def load_h5mu(filepath: UPathStr, **kwargs):
75
+ """Load an `.h5mu` file to `MuData`."""
76
+ import mudata as md
77
+
78
+ path_sanitized = Path(filepath)
79
+ return md.read_h5mu(path_sanitized, **kwargs)
80
+
81
+
82
+ def load_html(path: UPathStr):
83
+ """Display `.html` in ipython, otherwise return path."""
84
+ if is_run_from_ipython:
85
+ with open(path, encoding="utf-8") as f:
86
+ html_content = f.read()
87
+ # Extract the body content using regular expressions
88
+ body_content = re.findall(
89
+ r"<body(?:.*?)>(?:.*?)</body>", html_content, re.DOTALL
90
+ )
91
+ # Remove any empty body tags
92
+ if body_content:
93
+ body_content = body_content[0]
94
+ body_content = body_content.strip() # type: ignore
95
+ from IPython.display import HTML, display
96
+
97
+ display(HTML(data=body_content))
98
+ else:
99
+ return path
100
+
101
+
102
+ def load_json(path: UPathStr) -> dict:
103
+ """Load `.json` to `dict`."""
104
+ import json
105
+
106
+ with open(path) as f:
107
+ data = json.load(f)
108
+ return data
109
+
110
+
111
+ def load_image(path: UPathStr):
112
+ """Display `.svg` in ipython, otherwise return path."""
113
+ if is_run_from_ipython:
114
+ from IPython.display import Image, display
115
+
116
+ display(Image(filename=path))
117
+ else:
118
+ return path
119
+
120
+
121
+ def load_svg(path: UPathStr) -> None | Path:
122
+ """Display `.svg` in ipython, otherwise return path."""
123
+ if is_run_from_ipython:
124
+ from IPython.display import SVG, display
125
+
126
+ display(SVG(filename=path))
127
+ return None
128
+ else:
129
+ return path
130
+
131
+
132
+ FILE_LOADERS = {
133
+ ".csv": pd.read_csv,
134
+ ".tsv": load_tsv,
135
+ ".h5ad": load_h5ad,
136
+ ".parquet": pd.read_parquet,
137
+ ".fcs": load_fcs,
138
+ ".zarr": load_anndata_zarr,
139
+ ".html": load_html,
140
+ ".json": load_json,
141
+ ".h5mu": load_h5mu,
142
+ ".jpg": load_image,
143
+ ".png": load_image,
144
+ ".svg": load_svg,
145
+ }
146
+
147
+ SUPPORTED_SUFFIXES = list(FILE_LOADERS.keys())
148
+ """Suffixes with defined artifact loaders."""
149
+
150
+
151
+ def load_to_memory(filepath: UPathStr, **kwargs):
152
+ """Load a file into memory.
153
+
154
+ Returns the filepath if no in-memory form is found.
155
+ """
156
+ filepath = create_path(filepath)
157
+
158
+ filepath = settings._storage_settings.cloud_to_local(filepath, print_progress=True)
159
+
160
+ loader = FILE_LOADERS.get(filepath.suffix)
161
+ if loader is None:
162
+ return filepath
163
+ else:
164
+ return loader(filepath, **kwargs)