lamindb 0.76.7__py3-none-any.whl → 0.76.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. lamindb/__init__.py +113 -113
  2. lamindb/_artifact.py +1205 -1178
  3. lamindb/_can_validate.py +579 -579
  4. lamindb/_collection.py +387 -387
  5. lamindb/_curate.py +1601 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +242 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +256 -256
  10. lamindb/_from_values.py +382 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +362 -362
  15. lamindb/_record.py +649 -649
  16. lamindb/_run.py +57 -57
  17. lamindb/_save.py +308 -295
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +127 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +574 -574
  25. lamindb/core/_data.py +438 -438
  26. lamindb/core/_feature_manager.py +867 -867
  27. lamindb/core/_label_manager.py +253 -253
  28. lamindb/core/_mapped_collection.py +597 -597
  29. lamindb/core/_settings.py +187 -187
  30. lamindb/core/_sync_git.py +138 -138
  31. lamindb/core/_track_environment.py +27 -27
  32. lamindb/core/datasets/__init__.py +59 -59
  33. lamindb/core/datasets/_core.py +571 -571
  34. lamindb/core/datasets/_fake.py +36 -36
  35. lamindb/core/exceptions.py +90 -77
  36. lamindb/core/fields.py +12 -12
  37. lamindb/core/loaders.py +164 -164
  38. lamindb/core/schema.py +56 -56
  39. lamindb/core/storage/__init__.py +25 -25
  40. lamindb/core/storage/_anndata_accessor.py +740 -740
  41. lamindb/core/storage/_anndata_sizes.py +41 -41
  42. lamindb/core/storage/_backed_access.py +98 -98
  43. lamindb/core/storage/_tiledbsoma.py +204 -204
  44. lamindb/core/storage/_valid_suffixes.py +21 -21
  45. lamindb/core/storage/_zarr.py +110 -110
  46. lamindb/core/storage/objects.py +62 -62
  47. lamindb/core/storage/paths.py +172 -141
  48. lamindb/core/subsettings/__init__.py +12 -12
  49. lamindb/core/subsettings/_creation_settings.py +38 -38
  50. lamindb/core/subsettings/_transform_settings.py +21 -21
  51. lamindb/core/types.py +19 -19
  52. lamindb/core/versioning.py +158 -158
  53. lamindb/integrations/__init__.py +12 -12
  54. lamindb/integrations/_vitessce.py +107 -107
  55. lamindb/setup/__init__.py +14 -14
  56. lamindb/setup/core/__init__.py +4 -4
  57. {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
  58. {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/METADATA +3 -3
  59. lamindb-0.76.8.dist-info/RECORD +60 -0
  60. {lamindb-0.76.7.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
  61. lamindb-0.76.7.dist-info/RECORD +0 -60
lamindb/_finish.py CHANGED
@@ -1,256 +1,256 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import re
5
- import shutil
6
- from datetime import datetime, timezone
7
- from typing import TYPE_CHECKING
8
-
9
- import lamindb_setup as ln_setup
10
- from lamin_utils import logger
11
- from lamindb_setup.core.hashing import hash_file
12
-
13
- if TYPE_CHECKING:
14
- from pathlib import Path
15
-
16
- from lnschema_core import Run, Transform
17
-
18
- from ._query_set import QuerySet
19
-
20
-
21
- # this is from the get_title function in nbproject
22
- # should be moved into lamindb sooner or later
23
- def prepare_notebook(
24
- nb,
25
- strip_title: bool = False,
26
- ) -> str | None:
27
- """Strip title from the notebook if requested."""
28
- title_found = False
29
- for cell in nb.cells:
30
- cell.metadata.clear() # strip cell metadata
31
- if not title_found and cell["cell_type"] == "markdown":
32
- lines = cell["source"].split("\n")
33
- for i, line in enumerate(lines):
34
- if line.startswith("# "):
35
- line.lstrip("#").strip(" .").strip()
36
- title_found = True
37
- if strip_title:
38
- lines.pop(i)
39
- cell["source"] = "\n".join(lines)
40
- return None
41
-
42
-
43
- def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
44
- import nbformat
45
- import traitlets.config as config
46
- from nbconvert import HTMLExporter
47
-
48
- with open(notebook_path, encoding="utf-8") as f:
49
- notebook = nbformat.read(f, as_version=4)
50
- prepare_notebook(notebook, strip_title=True)
51
- notebook.metadata.clear() # strip notebook metadata
52
- # if we were to export as ipynb, the following two lines would do it
53
- # with open(output_path, "w", encoding="utf-8") as f:
54
- # nbformat.write(notebook, f)
55
- # instead we need all this code
56
- c = config.Config()
57
- c.HTMLExporter.preprocessors = []
58
- c.HTMLExporter.exclude_input_prompt = True
59
- c.HTMLExporter.exclude_output_prompt = True
60
- c.HTMLExporter.anchor_link_text = " "
61
- html_exporter = HTMLExporter(config=c)
62
- html, _ = html_exporter.from_notebook_node(notebook)
63
- output_path.write_text(html, encoding="utf-8")
64
-
65
-
66
- def notebook_to_script(
67
- transform: Transform, notebook_path: Path, script_path: Path
68
- ) -> None:
69
- import jupytext
70
-
71
- notebook = jupytext.read(notebook_path)
72
- py_content = jupytext.writes(notebook, fmt="py:percent")
73
- # remove global metadata header
74
- py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
75
- # replace title
76
- py_content = py_content.replace(f"# # {transform.name}", "# # transform.name")
77
- script_path.write_text(py_content)
78
-
79
-
80
- def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
81
- import jupytext
82
-
83
- # get title back
84
- py_content = transform.source_code.replace(
85
- "# # transform.name", f"# # {transform.name}"
86
- )
87
- notebook = jupytext.reads(py_content, fmt="py:percent")
88
- jupytext.write(notebook, notebook_path)
89
-
90
-
91
- def save_context_core(
92
- *,
93
- run: Run,
94
- transform: Transform,
95
- filepath: Path,
96
- finished_at: bool = False,
97
- ignore_non_consecutive: bool | None = None,
98
- from_cli: bool = False,
99
- ) -> str | None:
100
- import lamindb as ln
101
-
102
- from .core._context import context, is_run_from_ipython
103
-
104
- ln.settings.verbosity = "success"
105
-
106
- # for scripts, things are easy
107
- is_consecutive = True
108
- is_notebook = transform.type == "notebook"
109
- source_code_path = filepath
110
- # for notebooks, we need more work
111
- if is_notebook:
112
- try:
113
- import jupytext
114
- from nbproject.dev import (
115
- check_consecutiveness,
116
- read_notebook,
117
- )
118
- except ImportError:
119
- logger.error("install nbproject & jupytext: pip install nbproject jupytext")
120
- return None
121
- notebook_content = read_notebook(filepath) # type: ignore
122
- if not ignore_non_consecutive: # ignore_non_consecutive is None or False
123
- is_consecutive = check_consecutiveness(
124
- notebook_content, calling_statement=".finish()"
125
- )
126
- if not is_consecutive:
127
- response = "n" # ignore_non_consecutive == False
128
- if ignore_non_consecutive is None:
129
- response = input(
130
- " Do you still want to proceed with finishing? (y/n) "
131
- )
132
- if response != "y":
133
- return "aborted-non-consecutive"
134
- # write the report
135
- report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
136
- ".ipynb", ".html"
137
- )
138
- notebook_to_report(filepath, report_path)
139
- # write the source code
140
- source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
141
- ".ipynb", ".py"
142
- )
143
- notebook_to_script(transform, filepath, source_code_path)
144
- ln.settings.creation.artifact_silence_missing_run_warning = True
145
- # track source code
146
- hash, _ = hash_file(source_code_path) # ignore hash_type for now
147
- if (
148
- transform._source_code_artifact_id is not None
149
- or transform.source_code is not None # equivalent to transform.hash is not None
150
- ):
151
- # check if the hash of the transform source code matches
152
- # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
153
- ref_hash = (
154
- transform.hash
155
- if transform.hash is not None
156
- else transform._source_code_artifact.hash
157
- )
158
- if hash != ref_hash:
159
- response = input(
160
- f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
161
- f"Proceed? (y/n)"
162
- )
163
- if response == "y":
164
- transform.source_code = source_code_path.read_text()
165
- transform.hash = hash
166
- else:
167
- logger.warning(
168
- "Please re-run `ln.context.track()` to make a new version"
169
- )
170
- return "rerun-the-notebook"
171
- else:
172
- logger.important("source code is already saved")
173
- else:
174
- transform.source_code = source_code_path.read_text()
175
- transform.hash = hash
176
-
177
- # track environment
178
- env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
179
- if env_path.exists():
180
- overwrite_env = True
181
- if run.environment_id is not None and from_cli:
182
- logger.important("run.environment is already saved")
183
- overwrite_env = False
184
- if overwrite_env:
185
- hash, _ = hash_file(env_path)
186
- artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
187
- new_env_artifact = artifact is None
188
- if new_env_artifact:
189
- artifact = ln.Artifact(
190
- env_path,
191
- description="requirements.txt",
192
- visibility=0,
193
- run=False,
194
- )
195
- artifact.save(upload=True, print_progress=False)
196
- run.environment = artifact
197
- if new_env_artifact:
198
- logger.debug(f"saved run.environment: {run.environment}")
199
-
200
- # set finished_at
201
- if finished_at:
202
- run.finished_at = datetime.now(timezone.utc)
203
-
204
- # track report and set is_consecutive
205
- if not is_notebook:
206
- run.is_consecutive = True
207
- run.save()
208
- else:
209
- if run.report_id is not None:
210
- hash, _ = hash_file(report_path) # ignore hash_type for now
211
- if hash != run.report.hash:
212
- response = input(
213
- f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
214
- )
215
- if response == "y":
216
- run.report.replace(report_path)
217
- run.report.save(upload=True)
218
- else:
219
- logger.important("keeping old report")
220
- else:
221
- logger.important("report is already saved")
222
- else:
223
- report_file = ln.Artifact(
224
- report_path,
225
- description=f"Report of run {run.uid}",
226
- visibility=0, # hidden file
227
- run=False,
228
- )
229
- report_file.save(upload=True, print_progress=False)
230
- run.report = report_file
231
- run.is_consecutive = is_consecutive
232
- run.save()
233
- logger.debug(
234
- f"saved transform.latest_run.report: {transform.latest_run.report}"
235
- )
236
- transform.save()
237
-
238
- # finalize
239
- if ln_setup.settings.instance.is_on_hub:
240
- identifier = ln_setup.settings.instance.slug
241
- logger.important(
242
- f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
243
- )
244
- if not from_cli:
245
- thing, name = (
246
- ("notebook", "notebook.ipynb")
247
- if is_notebook
248
- else ("script", "script.py")
249
- )
250
- logger.important(
251
- f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
252
- )
253
- # because run & transform changed, update the global context
254
- context._run = run
255
- context._transform = transform
256
- return None
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import re
5
+ import shutil
6
+ from datetime import datetime, timezone
7
+ from typing import TYPE_CHECKING
8
+
9
+ import lamindb_setup as ln_setup
10
+ from lamin_utils import logger
11
+ from lamindb_setup.core.hashing import hash_file
12
+
13
+ if TYPE_CHECKING:
14
+ from pathlib import Path
15
+
16
+ from lnschema_core import Run, Transform
17
+
18
+ from ._query_set import QuerySet
19
+
20
+
21
+ # this is from the get_title function in nbproject
22
+ # should be moved into lamindb sooner or later
23
+ def prepare_notebook(
24
+ nb,
25
+ strip_title: bool = False,
26
+ ) -> str | None:
27
+ """Strip title from the notebook if requested."""
28
+ title_found = False
29
+ for cell in nb.cells:
30
+ cell.metadata.clear() # strip cell metadata
31
+ if not title_found and cell["cell_type"] == "markdown":
32
+ lines = cell["source"].split("\n")
33
+ for i, line in enumerate(lines):
34
+ if line.startswith("# "):
35
+ line.lstrip("#").strip(" .").strip()
36
+ title_found = True
37
+ if strip_title:
38
+ lines.pop(i)
39
+ cell["source"] = "\n".join(lines)
40
+ return None
41
+
42
+
43
+ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
44
+ import nbformat
45
+ import traitlets.config as config
46
+ from nbconvert import HTMLExporter
47
+
48
+ with open(notebook_path, encoding="utf-8") as f:
49
+ notebook = nbformat.read(f, as_version=4)
50
+ prepare_notebook(notebook, strip_title=True)
51
+ notebook.metadata.clear() # strip notebook metadata
52
+ # if we were to export as ipynb, the following two lines would do it
53
+ # with open(output_path, "w", encoding="utf-8") as f:
54
+ # nbformat.write(notebook, f)
55
+ # instead we need all this code
56
+ c = config.Config()
57
+ c.HTMLExporter.preprocessors = []
58
+ c.HTMLExporter.exclude_input_prompt = True
59
+ c.HTMLExporter.exclude_output_prompt = True
60
+ c.HTMLExporter.anchor_link_text = " "
61
+ html_exporter = HTMLExporter(config=c)
62
+ html, _ = html_exporter.from_notebook_node(notebook)
63
+ output_path.write_text(html, encoding="utf-8")
64
+
65
+
66
+ def notebook_to_script(
67
+ transform: Transform, notebook_path: Path, script_path: Path
68
+ ) -> None:
69
+ import jupytext
70
+
71
+ notebook = jupytext.read(notebook_path)
72
+ py_content = jupytext.writes(notebook, fmt="py:percent")
73
+ # remove global metadata header
74
+ py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
75
+ # replace title
76
+ py_content = py_content.replace(f"# # {transform.name}", "# # transform.name")
77
+ script_path.write_text(py_content)
78
+
79
+
80
+ def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
81
+ import jupytext
82
+
83
+ # get title back
84
+ py_content = transform.source_code.replace(
85
+ "# # transform.name", f"# # {transform.name}"
86
+ )
87
+ notebook = jupytext.reads(py_content, fmt="py:percent")
88
+ jupytext.write(notebook, notebook_path)
89
+
90
+
91
+ def save_context_core(
92
+ *,
93
+ run: Run,
94
+ transform: Transform,
95
+ filepath: Path,
96
+ finished_at: bool = False,
97
+ ignore_non_consecutive: bool | None = None,
98
+ from_cli: bool = False,
99
+ ) -> str | None:
100
+ import lamindb as ln
101
+
102
+ from .core._context import context, is_run_from_ipython
103
+
104
+ ln.settings.verbosity = "success"
105
+
106
+ # for scripts, things are easy
107
+ is_consecutive = True
108
+ is_notebook = transform.type == "notebook"
109
+ source_code_path = filepath
110
+ # for notebooks, we need more work
111
+ if is_notebook:
112
+ try:
113
+ import jupytext
114
+ from nbproject.dev import (
115
+ check_consecutiveness,
116
+ read_notebook,
117
+ )
118
+ except ImportError:
119
+ logger.error("install nbproject & jupytext: pip install nbproject jupytext")
120
+ return None
121
+ notebook_content = read_notebook(filepath) # type: ignore
122
+ if not ignore_non_consecutive: # ignore_non_consecutive is None or False
123
+ is_consecutive = check_consecutiveness(
124
+ notebook_content, calling_statement=".finish()"
125
+ )
126
+ if not is_consecutive:
127
+ response = "n" # ignore_non_consecutive == False
128
+ if ignore_non_consecutive is None:
129
+ response = input(
130
+ " Do you still want to proceed with finishing? (y/n) "
131
+ )
132
+ if response != "y":
133
+ return "aborted-non-consecutive"
134
+ # write the report
135
+ report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
136
+ ".ipynb", ".html"
137
+ )
138
+ notebook_to_report(filepath, report_path)
139
+ # write the source code
140
+ source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
141
+ ".ipynb", ".py"
142
+ )
143
+ notebook_to_script(transform, filepath, source_code_path)
144
+ ln.settings.creation.artifact_silence_missing_run_warning = True
145
+ # track source code
146
+ hash, _ = hash_file(source_code_path) # ignore hash_type for now
147
+ if (
148
+ transform._source_code_artifact_id is not None
149
+ or transform.source_code is not None # equivalent to transform.hash is not None
150
+ ):
151
+ # check if the hash of the transform source code matches
152
+ # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
153
+ ref_hash = (
154
+ transform.hash
155
+ if transform.hash is not None
156
+ else transform._source_code_artifact.hash
157
+ )
158
+ if hash != ref_hash:
159
+ response = input(
160
+ f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
161
+ f"Proceed? (y/n)"
162
+ )
163
+ if response == "y":
164
+ transform.source_code = source_code_path.read_text()
165
+ transform.hash = hash
166
+ else:
167
+ logger.warning(
168
+ "Please re-run `ln.context.track()` to make a new version"
169
+ )
170
+ return "rerun-the-notebook"
171
+ else:
172
+ logger.important("source code is already saved")
173
+ else:
174
+ transform.source_code = source_code_path.read_text()
175
+ transform.hash = hash
176
+
177
+ # track environment
178
+ env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
179
+ if env_path.exists():
180
+ overwrite_env = True
181
+ if run.environment_id is not None and from_cli:
182
+ logger.important("run.environment is already saved")
183
+ overwrite_env = False
184
+ if overwrite_env:
185
+ hash, _ = hash_file(env_path)
186
+ artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
187
+ new_env_artifact = artifact is None
188
+ if new_env_artifact:
189
+ artifact = ln.Artifact(
190
+ env_path,
191
+ description="requirements.txt",
192
+ visibility=0,
193
+ run=False,
194
+ )
195
+ artifact.save(upload=True, print_progress=False)
196
+ run.environment = artifact
197
+ if new_env_artifact:
198
+ logger.debug(f"saved run.environment: {run.environment}")
199
+
200
+ # set finished_at
201
+ if finished_at:
202
+ run.finished_at = datetime.now(timezone.utc)
203
+
204
+ # track report and set is_consecutive
205
+ if not is_notebook:
206
+ run.is_consecutive = True
207
+ run.save()
208
+ else:
209
+ if run.report_id is not None:
210
+ hash, _ = hash_file(report_path) # ignore hash_type for now
211
+ if hash != run.report.hash:
212
+ response = input(
213
+ f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
214
+ )
215
+ if response == "y":
216
+ run.report.replace(report_path)
217
+ run.report.save(upload=True)
218
+ else:
219
+ logger.important("keeping old report")
220
+ else:
221
+ logger.important("report is already saved")
222
+ else:
223
+ report_file = ln.Artifact(
224
+ report_path,
225
+ description=f"Report of run {run.uid}",
226
+ visibility=0, # hidden file
227
+ run=False,
228
+ )
229
+ report_file.save(upload=True, print_progress=False)
230
+ run.report = report_file
231
+ run.is_consecutive = is_consecutive
232
+ run.save()
233
+ logger.debug(
234
+ f"saved transform.latest_run.report: {transform.latest_run.report}"
235
+ )
236
+ transform.save()
237
+
238
+ # finalize
239
+ if ln_setup.settings.instance.is_on_hub:
240
+ identifier = ln_setup.settings.instance.slug
241
+ logger.important(
242
+ f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
243
+ )
244
+ if not from_cli:
245
+ thing, name = (
246
+ ("notebook", "notebook.ipynb")
247
+ if is_notebook
248
+ else ("script", "script.py")
249
+ )
250
+ logger.important(
251
+ f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
252
+ )
253
+ # because run & transform changed, update the global context
254
+ context._run = run
255
+ context._transform = transform
256
+ return None