lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_finish.py CHANGED
@@ -1,256 +1,250 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import re
5
- import shutil
6
- from datetime import datetime, timezone
7
- from typing import TYPE_CHECKING
8
-
9
- import lamindb_setup as ln_setup
10
- from lamin_utils import logger
11
- from lamindb_setup.core.hashing import hash_file
12
-
13
- if TYPE_CHECKING:
14
- from pathlib import Path
15
-
16
- from lnschema_core import Run, Transform
17
-
18
- from ._query_set import QuerySet
19
-
20
-
21
- # this is from the get_title function in nbproject
22
- # should be moved into lamindb sooner or later
23
- def prepare_notebook(
24
- nb,
25
- strip_title: bool = False,
26
- ) -> str | None:
27
- """Strip title from the notebook if requested."""
28
- title_found = False
29
- for cell in nb.cells:
30
- cell.metadata.clear() # strip cell metadata
31
- if not title_found and cell["cell_type"] == "markdown":
32
- lines = cell["source"].split("\n")
33
- for i, line in enumerate(lines):
34
- if line.startswith("# "):
35
- line.lstrip("#").strip(" .").strip()
36
- title_found = True
37
- if strip_title:
38
- lines.pop(i)
39
- cell["source"] = "\n".join(lines)
40
- return None
41
-
42
-
43
- def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
44
- import nbformat
45
- import traitlets.config as config
46
- from nbconvert import HTMLExporter
47
-
48
- with open(notebook_path, encoding="utf-8") as f:
49
- notebook = nbformat.read(f, as_version=4)
50
- prepare_notebook(notebook, strip_title=True)
51
- notebook.metadata.clear() # strip notebook metadata
52
- # if we were to export as ipynb, the following two lines would do it
53
- # with open(output_path, "w", encoding="utf-8") as f:
54
- # nbformat.write(notebook, f)
55
- # instead we need all this code
56
- c = config.Config()
57
- c.HTMLExporter.preprocessors = []
58
- c.HTMLExporter.exclude_input_prompt = True
59
- c.HTMLExporter.exclude_output_prompt = True
60
- c.HTMLExporter.anchor_link_text = " "
61
- html_exporter = HTMLExporter(config=c)
62
- html, _ = html_exporter.from_notebook_node(notebook)
63
- output_path.write_text(html, encoding="utf-8")
64
-
65
-
66
- def notebook_to_script(
67
- transform: Transform, notebook_path: Path, script_path: Path
68
- ) -> None:
69
- import jupytext
70
-
71
- notebook = jupytext.read(notebook_path)
72
- py_content = jupytext.writes(notebook, fmt="py:percent")
73
- # remove global metadata header
74
- py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
75
- # replace title
76
- py_content = py_content.replace(f"# # {transform.name}", "# # transform.name")
77
- script_path.write_text(py_content)
78
-
79
-
80
- def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
81
- import jupytext
82
-
83
- # get title back
84
- py_content = transform.source_code.replace(
85
- "# # transform.name", f"# # {transform.name}"
86
- )
87
- notebook = jupytext.reads(py_content, fmt="py:percent")
88
- jupytext.write(notebook, notebook_path)
89
-
90
-
91
- def save_context_core(
92
- *,
93
- run: Run,
94
- transform: Transform,
95
- filepath: Path,
96
- finished_at: bool = False,
97
- ignore_non_consecutive: bool | None = None,
98
- from_cli: bool = False,
99
- ) -> str | None:
100
- import lamindb as ln
101
-
102
- from .core._context import context, is_run_from_ipython
103
-
104
- ln.settings.verbosity = "success"
105
-
106
- # for scripts, things are easy
107
- is_consecutive = True
108
- is_notebook = transform.type == "notebook"
109
- source_code_path = filepath
110
- # for notebooks, we need more work
111
- if is_notebook:
112
- try:
113
- import jupytext
114
- from nbproject.dev import (
115
- check_consecutiveness,
116
- read_notebook,
117
- )
118
- except ImportError:
119
- logger.error("install nbproject & jupytext: pip install nbproject jupytext")
120
- return None
121
- notebook_content = read_notebook(filepath) # type: ignore
122
- if not ignore_non_consecutive: # ignore_non_consecutive is None or False
123
- is_consecutive = check_consecutiveness(
124
- notebook_content, calling_statement=".finish()"
125
- )
126
- if not is_consecutive:
127
- response = "n" # ignore_non_consecutive == False
128
- if ignore_non_consecutive is None:
129
- response = input(
130
- " Do you still want to proceed with finishing? (y/n) "
131
- )
132
- if response != "y":
133
- return "aborted-non-consecutive"
134
- # write the report
135
- report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
136
- ".ipynb", ".html"
137
- )
138
- notebook_to_report(filepath, report_path)
139
- # write the source code
140
- source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
141
- ".ipynb", ".py"
142
- )
143
- notebook_to_script(transform, filepath, source_code_path)
144
- ln.settings.creation.artifact_silence_missing_run_warning = True
145
- # track source code
146
- hash, _ = hash_file(source_code_path) # ignore hash_type for now
147
- if (
148
- transform._source_code_artifact_id is not None
149
- or transform.source_code is not None # equivalent to transform.hash is not None
150
- ):
151
- # check if the hash of the transform source code matches
152
- # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
153
- ref_hash = (
154
- transform.hash
155
- if transform.hash is not None
156
- else transform._source_code_artifact.hash
157
- )
158
- if hash != ref_hash:
159
- response = input(
160
- f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
161
- f"Proceed? (y/n)"
162
- )
163
- if response == "y":
164
- transform.source_code = source_code_path.read_text()
165
- transform.hash = hash
166
- else:
167
- logger.warning(
168
- "Please re-run `ln.context.track()` to make a new version"
169
- )
170
- return "rerun-the-notebook"
171
- else:
172
- logger.important("source code is already saved")
173
- else:
174
- transform.source_code = source_code_path.read_text()
175
- transform.hash = hash
176
-
177
- # track environment
178
- env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
179
- if env_path.exists():
180
- overwrite_env = True
181
- if run.environment_id is not None and from_cli:
182
- logger.important("run.environment is already saved")
183
- overwrite_env = False
184
- if overwrite_env:
185
- hash, _ = hash_file(env_path)
186
- artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
187
- new_env_artifact = artifact is None
188
- if new_env_artifact:
189
- artifact = ln.Artifact(
190
- env_path,
191
- description="requirements.txt",
192
- visibility=0,
193
- run=False,
194
- )
195
- artifact.save(upload=True, print_progress=False)
196
- run.environment = artifact
197
- if new_env_artifact:
198
- logger.debug(f"saved run.environment: {run.environment}")
199
-
200
- # set finished_at
201
- if finished_at:
202
- run.finished_at = datetime.now(timezone.utc)
203
-
204
- # track report and set is_consecutive
205
- if not is_notebook:
206
- run.is_consecutive = True
207
- run.save()
208
- else:
209
- if run.report_id is not None:
210
- hash, _ = hash_file(report_path) # ignore hash_type for now
211
- if hash != run.report.hash:
212
- response = input(
213
- f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
214
- )
215
- if response == "y":
216
- run.report.replace(report_path)
217
- run.report.save(upload=True)
218
- else:
219
- logger.important("keeping old report")
220
- else:
221
- logger.important("report is already saved")
222
- else:
223
- report_file = ln.Artifact(
224
- report_path,
225
- description=f"Report of run {run.uid}",
226
- visibility=0, # hidden file
227
- run=False,
228
- )
229
- report_file.save(upload=True, print_progress=False)
230
- run.report = report_file
231
- run.is_consecutive = is_consecutive
232
- run.save()
233
- logger.debug(
234
- f"saved transform.latest_run.report: {transform.latest_run.report}"
235
- )
236
- transform.save()
237
-
238
- # finalize
239
- if ln_setup.settings.instance.is_on_hub:
240
- identifier = ln_setup.settings.instance.slug
241
- logger.important(
242
- f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
243
- )
244
- if not from_cli:
245
- thing, name = (
246
- ("notebook", "notebook.ipynb")
247
- if is_notebook
248
- else ("script", "script.py")
249
- )
250
- logger.important(
251
- f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
252
- )
253
- # because run & transform changed, update the global context
254
- context._run = run
255
- context._transform = transform
256
- return None
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from datetime import datetime, timezone
5
+ from typing import TYPE_CHECKING
6
+
7
+ import lamindb_setup as ln_setup
8
+ from lamin_utils import logger
9
+ from lamindb_setup.core.hashing import hash_file
10
+
11
+ if TYPE_CHECKING:
12
+ from pathlib import Path
13
+
14
+ from lnschema_core import Run, Transform
15
+
16
+ from ._query_set import QuerySet
17
+
18
+
19
+ # this is from the get_title function in nbproject
20
+ # should be moved into lamindb sooner or later
21
+ def prepare_notebook(
22
+ nb,
23
+ strip_title: bool = False,
24
+ ) -> str | None:
25
+ """Strip title from the notebook if requested."""
26
+ title_found = False
27
+ for cell in nb.cells:
28
+ cell.metadata.clear() # strip cell metadata
29
+ if not title_found and cell["cell_type"] == "markdown":
30
+ lines = cell["source"].split("\n")
31
+ for i, line in enumerate(lines):
32
+ if line.startswith("# "):
33
+ line.lstrip("#").strip(" .").strip()
34
+ title_found = True
35
+ if strip_title:
36
+ lines.pop(i)
37
+ cell["source"] = "\n".join(lines)
38
+ return None
39
+
40
+
41
+ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
42
+ import nbformat
43
+ import traitlets.config as config
44
+ from nbconvert import HTMLExporter
45
+
46
+ with open(notebook_path, encoding="utf-8") as f:
47
+ notebook = nbformat.read(f, as_version=4)
48
+ prepare_notebook(notebook, strip_title=True)
49
+ notebook.metadata.clear() # strip notebook metadata
50
+ # if we were to export as ipynb, the following two lines would do it
51
+ # with open(output_path, "w", encoding="utf-8") as f:
52
+ # nbformat.write(notebook, f)
53
+ # instead we need all this code
54
+ c = config.Config()
55
+ c.HTMLExporter.preprocessors = []
56
+ c.HTMLExporter.exclude_input_prompt = True
57
+ c.HTMLExporter.exclude_output_prompt = True
58
+ c.HTMLExporter.anchor_link_text = " "
59
+ html_exporter = HTMLExporter(config=c)
60
+ html, _ = html_exporter.from_notebook_node(notebook)
61
+ output_path.write_text(html, encoding="utf-8")
62
+
63
+
64
+ def notebook_to_script(
65
+ transform: Transform, notebook_path: Path, script_path: Path
66
+ ) -> None:
67
+ import jupytext
68
+
69
+ notebook = jupytext.read(notebook_path)
70
+ py_content = jupytext.writes(notebook, fmt="py:percent")
71
+ # remove global metadata header
72
+ py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
73
+ # replace title
74
+ py_content = py_content.replace(f"# # {transform.name}", "# # transform.name")
75
+ script_path.write_text(py_content)
76
+
77
+
78
+ def save_context_core(
79
+ *,
80
+ run: Run,
81
+ transform: Transform,
82
+ filepath: Path,
83
+ finished_at: bool = False,
84
+ ignore_non_consecutive: bool | None = None,
85
+ from_cli: bool = False,
86
+ ) -> str | None:
87
+ from lnschema_core.models import (
88
+ format_field_value, # needs to come after lamindb was imported because of CLI use
89
+ )
90
+
91
+ import lamindb as ln
92
+
93
+ from .core._context import context, is_run_from_ipython
94
+
95
+ ln.settings.verbosity = "success"
96
+
97
+ # for scripts, things are easy
98
+ is_consecutive = True
99
+ is_notebook = transform.type == "notebook"
100
+ source_code_path = filepath
101
+ # for notebooks, we need more work
102
+ if is_notebook:
103
+ try:
104
+ import jupytext
105
+ from nbproject.dev import (
106
+ check_consecutiveness,
107
+ read_notebook,
108
+ )
109
+ except ImportError:
110
+ logger.error("install nbproject & jupytext: pip install nbproject jupytext")
111
+ return None
112
+ notebook_content = read_notebook(filepath) # type: ignore
113
+ if not ignore_non_consecutive: # ignore_non_consecutive is None or False
114
+ is_consecutive = check_consecutiveness(
115
+ notebook_content, calling_statement=".finish()"
116
+ )
117
+ if not is_consecutive:
118
+ response = "n" # ignore_non_consecutive == False
119
+ if ignore_non_consecutive is None:
120
+ response = input(
121
+ " Do you still want to proceed with finishing? (y/n) "
122
+ )
123
+ if response != "y":
124
+ return "aborted-non-consecutive"
125
+ # write the report
126
+ report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
127
+ ".ipynb", ".html"
128
+ )
129
+ notebook_to_report(filepath, report_path)
130
+ # write the source code
131
+ source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
132
+ ".ipynb", ".py"
133
+ )
134
+ notebook_to_script(transform, filepath, source_code_path)
135
+ ln.settings.creation.artifact_silence_missing_run_warning = True
136
+ # track source code
137
+ hash, _ = hash_file(source_code_path) # ignore hash_type for now
138
+ if (
139
+ transform._source_code_artifact_id is not None
140
+ or transform.source_code is not None # equivalent to transform.hash is not None
141
+ ):
142
+ # check if the hash of the transform source code matches
143
+ # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
144
+ ref_hash = (
145
+ transform.hash
146
+ if transform.hash is not None
147
+ else transform._source_code_artifact.hash
148
+ )
149
+ if hash != ref_hash:
150
+ response = input(
151
+ f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
152
+ f" Proceed? (y/n)"
153
+ )
154
+ if response == "y":
155
+ transform.source_code = source_code_path.read_text()
156
+ transform.hash = hash
157
+ else:
158
+ logger.warning("Please re-run `ln.track()` to make a new version")
159
+ return "rerun-the-notebook"
160
+ else:
161
+ logger.important("source code is already saved")
162
+ else:
163
+ transform.source_code = source_code_path.read_text()
164
+ transform.hash = hash
165
+
166
+ # track environment
167
+ env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
168
+ if env_path.exists():
169
+ overwrite_env = True
170
+ if run.environment_id is not None and from_cli:
171
+ logger.important("run.environment is already saved")
172
+ overwrite_env = False
173
+ if overwrite_env:
174
+ hash, _ = hash_file(env_path)
175
+ artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
176
+ new_env_artifact = artifact is None
177
+ if new_env_artifact:
178
+ artifact = ln.Artifact(
179
+ env_path,
180
+ description="requirements.txt",
181
+ visibility=0,
182
+ run=False,
183
+ )
184
+ artifact.save(upload=True, print_progress=False)
185
+ run.environment = artifact
186
+ if new_env_artifact:
187
+ logger.debug(f"saved run.environment: {run.environment}")
188
+
189
+ # set finished_at
190
+ if finished_at:
191
+ run.finished_at = datetime.now(timezone.utc)
192
+
193
+ # track report and set is_consecutive
194
+ if not is_notebook:
195
+ run.is_consecutive = True
196
+ run.save()
197
+ else:
198
+ if run.report_id is not None:
199
+ hash, _ = hash_file(report_path) # ignore hash_type for now
200
+ if hash != run.report.hash:
201
+ response = input(
202
+ f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
203
+ )
204
+ if response == "y":
205
+ run.report.replace(report_path)
206
+ run.report.save(upload=True)
207
+ else:
208
+ logger.important("keeping old report")
209
+ else:
210
+ logger.important("report is already saved")
211
+ else:
212
+ report_file = ln.Artifact(
213
+ report_path,
214
+ description=f"Report of run {run.uid}",
215
+ visibility=0, # hidden file
216
+ run=False,
217
+ )
218
+ report_file.save(upload=True, print_progress=False)
219
+ run.report = report_file
220
+ run.is_consecutive = is_consecutive
221
+ run.save()
222
+ logger.debug(
223
+ f"saved transform.latest_run.report: {transform.latest_run.report}"
224
+ )
225
+ transform.save()
226
+
227
+ # finalize
228
+ if not from_cli:
229
+ run_time = run.finished_at - run.started_at
230
+ logger.important(
231
+ f"finished Run('{run.uid[:8]}') after {run_time} at {format_field_value(run.finished_at)}"
232
+ )
233
+ if ln_setup.settings.instance.is_on_hub:
234
+ identifier = ln_setup.settings.instance.slug
235
+ logger.important(
236
+ f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
237
+ )
238
+ if not from_cli:
239
+ thing, name = (
240
+ ("notebook", "notebook.ipynb")
241
+ if is_notebook
242
+ else ("script", "script.py")
243
+ )
244
+ logger.important(
245
+ f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
246
+ )
247
+ # because run & transform changed, update the global context
248
+ context._run = run
249
+ context._transform = transform
250
+ return None