lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +39 -32
- lamindb/_artifact.py +95 -64
- lamindb/_can_curate.py +19 -10
- lamindb/_collection.py +51 -49
- lamindb/_feature.py +9 -9
- lamindb/_finish.py +99 -86
- lamindb/_from_values.py +20 -17
- lamindb/_is_versioned.py +2 -1
- lamindb/_parents.py +23 -16
- lamindb/_query_manager.py +3 -3
- lamindb/_query_set.py +85 -18
- lamindb/_record.py +121 -46
- lamindb/_run.py +3 -3
- lamindb/_save.py +14 -8
- lamindb/{_feature_set.py → _schema.py} +34 -31
- lamindb/_storage.py +2 -1
- lamindb/_transform.py +51 -23
- lamindb/_ulabel.py +17 -8
- lamindb/_view.py +15 -14
- lamindb/base/__init__.py +24 -0
- lamindb/base/fields.py +281 -0
- lamindb/base/ids.py +103 -0
- lamindb/base/types.py +51 -0
- lamindb/base/users.py +30 -0
- lamindb/base/validation.py +67 -0
- lamindb/core/__init__.py +19 -14
- lamindb/core/_context.py +297 -228
- lamindb/core/_data.py +44 -49
- lamindb/core/_describe.py +41 -31
- lamindb/core/_django.py +59 -44
- lamindb/core/_feature_manager.py +192 -168
- lamindb/core/_label_manager.py +22 -22
- lamindb/core/_mapped_collection.py +17 -14
- lamindb/core/_settings.py +1 -12
- lamindb/core/_sync_git.py +56 -9
- lamindb/core/_track_environment.py +1 -1
- lamindb/core/datasets/_core.py +5 -6
- lamindb/core/exceptions.py +0 -7
- lamindb/core/fields.py +1 -1
- lamindb/core/loaders.py +18 -2
- lamindb/core/{schema.py → relations.py} +22 -19
- lamindb/core/storage/_anndata_accessor.py +1 -2
- lamindb/core/storage/_backed_access.py +2 -1
- lamindb/core/storage/_tiledbsoma.py +40 -13
- lamindb/core/storage/objects.py +1 -1
- lamindb/core/storage/paths.py +13 -8
- lamindb/core/subsettings/__init__.py +0 -2
- lamindb/core/types.py +2 -23
- lamindb/core/versioning.py +11 -7
- lamindb/{_curate.py → curators/__init__.py} +700 -57
- lamindb/curators/_spatial.py +528 -0
- lamindb/integrations/_vitessce.py +1 -3
- lamindb/migrations/0052_squashed.py +1261 -0
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
- lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
- lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
- lamindb/migrations/0060_alter_artifact__actions.py +22 -0
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
- lamindb/migrations/0062_add_is_latest_field.py +32 -0
- lamindb/migrations/0063_populate_latest_field.py +45 -0
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
- lamindb/migrations/0069_squashed.py +1770 -0
- lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
- lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
- lamindb/migrations/0073_merge_ourprojects.py +945 -0
- lamindb/migrations/0074_lamindbv1_part4.py +374 -0
- lamindb/migrations/0075_lamindbv1_part5.py +276 -0
- lamindb/migrations/0076_lamindbv1_part6.py +621 -0
- lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
- lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
- lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
- lamindb/migrations/__init__.py +0 -0
- lamindb/models.py +4064 -0
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
- lamindb-1.0rc1.dist-info/RECORD +100 -0
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
- lamindb/core/subsettings/_transform_settings.py +0 -21
- lamindb-0.77.2.dist-info/RECORD +0 -63
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/_context.py
CHANGED
@@ -2,35 +2,39 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import builtins
|
4
4
|
import hashlib
|
5
|
+
import signal
|
6
|
+
import sys
|
7
|
+
import threading
|
8
|
+
import traceback
|
5
9
|
from datetime import datetime, timezone
|
6
|
-
from pathlib import Path
|
10
|
+
from pathlib import Path
|
7
11
|
from typing import TYPE_CHECKING
|
8
12
|
|
9
13
|
import lamindb_setup as ln_setup
|
14
|
+
from django.db.models import Func, IntegerField
|
10
15
|
from lamin_utils import logger
|
11
16
|
from lamindb_setup.core.hashing import hash_file
|
12
|
-
|
13
|
-
from
|
14
|
-
from
|
17
|
+
|
18
|
+
from lamindb.base import ids
|
19
|
+
from lamindb.base.ids import base62_12
|
20
|
+
from lamindb.models import Run, Transform, format_field_value
|
15
21
|
|
16
22
|
from ._settings import settings
|
17
23
|
from ._sync_git import get_transform_reference_from_git_repo
|
18
24
|
from ._track_environment import track_environment
|
19
25
|
from .exceptions import (
|
20
26
|
InconsistentKey,
|
21
|
-
MissingContextUID,
|
22
27
|
NotebookNotSaved,
|
23
|
-
NoTitleError,
|
24
28
|
TrackNotCalled,
|
25
29
|
UpdateContext,
|
26
30
|
)
|
27
|
-
from .subsettings._transform_settings import transform_settings
|
28
31
|
from .versioning import bump_version as bump_version_function
|
29
32
|
from .versioning import increment_base62, message_update_key_in_version_family
|
30
33
|
|
31
34
|
if TYPE_CHECKING:
|
32
35
|
from lamindb_setup.core.types import UPathStr
|
33
|
-
|
36
|
+
|
37
|
+
from lamindb.base.types import TransformType
|
34
38
|
|
35
39
|
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
36
40
|
|
@@ -46,7 +50,7 @@ def get_uid_ext(version: str) -> str:
|
|
46
50
|
return encodebytes(hashlib.md5(version.encode()).digest())[:4] # noqa: S324
|
47
51
|
|
48
52
|
|
49
|
-
def get_notebook_path():
|
53
|
+
def get_notebook_path() -> Path:
|
50
54
|
from nbproject.dev._jupyter_communicate import (
|
51
55
|
notebook_path as get_notebook_path,
|
52
56
|
)
|
@@ -58,50 +62,25 @@ def get_notebook_path():
|
|
58
62
|
raise RuntimeError(msg_path_failed) from None
|
59
63
|
if path is None:
|
60
64
|
raise RuntimeError(msg_path_failed) from None
|
61
|
-
return path
|
65
|
+
return Path(path)
|
62
66
|
|
63
67
|
|
64
68
|
# from https://stackoverflow.com/questions/61901628
|
65
|
-
def
|
69
|
+
def get_notebook_key_colab() -> str:
|
66
70
|
from socket import gethostbyname, gethostname # type: ignore
|
67
71
|
|
68
72
|
from requests import get # type: ignore
|
69
73
|
|
70
74
|
ip = gethostbyname(gethostname()) # 172.28.0.12
|
71
75
|
try:
|
72
|
-
|
76
|
+
key = get(f"http://{ip}:9000/api/sessions").json()[0]["name"] # noqa: S113
|
77
|
+
key = f"colab/{key}"
|
73
78
|
except Exception:
|
74
79
|
logger.warning(
|
75
|
-
"could not get notebook
|
76
|
-
)
|
77
|
-
name = "notebook.ipynb"
|
78
|
-
return name.rstrip(".ipynb")
|
79
|
-
|
80
|
-
|
81
|
-
def raise_missing_context(transform_type: str, key: str) -> bool:
|
82
|
-
transform = Transform.filter(key=key).latest_version().first()
|
83
|
-
if transform is None:
|
84
|
-
new_uid = f"{base62_12()}0000"
|
85
|
-
message = f'to track this {transform_type}, run: ln.track("{new_uid}")'
|
86
|
-
else:
|
87
|
-
uid = transform.uid
|
88
|
-
new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
|
89
|
-
message = (
|
90
|
-
f"you already have a transform with key '{key}': Transform('{transform.uid[:8]}')\n"
|
91
|
-
f' (1) to make a revision, run: ln.track("{new_uid}")\n (2) to create a new transform, rename your {transform_type} file and re-run: ln.track()'
|
80
|
+
"could not get notebook key from Google Colab, using: colab/notebook.ipynb"
|
92
81
|
)
|
93
|
-
|
94
|
-
|
95
|
-
response = input("→ Ready to re-run? (y/n)")
|
96
|
-
if response == "y":
|
97
|
-
logger.important(
|
98
|
-
"note: restart your notebook if you want consecutive cell execution"
|
99
|
-
)
|
100
|
-
return True
|
101
|
-
raise MissingContextUID("Please follow the instructions.")
|
102
|
-
else:
|
103
|
-
raise MissingContextUID(f"✗ {message}")
|
104
|
-
return False
|
82
|
+
key = "colab/notebook.ipynb"
|
83
|
+
return key
|
105
84
|
|
106
85
|
|
107
86
|
def pretty_pypackages(dependencies: dict) -> str:
|
@@ -115,6 +94,83 @@ def pretty_pypackages(dependencies: dict) -> str:
|
|
115
94
|
return " ".join(deps_list)
|
116
95
|
|
117
96
|
|
97
|
+
class LogStreamHandler:
|
98
|
+
def __init__(self, log_stream, file):
|
99
|
+
self.log_stream = log_stream
|
100
|
+
self.file = file
|
101
|
+
|
102
|
+
def write(self, data):
|
103
|
+
self.log_stream.write(data)
|
104
|
+
self.file.write(data)
|
105
|
+
self.file.flush()
|
106
|
+
|
107
|
+
def flush(self):
|
108
|
+
self.log_stream.flush()
|
109
|
+
self.file.flush()
|
110
|
+
|
111
|
+
|
112
|
+
class LogStreamTracker:
|
113
|
+
def __init__(self):
|
114
|
+
self.original_stdout = None
|
115
|
+
self.original_stderr = None
|
116
|
+
self.log_file = None
|
117
|
+
self.original_excepthook = sys.excepthook
|
118
|
+
self.is_cleaning_up = False
|
119
|
+
|
120
|
+
def start(self, run: Run):
|
121
|
+
self.original_stdout = sys.stdout
|
122
|
+
self.original_stderr = sys.stderr
|
123
|
+
self.run = run
|
124
|
+
self.log_file_path = (
|
125
|
+
ln_setup.settings.cache_dir / f"run_logs_{self.run.uid}.txt"
|
126
|
+
)
|
127
|
+
self.log_file = open(self.log_file_path, "w")
|
128
|
+
sys.stdout = LogStreamHandler(self.original_stdout, self.log_file)
|
129
|
+
sys.stderr = LogStreamHandler(self.original_stderr, self.log_file)
|
130
|
+
# handle signals
|
131
|
+
# signal should be used only in the main thread, otherwise
|
132
|
+
# ValueError: signal only works in main thread of the main interpreter
|
133
|
+
if threading.current_thread() == threading.main_thread():
|
134
|
+
signal.signal(signal.SIGTERM, self.cleanup)
|
135
|
+
signal.signal(signal.SIGINT, self.cleanup)
|
136
|
+
# handle exceptions
|
137
|
+
sys.excepthook = self.handle_exception
|
138
|
+
|
139
|
+
def finish(self):
|
140
|
+
if self.original_stdout:
|
141
|
+
sys.stdout = self.original_stdout
|
142
|
+
sys.stderr = self.original_stderr
|
143
|
+
self.log_file.close()
|
144
|
+
|
145
|
+
def cleanup(self, signo=None, frame=None):
|
146
|
+
from lamindb._finish import save_run_logs
|
147
|
+
|
148
|
+
if self.original_stdout and not self.is_cleaning_up:
|
149
|
+
self.is_cleaning_up = True
|
150
|
+
if signo is not None:
|
151
|
+
signal_msg = f"\nProcess terminated by signal {signo} ({signal.Signals(signo).name})\n"
|
152
|
+
if frame:
|
153
|
+
signal_msg += (
|
154
|
+
f"Frame info:\n{''.join(traceback.format_stack(frame))}"
|
155
|
+
)
|
156
|
+
self.log_file.write(signal_msg)
|
157
|
+
sys.stdout = self.original_stdout
|
158
|
+
sys.stderr = self.original_stderr
|
159
|
+
self.log_file.flush()
|
160
|
+
self.log_file.close()
|
161
|
+
save_run_logs(self.run, save_run=True)
|
162
|
+
|
163
|
+
def handle_exception(self, exc_type, exc_value, exc_traceback):
|
164
|
+
if not self.is_cleaning_up:
|
165
|
+
error_msg = f"{''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))}"
|
166
|
+
if self.log_file.closed:
|
167
|
+
self.log_file = open(self.log_file_path, "a")
|
168
|
+
self.log_file.write(error_msg)
|
169
|
+
self.log_file.flush()
|
170
|
+
self.cleanup()
|
171
|
+
self.original_excepthook(exc_type, exc_value, exc_traceback)
|
172
|
+
|
173
|
+
|
118
174
|
class Context:
|
119
175
|
"""Run context.
|
120
176
|
|
@@ -136,7 +192,7 @@ class Context:
|
|
136
192
|
|
137
193
|
def __init__(self):
|
138
194
|
self._uid: str | None = None
|
139
|
-
self.
|
195
|
+
self._description: str | None = None
|
140
196
|
self._version: str | None = None
|
141
197
|
self._transform: Transform | None = None
|
142
198
|
self._run: Run | None = None
|
@@ -144,6 +200,7 @@ class Context:
|
|
144
200
|
"""A local path to the script that's running."""
|
145
201
|
self._logging_message_track: str = ""
|
146
202
|
self._logging_message_imports: str = ""
|
203
|
+
self._stream_tracker: LogStreamTracker = LogStreamTracker()
|
147
204
|
|
148
205
|
@property
|
149
206
|
def transform(self) -> Transform | None:
|
@@ -151,22 +208,31 @@ class Context:
|
|
151
208
|
return self._transform
|
152
209
|
|
153
210
|
@property
|
154
|
-
def
|
155
|
-
"""`
|
156
|
-
return self.
|
211
|
+
def description(self) -> str | None:
|
212
|
+
"""`description` argument for `context.transform`."""
|
213
|
+
return self._description
|
157
214
|
|
158
|
-
@
|
159
|
-
def
|
160
|
-
self.
|
215
|
+
@description.setter
|
216
|
+
def description(self, value: str | None):
|
217
|
+
self._description = value
|
161
218
|
|
162
219
|
@property
|
163
220
|
def name(self) -> str | None:
|
164
|
-
"""`
|
165
|
-
return self.
|
221
|
+
"""Deprecated. Populates `description` argument for `context.transform`."""
|
222
|
+
return self._description
|
166
223
|
|
167
224
|
@name.setter
|
168
225
|
def name(self, value: str | None):
|
169
|
-
self.
|
226
|
+
self._description = value
|
227
|
+
|
228
|
+
@property
|
229
|
+
def uid(self) -> str | None:
|
230
|
+
"""`uid` argument for `context.transform`."""
|
231
|
+
return self._uid
|
232
|
+
|
233
|
+
@uid.setter
|
234
|
+
def uid(self, value: str | None):
|
235
|
+
self._uid = value
|
170
236
|
|
171
237
|
@property
|
172
238
|
def version(self) -> str | None:
|
@@ -189,13 +255,14 @@ class Context:
|
|
189
255
|
params: dict | None = None,
|
190
256
|
new_run: bool | None = None,
|
191
257
|
path: str | None = None,
|
258
|
+
log_to_file: bool | None = None,
|
192
259
|
) -> None:
|
193
260
|
"""Initiate a run with tracked data lineage.
|
194
261
|
|
195
262
|
- sets :attr:`~lamindb.core.Context.transform` &
|
196
263
|
:attr:`~lamindb.core.Context.run` by creating or loading `Transform` &
|
197
264
|
`Run` records
|
198
|
-
- saves
|
265
|
+
- saves Python environment as a `requirements.txt` file: `run.environment`
|
199
266
|
|
200
267
|
If :attr:`~lamindb.core.Settings.sync_git_repo` is set, checks whether a
|
201
268
|
script-like transform exists in a git repository and links it.
|
@@ -203,110 +270,49 @@ class Context:
|
|
203
270
|
Args:
|
204
271
|
transform: A transform `uid` or record. If `None`, creates a `uid`.
|
205
272
|
params: A dictionary of parameters to track for the run.
|
206
|
-
new_run: If `False`, loads latest run of transform
|
207
|
-
(default notebook), if `True`, creates new run (default
|
273
|
+
new_run: If `False`, loads the latest run of transform
|
274
|
+
(default notebook), if `True`, creates new run (default non-notebook).
|
208
275
|
path: Filepath of notebook or script. Only needed if it can't be
|
209
276
|
automatically detected.
|
277
|
+
log_to_file: If `True`, logs stdout and stderr to a file and
|
278
|
+
saves the file within the current run (default non-notebook),
|
279
|
+
if `False`, does not log the output (default notebook).
|
210
280
|
|
211
281
|
Examples:
|
212
282
|
|
213
|
-
To create a transform `uid` for tracking a script or notebook, call:
|
214
|
-
|
215
|
-
>>> ln.track()
|
216
|
-
|
217
283
|
To track the run of a notebook or script, call:
|
218
284
|
|
219
|
-
>>> ln.track(
|
285
|
+
>>> ln.track()
|
220
286
|
|
221
287
|
"""
|
222
288
|
self._logging_message_track = ""
|
223
289
|
self._logging_message_imports = ""
|
224
|
-
uid = None
|
225
290
|
if transform is not None and isinstance(transform, str):
|
226
|
-
uid = transform
|
227
|
-
self.uid = uid
|
291
|
+
self.uid = transform
|
228
292
|
transform = None
|
229
293
|
self._path = None
|
230
294
|
if transform is None:
|
231
|
-
|
232
|
-
transform_settings_are_set = (
|
233
|
-
transform_settings.stem_uid is not None
|
234
|
-
and transform_settings.version is not None
|
235
|
-
)
|
236
|
-
transform = None
|
237
|
-
stem_uid = None
|
238
|
-
# you can set ln.context.uid and then call ln.track() without passing anythin
|
239
|
-
# that has been the preferred syntax for a while; we'll likely
|
240
|
-
# deprecate it at some point
|
241
|
-
if uid is not None or self.uid is not None:
|
242
|
-
transform = Transform.filter(uid=self.uid).one_or_none()
|
243
|
-
if self.version is not None:
|
244
|
-
# test inconsistent version passed
|
245
|
-
if (
|
246
|
-
transform is not None
|
247
|
-
and transform.version is not None # type: ignore
|
248
|
-
and self.version != transform.version # type: ignore
|
249
|
-
):
|
250
|
-
raise SystemExit(
|
251
|
-
f"Please pass consistent version: ln.context.version = '{transform.version}'" # type: ignore
|
252
|
-
)
|
253
|
-
# test whether version was already used for another member of the family
|
254
|
-
suid, vuid = (
|
255
|
-
self.uid[: Transform._len_stem_uid],
|
256
|
-
self.uid[Transform._len_stem_uid :],
|
257
|
-
)
|
258
|
-
transform = Transform.filter(
|
259
|
-
uid__startswith=suid, version=self.version
|
260
|
-
).one_or_none()
|
261
|
-
if (
|
262
|
-
transform is not None
|
263
|
-
and vuid != transform.uid[Transform._len_stem_uid :]
|
264
|
-
):
|
265
|
-
better_version = bump_version_function(self.version)
|
266
|
-
raise SystemExit(
|
267
|
-
f"Version '{self.version}' is already taken by Transform(uid='{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
|
268
|
-
)
|
269
|
-
elif transform_settings_are_set:
|
270
|
-
stem_uid, self.version = (
|
271
|
-
transform_settings.stem_uid,
|
272
|
-
transform_settings.version,
|
273
|
-
)
|
274
|
-
transform = Transform.filter(
|
275
|
-
uid__startswith=stem_uid, version=self.version
|
276
|
-
).one_or_none()
|
295
|
+
description = None
|
277
296
|
if is_run_from_ipython:
|
278
|
-
|
297
|
+
self._path, description = self._track_notebook(path_str=path)
|
279
298
|
transform_type = "notebook"
|
280
299
|
transform_ref = None
|
281
300
|
transform_ref_type = None
|
282
301
|
else:
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
)
|
289
|
-
if
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
name=name,
|
298
|
-
transform_ref=transform_ref,
|
299
|
-
transform_ref_type=transform_ref_type,
|
300
|
-
transform_type=transform_type,
|
301
|
-
key=key,
|
302
|
-
transform=transform,
|
303
|
-
)
|
304
|
-
# if no error is raised, the transform is tracked
|
305
|
-
is_tracked = True
|
306
|
-
if not is_tracked:
|
307
|
-
early_return = raise_missing_context(transform_type, key)
|
308
|
-
if early_return:
|
309
|
-
return None
|
302
|
+
(
|
303
|
+
self._path,
|
304
|
+
transform_type,
|
305
|
+
transform_ref,
|
306
|
+
transform_ref_type,
|
307
|
+
) = self._track_source_code(path=path)
|
308
|
+
if description is None:
|
309
|
+
description = self._description
|
310
|
+
self._create_or_load_transform(
|
311
|
+
description=description,
|
312
|
+
transform_ref=transform_ref,
|
313
|
+
transform_ref_type=transform_ref_type,
|
314
|
+
transform_type=transform_type,
|
315
|
+
)
|
310
316
|
else:
|
311
317
|
if transform.type in {"notebook", "script"}:
|
312
318
|
raise ValueError(
|
@@ -319,14 +325,10 @@ class Context:
|
|
319
325
|
transform_exists = Transform.filter(id=transform.id).first()
|
320
326
|
if transform_exists is None:
|
321
327
|
transform.save()
|
322
|
-
self._logging_message_track += (
|
323
|
-
f"created Transform('{transform.uid[:8]}')"
|
324
|
-
)
|
328
|
+
self._logging_message_track += f"created Transform('{transform.uid}')"
|
325
329
|
transform_exists = transform
|
326
330
|
else:
|
327
|
-
self._logging_message_track += (
|
328
|
-
f"loaded Transform('{transform.uid[:8]}')"
|
329
|
-
)
|
331
|
+
self._logging_message_track += f"loaded Transform('{transform.uid}')"
|
330
332
|
self._transform = transform_exists
|
331
333
|
|
332
334
|
if new_run is None: # for notebooks, default to loading latest runs
|
@@ -343,7 +345,7 @@ class Context:
|
|
343
345
|
)
|
344
346
|
if run is not None: # loaded latest run
|
345
347
|
run.started_at = datetime.now(timezone.utc) # update run time
|
346
|
-
self._logging_message_track += f", re-started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
|
348
|
+
self._logging_message_track += f", re-started Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
|
347
349
|
|
348
350
|
if run is None: # create new run
|
349
351
|
run = Run(
|
@@ -351,7 +353,7 @@ class Context:
|
|
351
353
|
params=params,
|
352
354
|
)
|
353
355
|
run.started_at = datetime.now(timezone.utc)
|
354
|
-
self._logging_message_track += f", started new Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
|
356
|
+
self._logging_message_track += f", started new Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
|
355
357
|
# can only determine at ln.finish() if run was consecutive in
|
356
358
|
# interactive session, otherwise, is consecutive
|
357
359
|
run.is_consecutive = True if is_run_from_ipython else None
|
@@ -359,11 +361,15 @@ class Context:
|
|
359
361
|
run.save()
|
360
362
|
if params is not None:
|
361
363
|
run.params.add_values(params)
|
362
|
-
self._logging_message_track += "\n→ params: " + " ".join(
|
363
|
-
f"{key}=
|
364
|
+
self._logging_message_track += "\n→ params: " + ", ".join(
|
365
|
+
f"{key}={value}" for key, value in params.items()
|
364
366
|
)
|
365
367
|
self._run = run
|
366
368
|
track_environment(run)
|
369
|
+
if log_to_file is None:
|
370
|
+
log_to_file = self.transform.type != "notebook"
|
371
|
+
if log_to_file:
|
372
|
+
self._stream_tracker.start(run)
|
367
373
|
logger.important(self._logging_message_track)
|
368
374
|
if self._logging_message_imports:
|
369
375
|
logger.important(self._logging_message_imports)
|
@@ -372,61 +378,59 @@ class Context:
|
|
372
378
|
self,
|
373
379
|
*,
|
374
380
|
path: UPathStr | None,
|
375
|
-
) -> tuple[
|
381
|
+
) -> tuple[Path, str, str, str]:
|
382
|
+
# for `.py` files, classified as "script"
|
383
|
+
# for `.Rmd` and `.qmd` files, which we classify
|
384
|
+
# as "notebook" because they typically come with an .html run report
|
376
385
|
if path is None:
|
377
386
|
import inspect
|
378
387
|
|
379
388
|
frame = inspect.stack()[2]
|
380
389
|
module = inspect.getmodule(frame[0])
|
381
|
-
|
390
|
+
# None for interactive session
|
391
|
+
if module is None:
|
392
|
+
raise NotImplementedError(
|
393
|
+
"Interactive sessions are not yet supported to be tracked."
|
394
|
+
)
|
395
|
+
path = Path(module.__file__)
|
382
396
|
else:
|
383
|
-
|
384
|
-
transform_type =
|
385
|
-
"notebook" if self._path.suffix in {".Rmd", ".qmd"} else "script"
|
386
|
-
)
|
387
|
-
name = self._path.name
|
388
|
-
key = name
|
397
|
+
path = Path(path)
|
398
|
+
transform_type = "notebook" if path.suffix in {".Rmd", ".qmd"} else "script"
|
389
399
|
reference = None
|
390
400
|
reference_type = None
|
391
401
|
if settings.sync_git_repo is not None:
|
392
|
-
reference = get_transform_reference_from_git_repo(
|
402
|
+
reference = get_transform_reference_from_git_repo(path)
|
393
403
|
reference_type = "url"
|
394
|
-
return
|
404
|
+
return path, transform_type, reference, reference_type
|
395
405
|
|
396
406
|
def _track_notebook(
|
397
407
|
self,
|
398
408
|
*,
|
399
|
-
|
400
|
-
):
|
401
|
-
if
|
409
|
+
path_str: str | None,
|
410
|
+
) -> tuple[Path, str | None]:
|
411
|
+
if path_str is None:
|
402
412
|
path = get_notebook_path()
|
403
|
-
key = Path(path).name
|
404
|
-
if isinstance(path, (Path, PurePath)):
|
405
|
-
path_str = path.as_posix() # type: ignore
|
406
413
|
else:
|
407
|
-
|
414
|
+
path = Path(path_str)
|
415
|
+
description = None
|
416
|
+
path_str = path.as_posix()
|
408
417
|
if path_str.endswith("Untitled.ipynb"):
|
409
418
|
raise RuntimeError("Please rename your notebook before tracking it")
|
410
419
|
if path_str.startswith("/fileId="):
|
411
|
-
|
412
|
-
|
420
|
+
logger.warning("tracking on Google Colab is experimental")
|
421
|
+
path_str = get_notebook_key_colab()
|
422
|
+
path = Path(path_str)
|
413
423
|
else:
|
414
424
|
import nbproject
|
415
425
|
|
416
426
|
try:
|
417
427
|
nbproject_title = nbproject.meta.live.title
|
418
428
|
except IndexError:
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
raise NoTitleError(
|
425
|
-
"Please add a title to your notebook in a markdown cell: # Title"
|
426
|
-
) from None
|
427
|
-
name = nbproject_title
|
428
|
-
# log imported python packages
|
429
|
-
if not path_str.startswith("/fileId="):
|
429
|
+
# notebook is not saved
|
430
|
+
pass
|
431
|
+
if nbproject_title is not None:
|
432
|
+
description = nbproject_title
|
433
|
+
# log imported python packages
|
430
434
|
try:
|
431
435
|
from nbproject.dev._pypackage import infer_pypackages
|
432
436
|
|
@@ -438,21 +442,15 @@ class Context:
|
|
438
442
|
except Exception:
|
439
443
|
logger.debug("inferring imported packages failed")
|
440
444
|
pass
|
441
|
-
|
442
|
-
return key, name
|
445
|
+
return path, description
|
443
446
|
|
444
447
|
def _create_or_load_transform(
|
445
448
|
self,
|
446
449
|
*,
|
447
|
-
|
448
|
-
stem_uid: str | None,
|
449
|
-
version: str | None,
|
450
|
-
name: str,
|
450
|
+
description: str,
|
451
451
|
transform_ref: str | None = None,
|
452
452
|
transform_ref_type: str | None = None,
|
453
|
-
key: str | None = None,
|
454
453
|
transform_type: TransformType = None,
|
455
|
-
transform: Transform | None = None,
|
456
454
|
):
|
457
455
|
def get_key_clashing_message(transform: Transform, key: str) -> str:
|
458
456
|
update_key_note = message_update_key_in_version_family(
|
@@ -462,63 +460,139 @@ class Context:
|
|
462
460
|
registry="Transform",
|
463
461
|
)
|
464
462
|
return (
|
465
|
-
f'
|
463
|
+
f'Filepath "{key}" clashes with the existing key "{transform.key}" for uid "{transform.uid[:-4]}...."\n\nEither init a new transform with a new uid:\n\n'
|
466
464
|
f'ln.track("{ids.base62_12()}0000")\n\n{update_key_note}'
|
467
465
|
)
|
468
466
|
|
467
|
+
revises = None
|
468
|
+
# the user did not pass the uid
|
469
|
+
if self.uid is None:
|
470
|
+
|
471
|
+
class SlashCount(Func):
|
472
|
+
template = "LENGTH(%(expressions)s) - LENGTH(REPLACE(%(expressions)s, '/', ''))"
|
473
|
+
output_field = IntegerField()
|
474
|
+
|
475
|
+
# we need to traverse from greater depth to shorter depth so that we match better matches first
|
476
|
+
transforms = (
|
477
|
+
Transform.filter(key__endswith=self._path.name, is_latest=True)
|
478
|
+
.annotate(slash_count=SlashCount("key"))
|
479
|
+
.order_by("-slash_count")
|
480
|
+
)
|
481
|
+
uid = f"{base62_12()}0000"
|
482
|
+
key = self._path.name
|
483
|
+
target_transform = None
|
484
|
+
hash, _ = hash_file(self._path)
|
485
|
+
if len(transforms) != 0:
|
486
|
+
message = ""
|
487
|
+
found_key = False
|
488
|
+
for aux_transform in transforms:
|
489
|
+
if aux_transform.key in self._path.as_posix():
|
490
|
+
key = aux_transform.key
|
491
|
+
if (
|
492
|
+
aux_transform.source_code is None
|
493
|
+
or aux_transform.hash == hash
|
494
|
+
):
|
495
|
+
uid = aux_transform.uid
|
496
|
+
target_transform = aux_transform
|
497
|
+
else:
|
498
|
+
uid = f"{aux_transform.uid[:-4]}{increment_base62(aux_transform.uid[-4:])}"
|
499
|
+
message = f"there already is a transform with key '{aux_transform.key}', creating new version '{uid}'"
|
500
|
+
revises = aux_transform
|
501
|
+
found_key = True
|
502
|
+
break
|
503
|
+
if not found_key:
|
504
|
+
plural_s = "s" if len(transforms) > 1 else ""
|
505
|
+
transforms_str = "\n".join(
|
506
|
+
[
|
507
|
+
f" {transform.uid} → {transform.key}"
|
508
|
+
for transform in transforms
|
509
|
+
]
|
510
|
+
)
|
511
|
+
message = f"ignoring transform{plural_s} with same filedescription:\n{transforms_str}"
|
512
|
+
if message != "":
|
513
|
+
logger.important(message)
|
514
|
+
self.uid, transform = uid, target_transform
|
515
|
+
# the user did pass the uid
|
516
|
+
else:
|
517
|
+
transform = Transform.filter(uid=self.uid).one_or_none()
|
518
|
+
if transform is not None:
|
519
|
+
if transform.key not in self._path.as_posix():
|
520
|
+
n_parts = len(Path(transform.key).parts)
|
521
|
+
last_path_elements = (
|
522
|
+
Path(*self._path.parts[-n_parts:]).as_posix()
|
523
|
+
if n_parts > 0
|
524
|
+
else ""
|
525
|
+
)
|
526
|
+
raise UpdateContext(
|
527
|
+
get_key_clashing_message(transform, last_path_elements)
|
528
|
+
)
|
529
|
+
key = transform.key # type: ignore
|
530
|
+
else:
|
531
|
+
key = self._path.name
|
532
|
+
if self.version is not None:
|
533
|
+
# test inconsistent version passed
|
534
|
+
if (
|
535
|
+
transform is not None
|
536
|
+
and transform.version is not None # type: ignore
|
537
|
+
and self.version != transform.version # type: ignore
|
538
|
+
):
|
539
|
+
raise SystemExit(
|
540
|
+
f"✗ please pass consistent version: ln.context.version = '{transform.version}'" # type: ignore
|
541
|
+
)
|
542
|
+
# test whether version was already used for another member of the family
|
543
|
+
suid, vuid = (self.uid[:-4], self.uid[-4:])
|
544
|
+
transform = Transform.filter(
|
545
|
+
uid__startswith=suid, version=self.version
|
546
|
+
).one_or_none()
|
547
|
+
if transform is not None and vuid != transform.uid[-4:]:
|
548
|
+
better_version = bump_version_function(self.version)
|
549
|
+
raise SystemExit(
|
550
|
+
f"✗ version '{self.version}' is already taken by Transform('{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
|
551
|
+
)
|
469
552
|
# make a new transform record
|
470
553
|
if transform is None:
|
471
|
-
if uid is None:
|
472
|
-
uid = f"{stem_uid}{get_uid_ext(version)}"
|
473
|
-
# let's query revises so that we can pass it to the constructor and use it for error handling
|
474
|
-
revises = (
|
475
|
-
Transform.filter(uid__startswith=uid[:-4], is_latest=True)
|
476
|
-
.order_by("-created_at")
|
477
|
-
.first()
|
478
|
-
)
|
479
|
-
# note that here we're not passing revises because we're not querying it
|
480
|
-
# hence, we need to do a revision family lookup based on key
|
481
|
-
# hence, we need key to be not None
|
482
554
|
assert key is not None # noqa: S101
|
483
555
|
raise_update_context = False
|
484
556
|
try:
|
485
557
|
transform = Transform(
|
486
|
-
uid=uid,
|
487
|
-
version=version,
|
488
|
-
|
558
|
+
uid=self.uid,
|
559
|
+
version=self.version,
|
560
|
+
description=description,
|
489
561
|
key=key,
|
490
562
|
reference=transform_ref,
|
491
563
|
reference_type=transform_ref_type,
|
492
564
|
type=transform_type,
|
493
|
-
revises=revises,
|
494
565
|
).save()
|
495
566
|
except InconsistentKey:
|
496
567
|
raise_update_context = True
|
497
568
|
if raise_update_context:
|
569
|
+
if revises is None:
|
570
|
+
revises = (
|
571
|
+
Transform.filter(uid__startswith=self.uid[:-4], is_latest=True)
|
572
|
+
.order_by("-created_at")
|
573
|
+
.first()
|
574
|
+
)
|
498
575
|
raise UpdateContext(get_key_clashing_message(revises, key))
|
499
|
-
self._logging_message_track += f"created Transform('{transform.uid
|
576
|
+
self._logging_message_track += f"created Transform('{transform.uid}')"
|
500
577
|
else:
|
501
578
|
uid = transform.uid
|
502
579
|
# transform was already saved via `finish()`
|
503
|
-
transform_was_saved =
|
504
|
-
transform._source_code_artifact_id is not None
|
505
|
-
or transform.source_code is not None
|
506
|
-
)
|
580
|
+
transform_was_saved = transform.source_code is not None
|
507
581
|
# check whether the transform.key is consistent
|
508
582
|
if transform.key != key:
|
509
583
|
raise UpdateContext(get_key_clashing_message(transform, key))
|
510
|
-
elif transform.
|
511
|
-
transform.
|
584
|
+
elif transform.description != description:
|
585
|
+
transform.description = description
|
512
586
|
transform.save()
|
513
587
|
self._logging_message_track += (
|
514
|
-
"updated transform
|
588
|
+
"updated transform description, " # white space on purpose
|
515
589
|
)
|
516
590
|
elif (
|
517
591
|
transform.created_by_id != ln_setup.settings.user.id
|
518
592
|
and not transform_was_saved
|
519
593
|
):
|
520
594
|
raise UpdateContext(
|
521
|
-
f'{transform.created_by.
|
595
|
+
f'{transform.created_by.description} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* filedescription and `ln.track("{ids.base62_12()}0000")`.'
|
522
596
|
)
|
523
597
|
# check whether transform source code was already saved
|
524
598
|
if transform_was_saved:
|
@@ -527,30 +601,23 @@ class Context:
|
|
527
601
|
bump_revision = True
|
528
602
|
else:
|
529
603
|
hash, _ = hash_file(self._path) # ignore hash_type for now
|
530
|
-
if transform.hash
|
531
|
-
condition = hash != transform.hash
|
532
|
-
else:
|
533
|
-
condition = hash != transform._source_code_artifact.hash
|
534
|
-
if condition:
|
604
|
+
if hash != transform.hash:
|
535
605
|
bump_revision = True
|
536
606
|
else:
|
537
607
|
self._logging_message_track += (
|
538
|
-
f"loaded Transform('{transform.uid
|
608
|
+
f"loaded Transform('{transform.uid}')"
|
539
609
|
)
|
540
610
|
if bump_revision:
|
541
611
|
change_type = (
|
542
|
-
"
|
612
|
+
"re-running saved notebook"
|
543
613
|
if is_run_from_ipython
|
544
|
-
else "
|
614
|
+
else "source code changed"
|
545
615
|
)
|
546
616
|
raise UpdateContext(
|
547
|
-
f
|
548
|
-
f'ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")'
|
617
|
+
f'✗ {change_type}, run: ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")'
|
549
618
|
)
|
550
619
|
else:
|
551
|
-
self._logging_message_track += (
|
552
|
-
f"loaded Transform('{transform.uid[:8]}')"
|
553
|
-
)
|
620
|
+
self._logging_message_track += f"loaded Transform('{transform.uid}')"
|
554
621
|
self._transform = transform
|
555
622
|
|
556
623
|
def finish(self, ignore_non_consecutive: None | bool = None) -> None:
|
@@ -580,8 +647,8 @@ class Context:
|
|
580
647
|
|
581
648
|
"""
|
582
649
|
from lamindb._finish import (
|
650
|
+
get_save_notebook_message,
|
583
651
|
get_seconds_since_modified,
|
584
|
-
get_shortcut,
|
585
652
|
save_context_core,
|
586
653
|
)
|
587
654
|
|
@@ -600,13 +667,13 @@ class Context:
|
|
600
667
|
import nbproject
|
601
668
|
|
602
669
|
# it might be that the user modifies the title just before ln.finish()
|
603
|
-
if (
|
604
|
-
|
670
|
+
if (
|
671
|
+
nbproject_title := nbproject.meta.live.title
|
672
|
+
) != self.transform.description:
|
673
|
+
self.transform.description = nbproject_title
|
605
674
|
self.transform.save()
|
606
675
|
if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
|
607
|
-
raise NotebookNotSaved(
|
608
|
-
f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `ln.finish()`"
|
609
|
-
)
|
676
|
+
raise NotebookNotSaved(get_save_notebook_message())
|
610
677
|
save_context_core(
|
611
678
|
run=self.run,
|
612
679
|
transform=self.run.transform,
|
@@ -614,6 +681,8 @@ class Context:
|
|
614
681
|
finished_at=True,
|
615
682
|
ignore_non_consecutive=ignore_non_consecutive,
|
616
683
|
)
|
684
|
+
if self.transform.type != "notebook":
|
685
|
+
self._stream_tracker.finish()
|
617
686
|
|
618
687
|
|
619
688
|
context = Context()
|