lamindb 0.76.0__py3-none-any.whl → 0.76.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,513 @@
1
+ from __future__ import annotations
2
+
3
+ import builtins
4
+ import hashlib
5
+ import os
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path, PurePath
8
+ from typing import TYPE_CHECKING
9
+
10
+ from lamin_utils import logger
11
+ from lamindb_setup.core.hashing import hash_file
12
+ from lnschema_core import Run, Transform, ids
13
+ from lnschema_core.ids import base62_12
14
+ from lnschema_core.users import current_user_id
15
+
16
+ from ._settings import settings
17
+ from ._sync_git import get_transform_reference_from_git_repo
18
+ from ._track_environment import track_environment
19
+ from .exceptions import (
20
+ MissingContext,
21
+ NotebookNotSaved,
22
+ NotebookNotSavedError,
23
+ NoTitleError,
24
+ TrackNotCalled,
25
+ UpdateContext,
26
+ )
27
+ from .subsettings._transform_settings import transform_settings
28
+ from .versioning import bump_version as bump_version_function
29
+ from .versioning import increment_base62
30
+
31
+ if TYPE_CHECKING:
32
+ from lamindb_setup.core.types import UPathStr
33
+ from lnschema_core.types import TransformType
34
+
35
# Truthy when `builtins` carries an `__IPYTHON__` attribute (presumably set by
# IPython/Jupyter kernels); falls back to False otherwise.
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)

# Shared error text for every failure to determine the notebook path.
msg_path_failed = "failed to infer notebook path.\nfix: pass `path` to ln.context.track()"
40
+
41
+
42
def get_uid_ext(version: str) -> str:
    """Derive a 4-character uid extension from a version string.

    The encoded version is MD5-hashed, the digest is passed through
    `lamin_utils._base62.encodebytes`, and the first four characters of the
    result are returned.
    """
    from lamin_utils._base62 import encodebytes

    # Merely zero-padding the nbproject version so that its base62 encoding is
    # at least 4 characters long doesn't yield sufficiently diverse values and
    # leads to collisions; it would have been nice because the uid_ext would
    # then be ordered. Hence the hash.
    digest = hashlib.md5(version.encode()).digest()  # noqa: S324
    encoded = encodebytes(digest)
    return encoded[:4]
49
+
50
+
51
def get_notebook_path():
    """Return the notebook path reported by nbproject.

    Raises:
        RuntimeError: If the nbproject lookup raises any exception or returns
            `None`; the message tells the user to pass `path` explicitly.
    """
    from nbproject.dev._jupyter_communicate import (
        notebook_path as query_notebook_path,
    )

    try:
        detected = query_notebook_path()
    except Exception:
        # hide the underlying traceback, the user only needs the fix hint
        raise RuntimeError(msg_path_failed) from None
    if detected is not None:
        return detected
    raise RuntimeError(msg_path_failed) from None
64
+
65
+
66
+ # from https://stackoverflow.com/questions/61901628
67
def get_notebook_name_colab() -> str:
    """Return the name of the current Google Colab notebook without `.ipynb`.

    Queries `http://<host ip>:9000/api/sessions` and reads the `name` of the
    first session. If that request or the parsing of its response fails for
    any reason, a warning is logged and the name falls back to
    `notebook.ipynb`.

    Returns:
        The notebook name with a trailing `.ipynb` suffix removed, if present.
    """
    from socket import gethostbyname, gethostname  # type: ignore

    from requests import get  # type: ignore

    ip = gethostbyname(gethostname())  # 172.28.0.12
    try:
        name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"]  # noqa: S113
    except Exception:
        logger.warning(
            "could not get notebook name from Google Colab, using: notebook.ipynb"
        )
        name = "notebook.ipynb"
    # `str.rstrip(".ipynb")` would strip any trailing run of the characters
    # ".", "i", "p", "y", "n", "b" (e.g. "numpy.ipynb" -> "num"), so remove the
    # suffix explicitly instead.
    suffix = ".ipynb"
    if name.endswith(suffix):
        name = name[: -len(suffix)]
    return name
81
+
82
+
83
def raise_missing_context(transform_type: str, key: str) -> None:
    """Raise `MissingContext` with instructions for setting `ln.context.uid`.

    Looks up the latest-version `Transform` with the given `key`. If none
    exists, a fresh uid (new 12-character base62 stem plus "0000") is
    suggested; otherwise the suggested uid keeps the existing stem and
    increments the remaining part of the uid.

    Args:
        transform_type: Label used in the message, e.g. "notebook" or "script".
        key: Transform key to look up.

    Raises:
        MissingContext: Always.
    """
    latest = Transform.filter(key=key).latest_version().first()
    if latest is not None:
        stem_len = Transform._len_stem_uid
        stem, revision = latest.uid[:stem_len], latest.uid[stem_len:]
        new_uid = f"{stem}{increment_base62(revision)}"
        intro = f"You already have a {transform_type} version family with key '{key}', suid '{latest.stem_uid}' & name '{latest.name}'.\n\n- to create a new {transform_type} version family, rename your file and rerun: ln.context.track()\n- to bump the version, set: "
    else:
        new_uid = f"{base62_12()}0000"
        intro = f"To track this {transform_type}, set\n\n"
    parts = [intro, f'ln.context.uid = "{new_uid}"']
    if transform_type == "notebook":
        parts.append(
            "\n\nRestart your notebook if you want consecutive cell execution."
        )
    raise MissingContext("".join(parts))
98
+
99
+
100
def pretty_pypackages(dependencies: dict) -> str:
    """Format a package-to-version mapping as a sorted, space-separated string.

    Packages with a non-empty version are rendered as `pkg==version`;
    packages whose version is the empty string are rendered as the bare name.
    """
    rendered = sorted(
        pkg if ver == "" else pkg + f"=={ver}" for pkg, ver in dependencies.items()
    )
    return " ".join(rendered)
109
+
110
+
111
class Context:
    """Run context.

    Bundles all metadata to track run contexts.
    """

    def __init__(self):
        self._uid: str | None = None
        self._name: str | None = None
        self._version: str | None = None
        self._transform: Transform | None = None
        self._run: Run | None = None
        # local path to the script or notebook that's running
        self._path: Path | None = None
        # log fragments collected during `track()`, emitted once at its end
        self._logging_message: str = ""

    @property
    def transform(self) -> Transform | None:
        """Transform of context."""
        return self._transform

    @property
    def uid(self) -> str | None:
        """`uid` to create transform."""
        return self._uid

    @uid.setter
    def uid(self, value: str | None):
        self._uid = value

    @property
    def name(self) -> str | None:
        """`name` to create transform."""
        return self._name

    @name.setter
    def name(self, value: str | None):
        self._name = value

    @property
    def version(self) -> str | None:
        """`version` to create transform."""
        return self._version

    @version.setter
    def version(self, value: str | None):
        self._version = value

    @property
    def run(self) -> Run | None:
        """Run of context."""
        return self._run

    def track(
        self,
        *,
        params: dict | None = None,
        transform: Transform | None = None,
        new_run: bool | None = None,
        path: str | None = None,
    ) -> None:
        """Track notebook or script run.

        Creates or loads a global :class:`~lamindb.Run` that enables data
        lineage tracking.

        Saves source code and compute environment.

        If :attr:`~lamindb.core.Settings.sync_git_repo` is set, will first check
        whether the script exists in the git repository and add a link.

        Args:
            params: A dictionary of parameters to track for the run.
            transform: A :class:`~lamindb.Transform` whose type is neither
                `"notebook"` nor `"script"`, e.g., `"pipeline"`
                (:class:`~lamindb.core.types.TransformType`). Leave `None` in
                notebooks and scripts, for which metadata is parsed
                automatically.
            new_run: If `False`, loads latest run of transform
                (default notebook), if `True`, creates new run (default pipeline).
            path: Filepath of notebook or script. Only needed if it can't be
                automatically detected.

        Raises:
            SystemExit: If `ln.context.version` is inconsistent with the stored
                transform or already taken by another member of the family.
            ValueError: If a `transform` of type notebook or script is passed.
            MissingContext: If neither `ln.context.uid` nor the transform
                settings are set.
            UpdateContext: If the file was renamed or its saved source changed.

        Examples:

            To track a notebook or script, call:

            >>> import lamindb as ln
            >>> ln.context.track()

            If you'd like to track an abstract pipeline run, pass a
            :class:`~lamindb.Transform` object of ``type`` ``"pipeline"``:

            >>> ln.Transform(name="Cell Ranger", version="2", type="pipeline").save()
            >>> transform = ln.Transform.filter(name="Cell Ranger", version="2").one()
            >>> ln.context.track(transform=transform)
        """
        self._path = None
        # drop fragments left behind by a previous call that raised midway
        self._logging_message = ""
        if transform is None:
            is_tracked = False
            transform_settings_are_set = (
                transform_settings.stem_uid is not None
                and transform_settings.version is not None
            )
            stem_uid = None
            if self.uid is not None:
                transform = Transform.filter(uid=self.uid).one_or_none()
                if self.version is not None:
                    # test inconsistent version passed
                    if (
                        transform is not None
                        and transform.version is not None
                        and self.version != transform.version
                    ):
                        raise SystemExit(
                            f"Please pass consistent version: ln.context.version = '{transform.version}'"
                        )
                    # test whether version was already used for another member of the family
                    suid, ruid = (
                        self.uid[: Transform._len_stem_uid],
                        self.uid[Transform._len_stem_uid :],
                    )
                    transform = Transform.filter(
                        uid__startswith=suid, version=self.version
                    ).one_or_none()
                    if (
                        transform is not None
                        and ruid != transform.uid[Transform._len_stem_uid :]
                    ):
                        better_version = bump_version_function(self.version)
                        raise SystemExit(
                            f"Version '{self.version}' is already taken by Transform('{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
                        )
            elif transform_settings_are_set:
                stem_uid, self.version = (
                    transform_settings.stem_uid,
                    transform_settings.version,
                )
                transform = Transform.filter(
                    uid__startswith=stem_uid, version=self.version
                ).one_or_none()
            if is_run_from_ipython:
                key, name = self._track_notebook(path=path)
                transform_type = "notebook"
                transform_ref = None
                transform_ref_type = None
            else:
                # NOTE: must stay a direct call from `track()`; `_track_script`
                # inspects a fixed stack depth to find the calling script
                (name, key, transform_ref, transform_ref_type) = self._track_script(
                    path=path
                )
                transform_type = "script"
            if self.uid is not None or transform_settings_are_set:
                # overwrite whatever is auto-detected in the notebook or script
                if self.name is not None:
                    name = self.name
                self._create_or_load_transform(
                    uid=self.uid,
                    stem_uid=stem_uid,
                    version=self.version,
                    name=name,
                    transform_ref=transform_ref,
                    transform_ref_type=transform_ref_type,
                    transform_type=transform_type,
                    key=key,
                    transform=transform,
                )
                # if no error is raised, the transform is tracked
                is_tracked = True
            if not is_tracked:
                raise_missing_context(transform_type, key)
        else:
            if transform.type in {"notebook", "script"}:
                raise ValueError(
                    "Use ln.context.track() without passing transform in a notebook or script"
                    " - metadata is automatically parsed"
                )
            transform_exists = None
            if transform.id is not None:
                # transform has an id but unclear whether already saved
                transform_exists = Transform.filter(id=transform.id).first()
            if transform_exists is None:
                transform.save()
                self._logging_message += f"created Transform('{transform.uid}')"
                transform_exists = transform
            else:
                self._logging_message += f"loaded Transform('{transform.uid}')"
            self._transform = transform_exists

        if new_run is None:  # for notebooks, default to loading latest runs
            new_run = self._transform.type != "notebook"  # type: ignore

        run = None
        if not new_run:  # try loading latest run by same user
            run = (
                Run.filter(transform=self._transform, created_by_id=current_user_id())
                .order_by("-created_at")
                .first()
            )
            if run is not None:  # loaded latest run
                run.started_at = datetime.now(timezone.utc)  # update run time
                self._logging_message += f" & loaded Run('{run.started_at}')"

        if run is None:  # create new run
            run = Run(
                transform=self._transform,
                params=params,
            )
            run.started_at = datetime.now(timezone.utc)
            self._logging_message += f" & created Run('{run.started_at}')"
        # can only determine at ln.finish() if run was consecutive in
        # interactive session, otherwise, is consecutive
        run.is_consecutive = True if is_run_from_ipython else None
        # need to save in all cases
        run.save()
        if params is not None:
            run.params.add_values(params)
        self._run = run
        track_environment(run)
        logger.important(self._logging_message)
        self._logging_message = ""

    def _track_script(
        self,
        *,
        path: UPathStr | None,
    ) -> tuple[str, str, str | None, str | None]:
        """Record the script path and derive transform metadata from it.

        Sets `self._path` to `path` or, if `path` is `None`, to the file of
        the module found two frames up the call stack (the caller of
        `track()`, given that `track()` calls this method directly).

        Returns:
            `(name, key, reference, reference_type)`; name and key are both
            the file name. The reference is looked up from the git repo and
            its type is `"url"` if `settings.sync_git_repo` is set, otherwise
            both are `None`.
        """
        if path is None:
            import inspect

            # stack()[0] is this method, [1] is `track()`, [2] is its caller
            frame = inspect.stack()[2]
            module = inspect.getmodule(frame[0])
            self._path = Path(module.__file__)
        else:
            self._path = Path(path)
        name = self._path.name
        key = name
        reference = None
        reference_type = None
        if settings.sync_git_repo is not None:
            reference = get_transform_reference_from_git_repo(self._path)
            reference_type = "url"
        return name, key, reference, reference_type

    def _track_notebook(
        self,
        *,
        path: str | None,
    ) -> tuple[str, str]:
        """Record the notebook path and derive its key and name.

        For paths starting with `/fileId=` the name is fetched via
        `get_notebook_name_colab()`; otherwise the title is read from
        nbproject and the notebook's imported packages are logged on a
        best-effort basis.

        Returns:
            `(key, name)` where key is the notebook file name and name is the
            notebook title (or the Colab notebook name).

        Raises:
            RuntimeError: If the path cannot be inferred or the notebook is
                still called `Untitled.ipynb`.
            NotebookNotSavedError: If nbproject cannot read the title because
                the notebook is not saved.
            NoTitleError: If the notebook has no title.
        """
        if path is None:
            path = get_notebook_path()
        key = Path(path).name
        if isinstance(path, (Path, PurePath)):
            path_str = path.as_posix()  # type: ignore
        else:
            path_str = str(path)
        if path_str.endswith("Untitled.ipynb"):
            raise RuntimeError("Please rename your notebook before tracking it")
        if path_str.startswith("/fileId="):
            name = get_notebook_name_colab()
            key = f"{name}.ipynb"
        else:
            import nbproject

            try:
                nbproject_title = nbproject.meta.live.title
            except IndexError:
                raise NotebookNotSavedError(
                    "The notebook is not saved, please save the notebook and"
                    " rerun `ln.context.track()`"
                ) from None
            if nbproject_title is None:
                raise NoTitleError(
                    "Please add a title to your notebook in a markdown cell: # Title"
                ) from None
            name = nbproject_title
            # log imported python packages; purely informational, so any
            # failure is only reported at debug level
            try:
                from nbproject.dev._pypackage import infer_pypackages

                nb = nbproject.dev.read_notebook(path_str)
                logger.important(
                    "notebook imports:"
                    f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
                )
            except Exception:
                logger.debug("inferring imported packages failed")
        self._path = Path(path_str)
        return key, name

    def _create_or_load_transform(
        self,
        *,
        uid: str | None,
        stem_uid: str | None,
        version: str | None,
        name: str,
        transform_ref: str | None = None,
        transform_ref_type: str | None = None,
        key: str | None = None,
        transform_type: TransformType = None,
        transform: Transform | None = None,
    ):
        """Create a new transform record or validate and load an existing one.

        If `transform` is `None`, a new `Transform` is created and saved; its
        uid is `uid` or, if that is `None`, `stem_uid` plus an extension
        derived from `version`. Otherwise the existing transform is checked
        and its name updated if it differs.

        In both cases the result is stored in `self._transform` and a
        fragment is appended to `self._logging_message`.

        Raises:
            UpdateContext: If the existing transform's key differs from `key`
                (file renamed), or if its source code was already saved and
                either the run is in IPython or the file hash changed.
        """
        # make a new transform record
        if transform is None:
            if uid is None:
                uid = f"{stem_uid}{get_uid_ext(version)}"
            transform = Transform(
                uid=uid,
                version=version,
                name=name,
                key=key,
                reference=transform_ref,
                reference_type=transform_ref_type,
                type=transform_type,
            )
            transform.save()
            self._logging_message += f"created Transform('{transform.uid}')"
        else:
            uid = transform.uid
            # check whether the transform file has been renamed
            if transform.key != key:
                suid = transform.stem_uid
                new_suid = ids.base62_12()
                kind_label = "Notebook" if is_run_from_ipython else "Script"
                note = f'Or update key "{transform.key}" in your existing family:\n\nln.Transform.filter(uid__startswith="{suid}").update(key="{key}")'
                raise UpdateContext(
                    f"{kind_label} filename changed.\n\nEither init a new transform family by setting:\n\n"
                    f'ln.context.uid = "{new_suid}0000"\n\n{note}'
                )
            elif transform.name != name:
                transform.name = name
                transform.save()
                self._logging_message += (
                    "updated transform name, "  # white space on purpose
                )
            # check whether transform source code was already saved
            if transform._source_code_artifact_id is not None:
                if is_run_from_ipython:
                    needs_bump = True  # re-running a saved notebook
                else:
                    source_hash, _ = hash_file(self._path)  # ignore hash_type for now
                    needs_bump = source_hash != transform._source_code_artifact.hash
                    if not needs_bump:
                        self._logging_message += f"loaded Transform('{transform.uid}')"
                if needs_bump:
                    change_type = (
                        "Re-running saved notebook"
                        if is_run_from_ipython
                        else "Source code changed"
                    )
                    suid, ruid = (
                        uid[: Transform._len_stem_uid],
                        uid[Transform._len_stem_uid :],
                    )
                    new_ruid = increment_base62(ruid)
                    raise UpdateContext(
                        f"{change_type}, bump version by setting:\n\n"
                        f'ln.context.uid = "{suid}{new_ruid}"'
                    )
            else:
                self._logging_message += f"loaded Transform('{transform.uid}')"
        self._transform = transform

    def finish(self) -> None:
        """Mark a tracked run as finished.

        Saves source code and, for notebooks, a run report to your default storage location.

        Raises:
            TrackNotCalled: If no run is tracked on this context.
            ValueError: If no path was tracked although the run's transform
                is of type script or notebook.
            NotebookNotSaved: If running in IPython, the notebook file was
                modified more than 3 seconds ago, and `LAMIN_TESTING` is unset.
        """
        from lamindb._finish import save_context_core

        def get_seconds_since_modified(filepath) -> float:
            return datetime.now().timestamp() - filepath.stat().st_mtime

        # operate on this instance rather than on the module-level singleton
        if self.run is None:
            raise TrackNotCalled("Please run `ln.context.track()` before `ln.finish()`")
        if self._path is None:
            if self.run.transform.type in {"script", "notebook"}:
                raise ValueError(
                    f"Transform type is not allowed to be 'script' or 'notebook' but is {self.run.transform.type}."
                )
            self.run.finished_at = datetime.now(timezone.utc)
            self.run.save()
            # nothing else to do
            return
        if is_run_from_ipython:  # notebooks
            if (
                get_seconds_since_modified(self._path) > 3
                and os.getenv("LAMIN_TESTING") is None
            ):
                raise NotebookNotSaved(
                    "Please save the notebook in your editor right before running `ln.finish()`"
                )
        save_context_core(
            run=self.run,
            transform=self.run.transform,
            filepath=self._path,
            finished_at=True,
        )


# module-level singleton instance of `Context`
context = Context()
lamindb/core/_data.py CHANGED
@@ -23,6 +23,7 @@ from lamindb._query_set import QuerySet
23
23
  from lamindb._record import get_name_field
24
24
  from lamindb.core._settings import settings
25
25
 
26
+ from ._context import context
26
27
  from ._feature_manager import (
27
28
  get_feature_set_links,
28
29
  get_host_id_field,
@@ -30,7 +31,6 @@ from ._feature_manager import (
30
31
  print_features,
31
32
  )
32
33
  from ._label_manager import print_labels
33
- from ._run_context import run_context
34
34
  from .exceptions import ValidationError
35
35
  from .schema import (
36
36
  dict_related_model_to_related_name,
@@ -40,12 +40,14 @@ from .schema import (
40
40
  if TYPE_CHECKING:
41
41
  from lnschema_core.types import StrField
42
42
 
43
- WARNING_RUN_TRANSFORM = "no run & transform get linked, consider calling ln.track()"
43
+ WARNING_RUN_TRANSFORM = (
44
+ "no run & transform get linked, consider calling ln.context.track()"
45
+ )
44
46
 
45
47
 
46
48
  def get_run(run: Run | None) -> Run | None:
47
49
  if run is None:
48
- run = run_context.run
50
+ run = context.run
49
51
  if run is None and not settings.creation.artifact_silence_missing_run_warning:
50
52
  logger.warning(WARNING_RUN_TRANSFORM)
51
53
  # suppress run by passing False
@@ -336,7 +338,7 @@ def _track_run_input(
336
338
  run = is_run_input
337
339
  is_run_input = True
338
340
  elif run is None:
339
- run = run_context.run
341
+ run = context.run
340
342
  # consider that data is an iterable of Data
341
343
  data_iter: Iterable[HasFeatures] = [data] if isinstance(data, HasFeatures) else data
342
344
  track_run_input = False
@@ -364,7 +366,7 @@ def _track_run_input(
364
366
  if settings.track_run_inputs:
365
367
  logger.hint(
366
368
  "you can auto-track these data as a run input by calling"
367
- " `ln.track()`"
369
+ " `ln.context.track()`"
368
370
  )
369
371
  # assume we have a run record
370
372
  else:
@@ -392,7 +394,7 @@ def _track_run_input(
392
394
  if track_run_input:
393
395
  if run is None:
394
396
  raise ValueError(
395
- "No run context set. Call ln.track() or link input to a"
397
+ "No run context set. Call ln.context.track() or link input to a"
396
398
  " run object via `run.input_artifacts.add(artifact)`"
397
399
  )
398
400
  # avoid adding the same run twice