lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. lamindb/__init__.py +39 -32
  2. lamindb/_artifact.py +95 -64
  3. lamindb/_can_curate.py +19 -10
  4. lamindb/_collection.py +51 -49
  5. lamindb/_feature.py +9 -9
  6. lamindb/_finish.py +99 -86
  7. lamindb/_from_values.py +20 -17
  8. lamindb/_is_versioned.py +2 -1
  9. lamindb/_parents.py +23 -16
  10. lamindb/_query_manager.py +3 -3
  11. lamindb/_query_set.py +85 -18
  12. lamindb/_record.py +121 -46
  13. lamindb/_run.py +3 -3
  14. lamindb/_save.py +14 -8
  15. lamindb/{_feature_set.py → _schema.py} +34 -31
  16. lamindb/_storage.py +2 -1
  17. lamindb/_transform.py +51 -23
  18. lamindb/_ulabel.py +17 -8
  19. lamindb/_view.py +15 -14
  20. lamindb/base/__init__.py +24 -0
  21. lamindb/base/fields.py +281 -0
  22. lamindb/base/ids.py +103 -0
  23. lamindb/base/types.py +51 -0
  24. lamindb/base/users.py +30 -0
  25. lamindb/base/validation.py +67 -0
  26. lamindb/core/__init__.py +19 -14
  27. lamindb/core/_context.py +297 -228
  28. lamindb/core/_data.py +44 -49
  29. lamindb/core/_describe.py +41 -31
  30. lamindb/core/_django.py +59 -44
  31. lamindb/core/_feature_manager.py +192 -168
  32. lamindb/core/_label_manager.py +22 -22
  33. lamindb/core/_mapped_collection.py +17 -14
  34. lamindb/core/_settings.py +1 -12
  35. lamindb/core/_sync_git.py +56 -9
  36. lamindb/core/_track_environment.py +1 -1
  37. lamindb/core/datasets/_core.py +5 -6
  38. lamindb/core/exceptions.py +0 -7
  39. lamindb/core/fields.py +1 -1
  40. lamindb/core/loaders.py +18 -2
  41. lamindb/core/{schema.py → relations.py} +22 -19
  42. lamindb/core/storage/_anndata_accessor.py +1 -2
  43. lamindb/core/storage/_backed_access.py +2 -1
  44. lamindb/core/storage/_tiledbsoma.py +40 -13
  45. lamindb/core/storage/objects.py +1 -1
  46. lamindb/core/storage/paths.py +13 -8
  47. lamindb/core/subsettings/__init__.py +0 -2
  48. lamindb/core/types.py +2 -23
  49. lamindb/core/versioning.py +11 -7
  50. lamindb/{_curate.py → curators/__init__.py} +700 -57
  51. lamindb/curators/_spatial.py +528 -0
  52. lamindb/integrations/_vitessce.py +1 -3
  53. lamindb/migrations/0052_squashed.py +1261 -0
  54. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
  55. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
  56. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
  57. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
  58. lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
  59. lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
  60. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
  61. lamindb/migrations/0060_alter_artifact__actions.py +22 -0
  62. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
  63. lamindb/migrations/0062_add_is_latest_field.py +32 -0
  64. lamindb/migrations/0063_populate_latest_field.py +45 -0
  65. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
  66. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
  67. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
  68. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
  69. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
  70. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
  71. lamindb/migrations/0069_squashed.py +1770 -0
  72. lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
  73. lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
  74. lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
  75. lamindb/migrations/0073_merge_ourprojects.py +945 -0
  76. lamindb/migrations/0074_lamindbv1_part4.py +374 -0
  77. lamindb/migrations/0075_lamindbv1_part5.py +276 -0
  78. lamindb/migrations/0076_lamindbv1_part6.py +621 -0
  79. lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
  80. lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
  81. lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
  82. lamindb/migrations/__init__.py +0 -0
  83. lamindb/models.py +4064 -0
  84. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
  85. lamindb-1.0rc1.dist-info/RECORD +100 -0
  86. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
  87. lamindb/core/subsettings/_transform_settings.py +0 -21
  88. lamindb-0.77.2.dist-info/RECORD +0 -63
  89. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/_context.py CHANGED
@@ -2,35 +2,39 @@ from __future__ import annotations
2
2
 
3
3
  import builtins
4
4
  import hashlib
5
+ import signal
6
+ import sys
7
+ import threading
8
+ import traceback
5
9
  from datetime import datetime, timezone
6
- from pathlib import Path, PurePath
10
+ from pathlib import Path
7
11
  from typing import TYPE_CHECKING
8
12
 
9
13
  import lamindb_setup as ln_setup
14
+ from django.db.models import Func, IntegerField
10
15
  from lamin_utils import logger
11
16
  from lamindb_setup.core.hashing import hash_file
12
- from lnschema_core import Run, Transform, ids
13
- from lnschema_core.ids import base62_12
14
- from lnschema_core.models import format_field_value
17
+
18
+ from lamindb.base import ids
19
+ from lamindb.base.ids import base62_12
20
+ from lamindb.models import Run, Transform, format_field_value
15
21
 
16
22
  from ._settings import settings
17
23
  from ._sync_git import get_transform_reference_from_git_repo
18
24
  from ._track_environment import track_environment
19
25
  from .exceptions import (
20
26
  InconsistentKey,
21
- MissingContextUID,
22
27
  NotebookNotSaved,
23
- NoTitleError,
24
28
  TrackNotCalled,
25
29
  UpdateContext,
26
30
  )
27
- from .subsettings._transform_settings import transform_settings
28
31
  from .versioning import bump_version as bump_version_function
29
32
  from .versioning import increment_base62, message_update_key_in_version_family
30
33
 
31
34
  if TYPE_CHECKING:
32
35
  from lamindb_setup.core.types import UPathStr
33
- from lnschema_core.types import TransformType
36
+
37
+ from lamindb.base.types import TransformType
34
38
 
35
39
  is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
36
40
 
@@ -46,7 +50,7 @@ def get_uid_ext(version: str) -> str:
46
50
  return encodebytes(hashlib.md5(version.encode()).digest())[:4] # noqa: S324
47
51
 
48
52
 
49
- def get_notebook_path():
53
+ def get_notebook_path() -> Path:
50
54
  from nbproject.dev._jupyter_communicate import (
51
55
  notebook_path as get_notebook_path,
52
56
  )
@@ -58,50 +62,25 @@ def get_notebook_path():
58
62
  raise RuntimeError(msg_path_failed) from None
59
63
  if path is None:
60
64
  raise RuntimeError(msg_path_failed) from None
61
- return path
65
+ return Path(path)
62
66
 
63
67
 
64
68
  # from https://stackoverflow.com/questions/61901628
65
- def get_notebook_name_colab() -> str:
69
+ def get_notebook_key_colab() -> str:
66
70
  from socket import gethostbyname, gethostname # type: ignore
67
71
 
68
72
  from requests import get # type: ignore
69
73
 
70
74
  ip = gethostbyname(gethostname()) # 172.28.0.12
71
75
  try:
72
- name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"] # noqa: S113
76
+ key = get(f"http://{ip}:9000/api/sessions").json()[0]["name"] # noqa: S113
77
+ key = f"colab/{key}"
73
78
  except Exception:
74
79
  logger.warning(
75
- "could not get notebook name from Google Colab, using: notebook.ipynb"
76
- )
77
- name = "notebook.ipynb"
78
- return name.rstrip(".ipynb")
79
-
80
-
81
- def raise_missing_context(transform_type: str, key: str) -> bool:
82
- transform = Transform.filter(key=key).latest_version().first()
83
- if transform is None:
84
- new_uid = f"{base62_12()}0000"
85
- message = f'to track this {transform_type}, run: ln.track("{new_uid}")'
86
- else:
87
- uid = transform.uid
88
- new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
89
- message = (
90
- f"you already have a transform with key '{key}': Transform('{transform.uid[:8]}')\n"
91
- f' (1) to make a revision, run: ln.track("{new_uid}")\n (2) to create a new transform, rename your {transform_type} file and re-run: ln.track()'
80
+ "could not get notebook key from Google Colab, using: colab/notebook.ipynb"
92
81
  )
93
- if is_run_from_ipython:
94
- print(f"→ {message}")
95
- response = input("→ Ready to re-run? (y/n)")
96
- if response == "y":
97
- logger.important(
98
- "note: restart your notebook if you want consecutive cell execution"
99
- )
100
- return True
101
- raise MissingContextUID("Please follow the instructions.")
102
- else:
103
- raise MissingContextUID(f"✗ {message}")
104
- return False
82
+ key = "colab/notebook.ipynb"
83
+ return key
105
84
 
106
85
 
107
86
  def pretty_pypackages(dependencies: dict) -> str:
@@ -115,6 +94,83 @@ def pretty_pypackages(dependencies: dict) -> str:
115
94
  return " ".join(deps_list)
116
95
 
117
96
 
97
+ class LogStreamHandler:
98
+ def __init__(self, log_stream, file):
99
+ self.log_stream = log_stream
100
+ self.file = file
101
+
102
+ def write(self, data):
103
+ self.log_stream.write(data)
104
+ self.file.write(data)
105
+ self.file.flush()
106
+
107
+ def flush(self):
108
+ self.log_stream.flush()
109
+ self.file.flush()
110
+
111
+
112
+ class LogStreamTracker:
113
+ def __init__(self):
114
+ self.original_stdout = None
115
+ self.original_stderr = None
116
+ self.log_file = None
117
+ self.original_excepthook = sys.excepthook
118
+ self.is_cleaning_up = False
119
+
120
+ def start(self, run: Run):
121
+ self.original_stdout = sys.stdout
122
+ self.original_stderr = sys.stderr
123
+ self.run = run
124
+ self.log_file_path = (
125
+ ln_setup.settings.cache_dir / f"run_logs_{self.run.uid}.txt"
126
+ )
127
+ self.log_file = open(self.log_file_path, "w")
128
+ sys.stdout = LogStreamHandler(self.original_stdout, self.log_file)
129
+ sys.stderr = LogStreamHandler(self.original_stderr, self.log_file)
130
+ # handle signals
131
+ # signal should be used only in the main thread, otherwise
132
+ # ValueError: signal only works in main thread of the main interpreter
133
+ if threading.current_thread() == threading.main_thread():
134
+ signal.signal(signal.SIGTERM, self.cleanup)
135
+ signal.signal(signal.SIGINT, self.cleanup)
136
+ # handle exceptions
137
+ sys.excepthook = self.handle_exception
138
+
139
+ def finish(self):
140
+ if self.original_stdout:
141
+ sys.stdout = self.original_stdout
142
+ sys.stderr = self.original_stderr
143
+ self.log_file.close()
144
+
145
+ def cleanup(self, signo=None, frame=None):
146
+ from lamindb._finish import save_run_logs
147
+
148
+ if self.original_stdout and not self.is_cleaning_up:
149
+ self.is_cleaning_up = True
150
+ if signo is not None:
151
+ signal_msg = f"\nProcess terminated by signal {signo} ({signal.Signals(signo).name})\n"
152
+ if frame:
153
+ signal_msg += (
154
+ f"Frame info:\n{''.join(traceback.format_stack(frame))}"
155
+ )
156
+ self.log_file.write(signal_msg)
157
+ sys.stdout = self.original_stdout
158
+ sys.stderr = self.original_stderr
159
+ self.log_file.flush()
160
+ self.log_file.close()
161
+ save_run_logs(self.run, save_run=True)
162
+
163
+ def handle_exception(self, exc_type, exc_value, exc_traceback):
164
+ if not self.is_cleaning_up:
165
+ error_msg = f"{''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))}"
166
+ if self.log_file.closed:
167
+ self.log_file = open(self.log_file_path, "a")
168
+ self.log_file.write(error_msg)
169
+ self.log_file.flush()
170
+ self.cleanup()
171
+ self.original_excepthook(exc_type, exc_value, exc_traceback)
172
+
173
+
118
174
  class Context:
119
175
  """Run context.
120
176
 
@@ -136,7 +192,7 @@ class Context:
136
192
 
137
193
  def __init__(self):
138
194
  self._uid: str | None = None
139
- self._name: str | None = None
195
+ self._description: str | None = None
140
196
  self._version: str | None = None
141
197
  self._transform: Transform | None = None
142
198
  self._run: Run | None = None
@@ -144,6 +200,7 @@ class Context:
144
200
  """A local path to the script that's running."""
145
201
  self._logging_message_track: str = ""
146
202
  self._logging_message_imports: str = ""
203
+ self._stream_tracker: LogStreamTracker = LogStreamTracker()
147
204
 
148
205
  @property
149
206
  def transform(self) -> Transform | None:
@@ -151,22 +208,31 @@ class Context:
151
208
  return self._transform
152
209
 
153
210
  @property
154
- def uid(self) -> str | None:
155
- """`uid` argument for `context.transform`."""
156
- return self._uid
211
+ def description(self) -> str | None:
212
+ """`description` argument for `context.transform`."""
213
+ return self._description
157
214
 
158
- @uid.setter
159
- def uid(self, value: str | None):
160
- self._uid = value
215
+ @description.setter
216
+ def description(self, value: str | None):
217
+ self._description = value
161
218
 
162
219
  @property
163
220
  def name(self) -> str | None:
164
- """`name argument for `context.transform`."""
165
- return self._name
221
+ """Deprecated. Populates `description` argument for `context.transform`."""
222
+ return self._description
166
223
 
167
224
  @name.setter
168
225
  def name(self, value: str | None):
169
- self._name = value
226
+ self._description = value
227
+
228
+ @property
229
+ def uid(self) -> str | None:
230
+ """`uid` argument for `context.transform`."""
231
+ return self._uid
232
+
233
+ @uid.setter
234
+ def uid(self, value: str | None):
235
+ self._uid = value
170
236
 
171
237
  @property
172
238
  def version(self) -> str | None:
@@ -189,13 +255,14 @@ class Context:
189
255
  params: dict | None = None,
190
256
  new_run: bool | None = None,
191
257
  path: str | None = None,
258
+ log_to_file: bool | None = None,
192
259
  ) -> None:
193
260
  """Initiate a run with tracked data lineage.
194
261
 
195
262
  - sets :attr:`~lamindb.core.Context.transform` &
196
263
  :attr:`~lamindb.core.Context.run` by creating or loading `Transform` &
197
264
  `Run` records
198
- - saves compute environment as a `requirements.txt` file: `run.environment`
265
+ - saves Python environment as a `requirements.txt` file: `run.environment`
199
266
 
200
267
  If :attr:`~lamindb.core.Settings.sync_git_repo` is set, checks whether a
201
268
  script-like transform exists in a git repository and links it.
@@ -203,110 +270,49 @@ class Context:
203
270
  Args:
204
271
  transform: A transform `uid` or record. If `None`, creates a `uid`.
205
272
  params: A dictionary of parameters to track for the run.
206
- new_run: If `False`, loads latest run of transform
207
- (default notebook), if `True`, creates new run (default pipeline).
273
+ new_run: If `False`, loads the latest run of transform
274
+ (default notebook), if `True`, creates new run (default non-notebook).
208
275
  path: Filepath of notebook or script. Only needed if it can't be
209
276
  automatically detected.
277
+ log_to_file: If `True`, logs stdout and stderr to a file and
278
+ saves the file within the current run (default non-notebook),
279
+ if `False`, does not log the output (default notebook).
210
280
 
211
281
  Examples:
212
282
 
213
- To create a transform `uid` for tracking a script or notebook, call:
214
-
215
- >>> ln.track()
216
-
217
283
  To track the run of a notebook or script, call:
218
284
 
219
- >>> ln.track("FPnfDtJz8qbE0000") # replace with your uid
285
+ >>> ln.track()
220
286
 
221
287
  """
222
288
  self._logging_message_track = ""
223
289
  self._logging_message_imports = ""
224
- uid = None
225
290
  if transform is not None and isinstance(transform, str):
226
- uid = transform
227
- self.uid = uid
291
+ self.uid = transform
228
292
  transform = None
229
293
  self._path = None
230
294
  if transform is None:
231
- is_tracked = False
232
- transform_settings_are_set = (
233
- transform_settings.stem_uid is not None
234
- and transform_settings.version is not None
235
- )
236
- transform = None
237
- stem_uid = None
238
- # you can set ln.context.uid and then call ln.track() without passing anythin
239
- # that has been the preferred syntax for a while; we'll likely
240
- # deprecate it at some point
241
- if uid is not None or self.uid is not None:
242
- transform = Transform.filter(uid=self.uid).one_or_none()
243
- if self.version is not None:
244
- # test inconsistent version passed
245
- if (
246
- transform is not None
247
- and transform.version is not None # type: ignore
248
- and self.version != transform.version # type: ignore
249
- ):
250
- raise SystemExit(
251
- f"Please pass consistent version: ln.context.version = '{transform.version}'" # type: ignore
252
- )
253
- # test whether version was already used for another member of the family
254
- suid, vuid = (
255
- self.uid[: Transform._len_stem_uid],
256
- self.uid[Transform._len_stem_uid :],
257
- )
258
- transform = Transform.filter(
259
- uid__startswith=suid, version=self.version
260
- ).one_or_none()
261
- if (
262
- transform is not None
263
- and vuid != transform.uid[Transform._len_stem_uid :]
264
- ):
265
- better_version = bump_version_function(self.version)
266
- raise SystemExit(
267
- f"Version '{self.version}' is already taken by Transform(uid='{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
268
- )
269
- elif transform_settings_are_set:
270
- stem_uid, self.version = (
271
- transform_settings.stem_uid,
272
- transform_settings.version,
273
- )
274
- transform = Transform.filter(
275
- uid__startswith=stem_uid, version=self.version
276
- ).one_or_none()
295
+ description = None
277
296
  if is_run_from_ipython:
278
- key, name = self._track_notebook(path=path)
297
+ self._path, description = self._track_notebook(path_str=path)
279
298
  transform_type = "notebook"
280
299
  transform_ref = None
281
300
  transform_ref_type = None
282
301
  else:
283
- # the below function is typically used for `.py` scripts
284
- # it is also used for `.Rmd` and `.qmd` files, which we classify
285
- # as "notebook" because they typically come with an .html run report
286
- (name, key, transform_type, transform_ref, transform_ref_type) = (
287
- self._track_source_code(path=path)
288
- )
289
- if self.uid is not None or transform_settings_are_set:
290
- # overwrite whatever is auto-detected in the notebook or script
291
- if self.name is not None:
292
- name = self.name
293
- self._create_or_load_transform(
294
- uid=self.uid,
295
- stem_uid=stem_uid,
296
- version=self.version,
297
- name=name,
298
- transform_ref=transform_ref,
299
- transform_ref_type=transform_ref_type,
300
- transform_type=transform_type,
301
- key=key,
302
- transform=transform,
303
- )
304
- # if no error is raised, the transform is tracked
305
- is_tracked = True
306
- if not is_tracked:
307
- early_return = raise_missing_context(transform_type, key)
308
- if early_return:
309
- return None
302
+ (
303
+ self._path,
304
+ transform_type,
305
+ transform_ref,
306
+ transform_ref_type,
307
+ ) = self._track_source_code(path=path)
308
+ if description is None:
309
+ description = self._description
310
+ self._create_or_load_transform(
311
+ description=description,
312
+ transform_ref=transform_ref,
313
+ transform_ref_type=transform_ref_type,
314
+ transform_type=transform_type,
315
+ )
310
316
  else:
311
317
  if transform.type in {"notebook", "script"}:
312
318
  raise ValueError(
@@ -319,14 +325,10 @@ class Context:
319
325
  transform_exists = Transform.filter(id=transform.id).first()
320
326
  if transform_exists is None:
321
327
  transform.save()
322
- self._logging_message_track += (
323
- f"created Transform('{transform.uid[:8]}')"
324
- )
328
+ self._logging_message_track += f"created Transform('{transform.uid}')"
325
329
  transform_exists = transform
326
330
  else:
327
- self._logging_message_track += (
328
- f"loaded Transform('{transform.uid[:8]}')"
329
- )
331
+ self._logging_message_track += f"loaded Transform('{transform.uid}')"
330
332
  self._transform = transform_exists
331
333
 
332
334
  if new_run is None: # for notebooks, default to loading latest runs
@@ -343,7 +345,7 @@ class Context:
343
345
  )
344
346
  if run is not None: # loaded latest run
345
347
  run.started_at = datetime.now(timezone.utc) # update run time
346
- self._logging_message_track += f", re-started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
348
+ self._logging_message_track += f", re-started Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
347
349
 
348
350
  if run is None: # create new run
349
351
  run = Run(
@@ -351,7 +353,7 @@ class Context:
351
353
  params=params,
352
354
  )
353
355
  run.started_at = datetime.now(timezone.utc)
354
- self._logging_message_track += f", started new Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
356
+ self._logging_message_track += f", started new Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
355
357
  # can only determine at ln.finish() if run was consecutive in
356
358
  # interactive session, otherwise, is consecutive
357
359
  run.is_consecutive = True if is_run_from_ipython else None
@@ -359,11 +361,15 @@ class Context:
359
361
  run.save()
360
362
  if params is not None:
361
363
  run.params.add_values(params)
362
- self._logging_message_track += "\n→ params: " + " ".join(
363
- f"{key}='{value}'" for key, value in params.items()
364
+ self._logging_message_track += "\n→ params: " + ", ".join(
365
+ f"{key}={value}" for key, value in params.items()
364
366
  )
365
367
  self._run = run
366
368
  track_environment(run)
369
+ if log_to_file is None:
370
+ log_to_file = self.transform.type != "notebook"
371
+ if log_to_file:
372
+ self._stream_tracker.start(run)
367
373
  logger.important(self._logging_message_track)
368
374
  if self._logging_message_imports:
369
375
  logger.important(self._logging_message_imports)
@@ -372,61 +378,59 @@ class Context:
372
378
  self,
373
379
  *,
374
380
  path: UPathStr | None,
375
- ) -> tuple[str, str, str, str, str]:
381
+ ) -> tuple[Path, str, str, str]:
382
+ # for `.py` files, classified as "script"
383
+ # for `.Rmd` and `.qmd` files, which we classify
384
+ # as "notebook" because they typically come with an .html run report
376
385
  if path is None:
377
386
  import inspect
378
387
 
379
388
  frame = inspect.stack()[2]
380
389
  module = inspect.getmodule(frame[0])
381
- self._path = Path(module.__file__)
390
+ # None for interactive session
391
+ if module is None:
392
+ raise NotImplementedError(
393
+ "Interactive sessions are not yet supported to be tracked."
394
+ )
395
+ path = Path(module.__file__)
382
396
  else:
383
- self._path = Path(path)
384
- transform_type = (
385
- "notebook" if self._path.suffix in {".Rmd", ".qmd"} else "script"
386
- )
387
- name = self._path.name
388
- key = name
397
+ path = Path(path)
398
+ transform_type = "notebook" if path.suffix in {".Rmd", ".qmd"} else "script"
389
399
  reference = None
390
400
  reference_type = None
391
401
  if settings.sync_git_repo is not None:
392
- reference = get_transform_reference_from_git_repo(self._path)
402
+ reference = get_transform_reference_from_git_repo(path)
393
403
  reference_type = "url"
394
- return name, key, transform_type, reference, reference_type
404
+ return path, transform_type, reference, reference_type
395
405
 
396
406
  def _track_notebook(
397
407
  self,
398
408
  *,
399
- path: str | None,
400
- ):
401
- if path is None:
409
+ path_str: str | None,
410
+ ) -> tuple[Path, str | None]:
411
+ if path_str is None:
402
412
  path = get_notebook_path()
403
- key = Path(path).name
404
- if isinstance(path, (Path, PurePath)):
405
- path_str = path.as_posix() # type: ignore
406
413
  else:
407
- path_str = str(path)
414
+ path = Path(path_str)
415
+ description = None
416
+ path_str = path.as_posix()
408
417
  if path_str.endswith("Untitled.ipynb"):
409
418
  raise RuntimeError("Please rename your notebook before tracking it")
410
419
  if path_str.startswith("/fileId="):
411
- name = get_notebook_name_colab()
412
- key = f"{name}.ipynb"
420
+ logger.warning("tracking on Google Colab is experimental")
421
+ path_str = get_notebook_key_colab()
422
+ path = Path(path_str)
413
423
  else:
414
424
  import nbproject
415
425
 
416
426
  try:
417
427
  nbproject_title = nbproject.meta.live.title
418
428
  except IndexError:
419
- raise NotebookNotSaved(
420
- "The notebook is not saved, please save the notebook and"
421
- " rerun ``"
422
- ) from None
423
- if nbproject_title is None:
424
- raise NoTitleError(
425
- "Please add a title to your notebook in a markdown cell: # Title"
426
- ) from None
427
- name = nbproject_title
428
- # log imported python packages
429
- if not path_str.startswith("/fileId="):
429
+ # notebook is not saved
430
+ pass
431
+ if nbproject_title is not None:
432
+ description = nbproject_title
433
+ # log imported python packages
430
434
  try:
431
435
  from nbproject.dev._pypackage import infer_pypackages
432
436
 
@@ -438,21 +442,15 @@ class Context:
438
442
  except Exception:
439
443
  logger.debug("inferring imported packages failed")
440
444
  pass
441
- self._path = Path(path_str)
442
- return key, name
445
+ return path, description
443
446
 
444
447
  def _create_or_load_transform(
445
448
  self,
446
449
  *,
447
- uid: str | None,
448
- stem_uid: str | None,
449
- version: str | None,
450
- name: str,
450
+ description: str,
451
451
  transform_ref: str | None = None,
452
452
  transform_ref_type: str | None = None,
453
- key: str | None = None,
454
453
  transform_type: TransformType = None,
455
- transform: Transform | None = None,
456
454
  ):
457
455
  def get_key_clashing_message(transform: Transform, key: str) -> str:
458
456
  update_key_note = message_update_key_in_version_family(
@@ -462,63 +460,139 @@ class Context:
462
460
  registry="Transform",
463
461
  )
464
462
  return (
465
- f'Filename "{key}" clashes with the existing key "{transform.key}" for uid "{transform.uid[:-4]}...."\n\nEither init a new transform with a new uid:\n\n'
463
+ f'Filepath "{key}" clashes with the existing key "{transform.key}" for uid "{transform.uid[:-4]}...."\n\nEither init a new transform with a new uid:\n\n'
466
464
  f'ln.track("{ids.base62_12()}0000")\n\n{update_key_note}'
467
465
  )
468
466
 
467
+ revises = None
468
+ # the user did not pass the uid
469
+ if self.uid is None:
470
+
471
+ class SlashCount(Func):
472
+ template = "LENGTH(%(expressions)s) - LENGTH(REPLACE(%(expressions)s, '/', ''))"
473
+ output_field = IntegerField()
474
+
475
+ # we need to traverse from greater depth to shorter depth so that we match better matches first
476
+ transforms = (
477
+ Transform.filter(key__endswith=self._path.name, is_latest=True)
478
+ .annotate(slash_count=SlashCount("key"))
479
+ .order_by("-slash_count")
480
+ )
481
+ uid = f"{base62_12()}0000"
482
+ key = self._path.name
483
+ target_transform = None
484
+ hash, _ = hash_file(self._path)
485
+ if len(transforms) != 0:
486
+ message = ""
487
+ found_key = False
488
+ for aux_transform in transforms:
489
+ if aux_transform.key in self._path.as_posix():
490
+ key = aux_transform.key
491
+ if (
492
+ aux_transform.source_code is None
493
+ or aux_transform.hash == hash
494
+ ):
495
+ uid = aux_transform.uid
496
+ target_transform = aux_transform
497
+ else:
498
+ uid = f"{aux_transform.uid[:-4]}{increment_base62(aux_transform.uid[-4:])}"
499
+ message = f"there already is a transform with key '{aux_transform.key}', creating new version '{uid}'"
500
+ revises = aux_transform
501
+ found_key = True
502
+ break
503
+ if not found_key:
504
+ plural_s = "s" if len(transforms) > 1 else ""
505
+ transforms_str = "\n".join(
506
+ [
507
+ f" {transform.uid} → {transform.key}"
508
+ for transform in transforms
509
+ ]
510
+ )
511
+ message = f"ignoring transform{plural_s} with same filedescription:\n{transforms_str}"
512
+ if message != "":
513
+ logger.important(message)
514
+ self.uid, transform = uid, target_transform
515
+ # the user did pass the uid
516
+ else:
517
+ transform = Transform.filter(uid=self.uid).one_or_none()
518
+ if transform is not None:
519
+ if transform.key not in self._path.as_posix():
520
+ n_parts = len(Path(transform.key).parts)
521
+ last_path_elements = (
522
+ Path(*self._path.parts[-n_parts:]).as_posix()
523
+ if n_parts > 0
524
+ else ""
525
+ )
526
+ raise UpdateContext(
527
+ get_key_clashing_message(transform, last_path_elements)
528
+ )
529
+ key = transform.key # type: ignore
530
+ else:
531
+ key = self._path.name
532
+ if self.version is not None:
533
+ # test inconsistent version passed
534
+ if (
535
+ transform is not None
536
+ and transform.version is not None # type: ignore
537
+ and self.version != transform.version # type: ignore
538
+ ):
539
+ raise SystemExit(
540
+ f"✗ please pass consistent version: ln.context.version = '{transform.version}'" # type: ignore
541
+ )
542
+ # test whether version was already used for another member of the family
543
+ suid, vuid = (self.uid[:-4], self.uid[-4:])
544
+ transform = Transform.filter(
545
+ uid__startswith=suid, version=self.version
546
+ ).one_or_none()
547
+ if transform is not None and vuid != transform.uid[-4:]:
548
+ better_version = bump_version_function(self.version)
549
+ raise SystemExit(
550
+ f"✗ version '{self.version}' is already taken by Transform('{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
551
+ )
469
552
  # make a new transform record
470
553
  if transform is None:
471
- if uid is None:
472
- uid = f"{stem_uid}{get_uid_ext(version)}"
473
- # let's query revises so that we can pass it to the constructor and use it for error handling
474
- revises = (
475
- Transform.filter(uid__startswith=uid[:-4], is_latest=True)
476
- .order_by("-created_at")
477
- .first()
478
- )
479
- # note that here we're not passing revises because we're not querying it
480
- # hence, we need to do a revision family lookup based on key
481
- # hence, we need key to be not None
482
554
  assert key is not None # noqa: S101
483
555
  raise_update_context = False
484
556
  try:
485
557
  transform = Transform(
486
- uid=uid,
487
- version=version,
488
- name=name,
558
+ uid=self.uid,
559
+ version=self.version,
560
+ description=description,
489
561
  key=key,
490
562
  reference=transform_ref,
491
563
  reference_type=transform_ref_type,
492
564
  type=transform_type,
493
- revises=revises,
494
565
  ).save()
495
566
  except InconsistentKey:
496
567
  raise_update_context = True
497
568
  if raise_update_context:
569
+ if revises is None:
570
+ revises = (
571
+ Transform.filter(uid__startswith=self.uid[:-4], is_latest=True)
572
+ .order_by("-created_at")
573
+ .first()
574
+ )
498
575
  raise UpdateContext(get_key_clashing_message(revises, key))
499
- self._logging_message_track += f"created Transform('{transform.uid[:8]}')"
576
+ self._logging_message_track += f"created Transform('{transform.uid}')"
500
577
  else:
501
578
  uid = transform.uid
502
579
  # transform was already saved via `finish()`
503
- transform_was_saved = (
504
- transform._source_code_artifact_id is not None
505
- or transform.source_code is not None
506
- )
580
+ transform_was_saved = transform.source_code is not None
507
581
  # check whether the transform.key is consistent
508
582
  if transform.key != key:
509
583
  raise UpdateContext(get_key_clashing_message(transform, key))
510
- elif transform.name != name:
511
- transform.name = name
584
+ elif transform.description != description:
585
+ transform.description = description
512
586
  transform.save()
513
587
  self._logging_message_track += (
514
- "updated transform name, " # white space on purpose
588
+ "updated transform description, " # white space on purpose
515
589
  )
516
590
  elif (
517
591
  transform.created_by_id != ln_setup.settings.user.id
518
592
  and not transform_was_saved
519
593
  ):
520
594
  raise UpdateContext(
521
- f'{transform.created_by.name} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* filename and `ln.track("{ids.base62_12()}0000")`.'
595
+ f'{transform.created_by.description} ({transform.created_by.handle}) already works on this draft {transform.type}.\n\nPlease create a revision via `ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")` or a new transform with a *different* filedescription and `ln.track("{ids.base62_12()}0000")`.'
522
596
  )
523
597
  # check whether transform source code was already saved
524
598
  if transform_was_saved:
@@ -527,30 +601,23 @@ class Context:
527
601
  bump_revision = True
528
602
  else:
529
603
  hash, _ = hash_file(self._path) # ignore hash_type for now
530
- if transform.hash is not None:
531
- condition = hash != transform.hash
532
- else:
533
- condition = hash != transform._source_code_artifact.hash
534
- if condition:
604
+ if hash != transform.hash:
535
605
  bump_revision = True
536
606
  else:
537
607
  self._logging_message_track += (
538
- f"loaded Transform('{transform.uid[:8]}')"
608
+ f"loaded Transform('{transform.uid}')"
539
609
  )
540
610
  if bump_revision:
541
611
  change_type = (
542
- "Re-running saved notebook"
612
+ "re-running saved notebook"
543
613
  if is_run_from_ipython
544
- else "Source code changed"
614
+ else "source code changed"
545
615
  )
546
616
  raise UpdateContext(
547
- f"{change_type}, bump revision by setting:\n\n"
548
- f'ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")'
617
+ f'✗ {change_type}, run: ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")'
549
618
  )
550
619
  else:
551
- self._logging_message_track += (
552
- f"loaded Transform('{transform.uid[:8]}')"
553
- )
620
+ self._logging_message_track += f"loaded Transform('{transform.uid}')"
554
621
  self._transform = transform
555
622
 
556
623
  def finish(self, ignore_non_consecutive: None | bool = None) -> None:
@@ -580,8 +647,8 @@ class Context:
580
647
 
581
648
  """
582
649
  from lamindb._finish import (
650
+ get_save_notebook_message,
583
651
  get_seconds_since_modified,
584
- get_shortcut,
585
652
  save_context_core,
586
653
  )
587
654
 
@@ -600,13 +667,13 @@ class Context:
600
667
  import nbproject
601
668
 
602
669
  # it might be that the user modifies the title just before ln.finish()
603
- if (nbproject_title := nbproject.meta.live.title) != self.transform.name:
604
- self.transform.name = nbproject_title
670
+ if (
671
+ nbproject_title := nbproject.meta.live.title
672
+ ) != self.transform.description:
673
+ self.transform.description = nbproject_title
605
674
  self.transform.save()
606
675
  if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
607
- raise NotebookNotSaved(
608
- f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `ln.finish()`"
609
- )
676
+ raise NotebookNotSaved(get_save_notebook_message())
610
677
  save_context_core(
611
678
  run=self.run,
612
679
  transform=self.run.transform,
@@ -614,6 +681,8 @@ class Context:
614
681
  finished_at=True,
615
682
  ignore_non_consecutive=ignore_non_consecutive,
616
683
  )
684
+ if self.transform.type != "notebook":
685
+ self._stream_tracker.finish()
617
686
 
618
687
 
619
688
  context = Context()