lamindb 0.76.6__py3-none-any.whl → 0.76.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +113 -113
- lamindb/_artifact.py +1205 -1174
- lamindb/_can_validate.py +579 -579
- lamindb/_collection.py +387 -382
- lamindb/_curate.py +1601 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +242 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +256 -256
- lamindb/_from_values.py +382 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +362 -362
- lamindb/_record.py +649 -649
- lamindb/_run.py +57 -57
- lamindb/_save.py +308 -295
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +127 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -93
- lamindb/core/_context.py +574 -558
- lamindb/core/_data.py +438 -438
- lamindb/core/_feature_manager.py +867 -866
- lamindb/core/_label_manager.py +253 -252
- lamindb/core/_mapped_collection.py +597 -597
- lamindb/core/_settings.py +187 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +571 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -77
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -0
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +740 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -196
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -245
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +158 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/LICENSE +201 -201
- {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/METADATA +5 -5
- lamindb-0.76.8.dist-info/RECORD +60 -0
- {lamindb-0.76.6.dist-info → lamindb-0.76.8.dist-info}/WHEEL +1 -1
- lamindb-0.76.6.dist-info/RECORD +0 -59
lamindb/core/_context.py
CHANGED
@@ -1,558 +1,574 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import builtins
|
4
|
-
import hashlib
|
5
|
-
from datetime import datetime, timezone
|
6
|
-
from pathlib import Path, PurePath
|
7
|
-
from typing import TYPE_CHECKING
|
8
|
-
|
9
|
-
import lamindb_setup as ln_setup
|
10
|
-
from lamin_utils import logger
|
11
|
-
from lamindb_setup.core.hashing import hash_file
|
12
|
-
from lnschema_core import Run, Transform, ids
|
13
|
-
from lnschema_core.ids import base62_12
|
14
|
-
from lnschema_core.models import format_field_value
|
15
|
-
from lnschema_core.users import current_user_id
|
16
|
-
|
17
|
-
from ._settings import settings
|
18
|
-
from ._sync_git import get_transform_reference_from_git_repo
|
19
|
-
from ._track_environment import track_environment
|
20
|
-
from .exceptions import (
|
21
|
-
|
22
|
-
|
23
|
-
NotebookNotSavedError,
|
24
|
-
NoTitleError,
|
25
|
-
TrackNotCalled,
|
26
|
-
UpdateContext,
|
27
|
-
)
|
28
|
-
from .subsettings._transform_settings import transform_settings
|
29
|
-
from .versioning import bump_version as bump_version_function
|
30
|
-
from .versioning import increment_base62, message_update_key_in_version_family
|
31
|
-
|
32
|
-
if TYPE_CHECKING:
|
33
|
-
from lamindb_setup.core.types import UPathStr
|
34
|
-
from lnschema_core.types import TransformType
|
35
|
-
|
36
|
-
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
37
|
-
|
38
|
-
msg_path_failed = (
|
39
|
-
"failed to infer notebook path.\nfix: pass `path` to ln.context.track()"
|
40
|
-
)
|
41
|
-
|
42
|
-
|
43
|
-
def get_uid_ext(version: str) -> str:
|
44
|
-
from lamin_utils._base62 import encodebytes
|
45
|
-
|
46
|
-
# merely zero-padding the nbproject version such that the base62 encoding is
|
47
|
-
# at least 4 characters long doesn't yields sufficiently diverse hashes and
|
48
|
-
# leads to collisions; it'd be nice because the uid_ext would be ordered
|
49
|
-
return encodebytes(hashlib.md5(version.encode()).digest())[:4] # noqa: S324
|
50
|
-
|
51
|
-
|
52
|
-
def get_notebook_path():
|
53
|
-
from nbproject.dev._jupyter_communicate import (
|
54
|
-
notebook_path as get_notebook_path,
|
55
|
-
)
|
56
|
-
|
57
|
-
path = None
|
58
|
-
try:
|
59
|
-
path = get_notebook_path()
|
60
|
-
except Exception:
|
61
|
-
raise RuntimeError(msg_path_failed) from None
|
62
|
-
if path is None:
|
63
|
-
raise RuntimeError(msg_path_failed) from None
|
64
|
-
return path
|
65
|
-
|
66
|
-
|
67
|
-
# from https://stackoverflow.com/questions/61901628
|
68
|
-
def get_notebook_name_colab() -> str:
|
69
|
-
from socket import gethostbyname, gethostname # type: ignore
|
70
|
-
|
71
|
-
from requests import get # type: ignore
|
72
|
-
|
73
|
-
ip = gethostbyname(gethostname()) # 172.28.0.12
|
74
|
-
try:
|
75
|
-
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"] # noqa: S113
|
76
|
-
except Exception:
|
77
|
-
logger.warning(
|
78
|
-
"could not get notebook name from Google Colab, using: notebook.ipynb"
|
79
|
-
)
|
80
|
-
name = "notebook.ipynb"
|
81
|
-
return name.rstrip(".ipynb")
|
82
|
-
|
83
|
-
|
84
|
-
def raise_missing_context(transform_type: str, key: str) ->
|
85
|
-
transform = Transform.filter(key=key).latest_version().first()
|
86
|
-
if transform is None:
|
87
|
-
new_uid = f"{base62_12()}0000"
|
88
|
-
message = f"To track this {transform_type},
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
if transform_type == "notebook":
|
97
|
-
message
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
self.
|
152
|
-
|
153
|
-
@property
|
154
|
-
def
|
155
|
-
"""`
|
156
|
-
return self.
|
157
|
-
|
158
|
-
@
|
159
|
-
def
|
160
|
-
self.
|
161
|
-
|
162
|
-
@property
|
163
|
-
def
|
164
|
-
"""`
|
165
|
-
return self.
|
166
|
-
|
167
|
-
@
|
168
|
-
def
|
169
|
-
self.
|
170
|
-
|
171
|
-
@property
|
172
|
-
def
|
173
|
-
"""
|
174
|
-
return self.
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
)
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
# test
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
)
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
if
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
.
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
)
|
323
|
-
run
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
run
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
)
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
key
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
)
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
run
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import builtins
|
4
|
+
import hashlib
|
5
|
+
from datetime import datetime, timezone
|
6
|
+
from pathlib import Path, PurePath
|
7
|
+
from typing import TYPE_CHECKING
|
8
|
+
|
9
|
+
import lamindb_setup as ln_setup
|
10
|
+
from lamin_utils import logger
|
11
|
+
from lamindb_setup.core.hashing import hash_file
|
12
|
+
from lnschema_core import Run, Transform, ids
|
13
|
+
from lnschema_core.ids import base62_12
|
14
|
+
from lnschema_core.models import format_field_value
|
15
|
+
from lnschema_core.users import current_user_id
|
16
|
+
|
17
|
+
from ._settings import settings
|
18
|
+
from ._sync_git import get_transform_reference_from_git_repo
|
19
|
+
from ._track_environment import track_environment
|
20
|
+
from .exceptions import (
|
21
|
+
MissingContextUID,
|
22
|
+
NotebookNotSaved,
|
23
|
+
NotebookNotSavedError,
|
24
|
+
NoTitleError,
|
25
|
+
TrackNotCalled,
|
26
|
+
UpdateContext,
|
27
|
+
)
|
28
|
+
from .subsettings._transform_settings import transform_settings
|
29
|
+
from .versioning import bump_version as bump_version_function
|
30
|
+
from .versioning import increment_base62, message_update_key_in_version_family
|
31
|
+
|
32
|
+
if TYPE_CHECKING:
|
33
|
+
from lamindb_setup.core.types import UPathStr
|
34
|
+
from lnschema_core.types import TransformType
|
35
|
+
|
36
|
+
# Value of `builtins.__IPYTHON__` if that attribute exists, else `False`
# (presumably IPython sets it in interactive sessions - confirm).
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)

# Error message used by `get_notebook_path()` when the notebook path cannot be
# inferred.
msg_path_failed = (
    "failed to infer notebook path.\nfix: pass `path` to ln.context.track()"
)
|
41
|
+
|
42
|
+
|
43
|
+
def get_uid_ext(version: str) -> str:
    """Derive a 4-character uid extension from a version string.

    The extension is the first four characters of the base62 encoding of the
    MD5 digest of the UTF-8 encoded ``version``.
    """
    from lamin_utils._base62 import encodebytes

    # Merely zero-padding the nbproject version so that its base62 encoding is
    # at least 4 characters long would give ordered extensions, but it doesn't
    # yield sufficiently diverse values and leads to collisions; hence a hash.
    digest = hashlib.md5(version.encode()).digest()  # noqa: S324
    encoded = encodebytes(digest)
    return encoded[:4]
|
50
|
+
|
51
|
+
|
52
|
+
def get_notebook_path():
    """Return the notebook path reported by nbproject's `notebook_path`.

    Raises:
        RuntimeError: If the lookup raises any exception or yields `None`.
    """
    from nbproject.dev._jupyter_communicate import notebook_path as infer_path

    try:
        inferred = infer_path()
    except Exception:
        # hide the underlying traceback, the message tells the user what to do
        raise RuntimeError(msg_path_failed) from None
    if inferred is not None:
        return inferred
    raise RuntimeError(msg_path_failed) from None
|
65
|
+
|
66
|
+
|
67
|
+
# from https://stackoverflow.com/questions/61901628
|
68
|
+
def get_notebook_name_colab() -> str:
    """Return the name of the current Google Colab notebook without `.ipynb`.

    Requests `/api/sessions` on port 9000 of this host's IP address and reads
    the `name` of the first session. If that fails for any reason, a warning
    is logged and the name falls back to "notebook.ipynb".

    Returns:
        The notebook name with a trailing ".ipynb" suffix removed, if present.
    """
    from socket import gethostbyname, gethostname  # type: ignore

    from requests import get  # type: ignore

    ip = gethostbyname(gethostname())  # 172.28.0.12
    try:
        name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"]  # noqa: S113
    except Exception:
        logger.warning(
            "could not get notebook name from Google Colab, using: notebook.ipynb"
        )
        name = "notebook.ipynb"
    # `str.rstrip(".ipynb")` strips a *set of characters*, not a suffix, and
    # would also eat trailing characters of the stem (e.g. "numpy.ipynb" ->
    # "num"); remove the exact suffix instead.
    suffix = ".ipynb"
    return name[: -len(suffix)] if name.endswith(suffix) else name
|
82
|
+
|
83
|
+
|
84
|
+
def raise_missing_context(transform_type: str, key: str) -> bool:
    """Tell the user how to set `ln.context.uid` when no uid was configured.

    Looks up the latest-version `Transform` with the given `key` and builds
    instructions containing either a freshly generated uid (no such transform)
    or the existing uid with an incremented revision part.

    Args:
        transform_type: `"notebook"` triggers an interactive prompt; any other
            value raises immediately with the instructions as message.
        key: Key used to look up an existing transform.

    Returns:
        `True` if the user confirmed the notebook prompt with "y". The
        function never returns `False`; every other path raises.

    Raises:
        MissingContextUID: For non-notebooks, or if the prompt was not
            confirmed.
    """
    transform = Transform.filter(key=key).latest_version().first()
    if transform is None:
        new_uid = f"{base62_12()}0000"
        message = f"To track this {transform_type}, copy & paste the below into the current cell and re-run it\n\n"
        message += f'ln.context.uid = "{new_uid}"\nln.context.track()'
    else:
        uid = transform.uid
        suid, vuid = uid[: Transform._len_stem_uid], uid[Transform._len_stem_uid :]
        new_vuid = increment_base62(vuid)
        new_uid = f"{suid}{new_vuid}"
        message = f"You already have a version family with key '{key}' (stem_uid='{transform.stem_uid}').\n\n- to make a revision, set `ln.context.uid = '{new_uid}'`\n- to start a new version family, rename your file and rerun: `ln.context.track()`"
    if transform_type != "notebook":
        raise MissingContextUID(message)
    print(f"→ {message}\n")
    response = input("→ Ready to re-run? (y/n)")
    # tolerate surrounding whitespace and upper case in the answer
    if response.strip().lower() == "y":
        logger.important(
            "Note: Restart your notebook if you want consecutive cell execution"
        )
        return True
    raise MissingContextUID("Please follow the instructions.")
|
108
|
+
|
109
|
+
|
110
|
+
def pretty_pypackages(dependencies: dict) -> str:
    """Render a package-to-version mapping as one sorted, space-separated string.

    Packages whose version is not the empty string are rendered as
    `pkg==ver`; all others as the bare package name.
    """
    rendered = sorted(
        pkg + f"=={ver}" if ver != "" else pkg for pkg, ver in dependencies.items()
    )
    return " ".join(rendered)
|
119
|
+
|
120
|
+
|
121
|
+
class Context:
    """Run context.

    Enables convenient data lineage tracking by managing a transform & run
    upon :meth:`~lamindb.core.Context.track` & :meth:`~lamindb.core.Context.finish`.

    Examples:

        Is typically used via :class:`~lamindb.context`:

        >>> import lamindb as ln
        >>> ln.context.track()
        >>> # do things while tracking data lineage
        >>> ln.context.finish()

    """

    def __init__(self):
        self._uid: str | None = None
        self._name: str | None = None
        self._version: str | None = None
        self._transform: Transform | None = None
        self._run: Run | None = None
        self._path: Path | None = None
        """A local path to the script that's running."""
        # accumulates message fragments that `track()` logs once at its end
        self._logging_message: str = ""

    @property
    def transform(self) -> Transform | None:
        """Transform of context."""
        return self._transform

    @property
    def uid(self) -> str | None:
        """`uid` to create transform."""
        return self._uid

    @uid.setter
    def uid(self, value: str | None):
        self._uid = value

    @property
    def name(self) -> str | None:
        """`name` to create transform."""
        return self._name

    @name.setter
    def name(self, value: str | None):
        self._name = value

    @property
    def version(self) -> str | None:
        """`version` to create transform."""
        return self._version

    @version.setter
    def version(self, value: str | None):
        self._version = value

    @property
    def run(self) -> Run | None:
        """Run of context."""
        return self._run

    def track(
        self,
        *,
        params: dict | None = None,
        new_run: bool | None = None,
        path: str | None = None,
        transform: Transform | None = None,
    ) -> None:
        """Starts data lineage tracking for a run.

        - sets :attr:`~lamindb.core.Context.transform` &
          :attr:`~lamindb.core.Context.run` by creating or loading `Transform` &
          `Run` records
        - saves compute environment as a `requirements.txt` file: `run.environment`

        If :attr:`~lamindb.core.Settings.sync_git_repo` is set, checks whether a
        script-like transform exists in a git repository and links it.

        Args:
            params: A dictionary of parameters to track for the run.
            new_run: If `False`, loads latest run of transform
                (default notebook), if `True`, creates new run (default pipeline).
            path: Filepath of notebook or script. Only needed if it can't be
                automatically detected.
            transform: Useful to track an abstract pipeline.

        Examples:

            To track the run of a notebook or script, call:

            >>> import lamindb as ln
            >>> ln.context.track()

        """
        self._path = None
        if transform is None:
            is_tracked = False
            transform_settings_are_set = (
                transform_settings.stem_uid is not None
                and transform_settings.version is not None
            )
            transform = None
            stem_uid = None
            if self.uid is not None:
                transform = Transform.filter(uid=self.uid).one_or_none()
                if self.version is not None:
                    # test inconsistent version passed
                    if (
                        transform is not None
                        and transform.version is not None
                        and self.version != transform.version
                    ):
                        raise SystemExit(
                            f"Please pass consistent version: ln.context.version = '{transform.version}'"
                        )
                    # test whether version was already used for another member of the family
                    suid, vuid = (
                        self.uid[: Transform._len_stem_uid],
                        self.uid[Transform._len_stem_uid :],
                    )
                    transform = Transform.filter(
                        uid__startswith=suid, version=self.version
                    ).one_or_none()
                    if (
                        transform is not None
                        and vuid != transform.uid[Transform._len_stem_uid :]
                    ):
                        better_version = bump_version_function(self.version)
                        raise SystemExit(
                            f"Version '{self.version}' is already taken by Transform(uid='{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
                        )
            elif transform_settings_are_set:
                stem_uid, self.version = (
                    transform_settings.stem_uid,
                    transform_settings.version,
                )
                transform = Transform.filter(
                    uid__startswith=stem_uid, version=self.version
                ).one_or_none()
            # NOTE: `_track_script` must be called directly from this method;
            # it inspects the call stack at a fixed depth to find the caller
            if is_run_from_ipython:
                key, name = self._track_notebook(path=path)
                transform_type = "notebook"
                transform_ref = None
                transform_ref_type = None
            else:
                (name, key, transform_ref, transform_ref_type) = self._track_script(
                    path=path
                )
                transform_type = "script"
            if self.uid is not None or transform_settings_are_set:
                # overwrite whatever is auto-detected in the notebook or script
                if self.name is not None:
                    name = self.name
                self._create_or_load_transform(
                    uid=self.uid,
                    stem_uid=stem_uid,
                    version=self.version,
                    name=name,
                    transform_ref=transform_ref,
                    transform_ref_type=transform_ref_type,
                    transform_type=transform_type,
                    key=key,
                    transform=transform,
                )
                # if no error is raised, the transform is tracked
                is_tracked = True
            if not is_tracked:
                early_return = raise_missing_context(transform_type, key)
                if early_return:
                    return None
        else:
            if transform.type in {"notebook", "script"}:
                raise ValueError(
                    "Use ln.context.track() without passing transform in a notebook or script"
                    " - metadata is automatically parsed"
                )
            transform_exists = None
            if transform.id is not None:
                # transform has an id but unclear whether already saved
                transform_exists = Transform.filter(id=transform.id).first()
            if transform_exists is None:
                transform.save()
                self._logging_message += f"created Transform(uid='{transform.uid}')"
                transform_exists = transform
            else:
                self._logging_message += f"loaded Transform(uid='{transform.uid}')"
            self._transform = transform_exists

        if new_run is None:  # for notebooks, default to loading latest runs
            new_run = False if self._transform.type == "notebook" else True  # type: ignore

        run = None
        if not new_run:  # try loading latest run by same user
            run = (
                Run.filter(transform=self._transform, created_by_id=current_user_id())
                .order_by("-created_at")
                .first()
            )
            if run is not None:  # loaded latest run
                run.started_at = datetime.now(timezone.utc)  # update run time
                self._logging_message += (
                    f" & loaded Run(started_at={format_field_value(run.started_at)})"
                )

        if run is None:  # create new run
            run = Run(
                transform=self._transform,
                params=params,
            )
            run.started_at = datetime.now(timezone.utc)
            self._logging_message += (
                f" & created Run(started_at={format_field_value(run.started_at)})"
            )
        # can only determine at ln.finish() if run was consecutive in
        # interactive session, otherwise, is consecutive
        run.is_consecutive = True if is_run_from_ipython else None
        # need to save in all cases
        run.save()
        if params is not None:
            run.params.add_values(params)
        self._run = run
        track_environment(run)
        logger.important(self._logging_message)
        self._logging_message = ""

    def _track_script(
        self,
        *,
        path: UPathStr | None,
    ) -> tuple[str, str, str, str]:
        """Set `self._path` for a script and derive its transform metadata.

        Returns:
            A tuple `(name, key, reference, reference_type)`. `name` and `key`
            are both the file name; `reference` and `reference_type` are only
            set if `settings.sync_git_repo` is not `None`, otherwise `None`.
        """
        if path is None:
            import inspect

            # index 0 is this method and 1 its caller (`track()`), so index 2
            # is the frame that called `track()`
            frame = inspect.stack()[2]
            module = inspect.getmodule(frame[0])
            self._path = Path(module.__file__)
        else:
            self._path = Path(path)
        name = self._path.name
        key = name
        reference = None
        reference_type = None
        if settings.sync_git_repo is not None:
            reference = get_transform_reference_from_git_repo(self._path)
            reference_type = "url"
        return name, key, reference, reference_type

    def _track_notebook(
        self,
        *,
        path: str | None,
    ):
        """Set `self._path` for a notebook and derive its key and name.

        Args:
            path: Notebook path; inferred via `get_notebook_path()` if `None`.

        Returns:
            A tuple `(key, name)`. For paths starting with "/fileId=" the name
            comes from `get_notebook_name_colab()`, otherwise from the
            nbproject title.

        Raises:
            RuntimeError: If the path ends with "Untitled.ipynb".
            NotebookNotSavedError: If reading the nbproject title raises
                `IndexError`.
            NoTitleError: If the nbproject title is `None`.
        """
        if path is None:
            path = get_notebook_path()
        key = Path(path).name
        if isinstance(path, (Path, PurePath)):
            path_str = path.as_posix()  # type: ignore
        else:
            path_str = str(path)
        if path_str.endswith("Untitled.ipynb"):
            raise RuntimeError("Please rename your notebook before tracking it")
        if path_str.startswith("/fileId="):
            name = get_notebook_name_colab()
            key = f"{name}.ipynb"
        else:
            import nbproject

            try:
                nbproject_title = nbproject.meta.live.title
            except IndexError:
                raise NotebookNotSavedError(
                    "The notebook is not saved, please save the notebook and"
                    " rerun `ln.context.track()`"
                ) from None
            if nbproject_title is None:
                raise NoTitleError(
                    "Please add a title to your notebook in a markdown cell: # Title"
                ) from None
            name = nbproject_title
        # log imported python packages
        if not path_str.startswith("/fileId="):
            try:
                from nbproject.dev._pypackage import infer_pypackages

                nb = nbproject.dev.read_notebook(path_str)
                logger.important(
                    "notebook imports:"
                    f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
                )
            except Exception:
                # best effort only: failing to list imports must not block tracking
                logger.debug("inferring imported packages failed")
        self._path = Path(path_str)
        return key, name

    def _create_or_load_transform(
        self,
        *,
        uid: str | None,
        stem_uid: str | None,
        version: str | None,
        name: str,
        transform_ref: str | None = None,
        transform_ref_type: str | None = None,
        key: str | None = None,
        transform_type: TransformType = None,
        transform: Transform | None = None,
    ):
        """Create and save a new transform or validate the passed one.

        Sets `self._transform` and appends to `self._logging_message`.

        Raises:
            UpdateContext: If the key of the passed transform differs from
                `key`, or if its source code was already saved and a new
                revision is required (always in IPython, otherwise when the
                hash of the file at `self._path` differs from the stored one).
        """
        # make a new transform record
        if transform is None:
            if uid is None:
                uid = f"{stem_uid}{get_uid_ext(version)}"
            # note that here we're not passing revises because we're not querying it
            # hence, we need to do a revision family lookup based on key
            # hence, we need key to be not None
            assert key is not None  # noqa: S101
            transform = Transform(
                uid=uid,
                version=version,
                name=name,
                key=key,
                reference=transform_ref,
                reference_type=transform_ref_type,
                type=transform_type,
            ).save()
            self._logging_message += f"created Transform(uid='{transform.uid}')"
        else:
            uid = transform.uid
            # check whether the transform file has been renamed
            if transform.key != key:
                suid = transform.stem_uid
                new_suid = ids.base62_12()
                transform_type = "Notebook" if is_run_from_ipython else "Script"
                note = message_update_key_in_version_family(
                    suid=suid,
                    existing_key=transform.key,
                    new_key=key,
                    registry="Transform",
                )
                raise UpdateContext(
                    f"{transform_type} filename changed.\n\nEither init a new transform family by setting:\n\n"
                    f'ln.context.uid = "{new_suid}0000"\n\n{note}'
                )
            elif transform.name != name:
                transform.name = name
                transform.save()
                self._logging_message += (
                    "updated transform name, "  # white space on purpose
                )
            # check whether transform source code was already saved
            if (
                transform._source_code_artifact_id is not None
                or transform.source_code is not None
            ):
                bump_revision = False
                if is_run_from_ipython:
                    bump_revision = True
                else:
                    file_hash, _ = hash_file(self._path)  # ignore hash_type for now
                    if transform.hash is not None:
                        condition = file_hash != transform.hash
                    else:
                        condition = file_hash != transform._source_code_artifact.hash
                    if condition:
                        bump_revision = True
                    else:
                        self._logging_message += (
                            f"loaded Transform(uid='{transform.uid}')"
                        )
                if bump_revision:
                    change_type = (
                        "Re-running saved notebook"
                        if is_run_from_ipython
                        else "Source code changed"
                    )
                    suid, vuid = (
                        uid[:-4],
                        uid[-4:],
                    )
                    new_vuid = increment_base62(vuid)
                    raise UpdateContext(
                        f"{change_type}, bump revision by setting:\n\n"
                        f'ln.context.uid = "{suid}{new_vuid}"'
                    )
            else:
                self._logging_message += f"loaded Transform(uid='{transform.uid}')"
        self._transform = transform

    def finish(self, ignore_non_consecutive: None | bool = None) -> None:
        """Mark the run context as finished.

        - writes a timestamp: `run.finished_at`
        - saves the source code: `transform.source_code`

        When called in the last cell of a notebook:

        - prompts for user input if not consecutively executed
        - requires to save the notebook in your editor right before
        - saves a run report: `run.report`

        Args:
            ignore_non_consecutive: Whether to ignore if a notebook was non-consecutively executed.

        Examples:

            >>> import lamindb as ln
            >>> ln.context.track()
            >>> # do things while tracking data lineage
            >>> ln.context.finish()

        See Also:
            `lamin save script.py` or `lamin save notebook.ipynb` → `docs </cli#lamin-save>`__

        """
        from lamindb._finish import save_context_core

        def get_seconds_since_modified(filepath) -> float:
            return datetime.now().timestamp() - filepath.stat().st_mtime

        def get_shortcut() -> str:
            import platform

            return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"

        # operate on this instance rather than on the module-level singleton,
        # so that the method is also correct for other `Context` objects
        if self.run is None:
            raise TrackNotCalled("Please run `ln.context.track()` before `ln.finish()`")
        if self._path is None:
            if self.run.transform.type in {"script", "notebook"}:
                raise ValueError(
                    f"Transform type is not allowed to be 'script' or 'notebook' but is {self.run.transform.type}."
                )
            self.run.finished_at = datetime.now(timezone.utc)
            self.run.save()
            # nothing else to do
            return None
        if is_run_from_ipython:  # notebooks
            if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
                raise NotebookNotSaved(
                    f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) right before calling `ln.context.finish()`"
                )
        save_context_core(
            run=self.run,
            transform=self.run.transform,
            filepath=self._path,
            finished_at=True,
            ignore_non_consecutive=ignore_non_consecutive,
        )
|
572
|
+
|
573
|
+
|
574
|
+
# Module-level `Context` instance; the `Context` docstring refers to it as
# `lamindb.context`.
context = Context()
|