lamindb 1.6.2__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -3
- lamindb/_finish.py +32 -16
- lamindb/base/types.py +6 -4
- lamindb/core/_context.py +127 -57
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/_settings.py +44 -4
- lamindb/core/_track_environment.py +5 -2
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +1 -1
- lamindb/core/storage/_tiledbsoma.py +14 -8
- lamindb/core/storage/_valid_suffixes.py +0 -1
- lamindb/core/storage/_zarr.py +1 -1
- lamindb/core/storage/objects.py +13 -8
- lamindb/core/storage/paths.py +9 -6
- lamindb/core/types.py +1 -1
- lamindb/curators/_legacy.py +2 -1
- lamindb/curators/core.py +106 -105
- lamindb/errors.py +9 -0
- lamindb/examples/fixtures/__init__.py +0 -0
- lamindb/examples/fixtures/sheets.py +224 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +1 -1
- lamindb/migrations/0105_record_unique_name.py +20 -0
- lamindb/migrations/0106_transfer_data_migration.py +25 -0
- lamindb/migrations/0107_add_schema_to_record.py +68 -0
- lamindb/migrations/0108_remove_record_sheet_remove_sheetproject_sheet_and_more.py +30 -0
- lamindb/migrations/0109_record_input_of_runs_alter_record_run_and_more.py +123 -0
- lamindb/migrations/0110_rename_values_artifacts_record_linked_artifacts.py +17 -0
- lamindb/migrations/0111_remove_record__sort_order.py +148 -0
- lamindb/migrations/0112_alter_recordartifact_feature_and_more.py +105 -0
- lamindb/migrations/0113_lower_case_branch_and_space_names.py +62 -0
- lamindb/migrations/0114_alter_run__status_code.py +24 -0
- lamindb/migrations/0115_alter_space_uid.py +52 -0
- lamindb/migrations/{0104_squashed.py → 0115_squashed.py} +261 -257
- lamindb/models/__init__.py +4 -3
- lamindb/models/_describe.py +88 -31
- lamindb/models/_feature_manager.py +627 -658
- lamindb/models/_label_manager.py +1 -3
- lamindb/models/artifact.py +214 -99
- lamindb/models/collection.py +7 -1
- lamindb/models/feature.py +288 -60
- lamindb/models/has_parents.py +3 -3
- lamindb/models/project.py +32 -15
- lamindb/models/query_manager.py +7 -1
- lamindb/models/query_set.py +118 -41
- lamindb/models/record.py +140 -94
- lamindb/models/run.py +42 -42
- lamindb/models/save.py +102 -16
- lamindb/models/schema.py +41 -8
- lamindb/models/sqlrecord.py +105 -40
- lamindb/models/storage.py +278 -0
- lamindb/models/transform.py +10 -2
- lamindb/models/ulabel.py +9 -1
- lamindb/py.typed +0 -0
- lamindb/setup/__init__.py +2 -1
- lamindb/setup/_switch.py +16 -0
- lamindb/setup/errors/__init__.py +4 -0
- lamindb/setup/types/__init__.py +4 -0
- {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/METADATA +5 -5
- {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/RECORD +61 -44
- lamindb/models/core.py +0 -135
- {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/LICENSE +0 -0
- {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
@@ -46,7 +46,6 @@ Manage flexible records to track, e.g., samples or donors.
|
|
46
46
|
:toctree: .
|
47
47
|
|
48
48
|
Record
|
49
|
-
Sheet
|
50
49
|
|
51
50
|
Manage projects.
|
52
51
|
|
@@ -109,7 +108,7 @@ Backwards compatibility.
|
|
109
108
|
|
110
109
|
# ruff: noqa: I001
|
111
110
|
# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
|
112
|
-
__version__ = "1.6.2"
|
111
|
+
__version__ = "1.7.0"
|
113
112
|
|
114
113
|
import warnings
|
115
114
|
|
@@ -155,7 +154,6 @@ if _check_instance_setup(from_module="lamindb"):
|
|
155
154
|
Space,
|
156
155
|
Branch,
|
157
156
|
Record,
|
158
|
-
Sheet,
|
159
157
|
)
|
160
158
|
from .models.save import save
|
161
159
|
from . import core
|
lamindb/_finish.py
CHANGED
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING
|
|
9
9
|
import lamindb_setup as ln_setup
|
10
10
|
from lamin_utils import logger
|
11
11
|
from lamin_utils._logger import LEVEL_TO_COLORS, LEVEL_TO_ICONS, RESET_COLOR
|
12
|
-
from lamindb_setup.core.hashing import hash_file
|
12
|
+
from lamindb_setup.core.hashing import hash_dir, hash_file
|
13
13
|
|
14
14
|
from lamindb.models import Artifact, Run, Transform
|
15
15
|
|
@@ -241,6 +241,7 @@ def save_context_core(
|
|
241
241
|
transform: Transform,
|
242
242
|
filepath: Path,
|
243
243
|
finished_at: bool = False,
|
244
|
+
skip_save_report: bool = False,
|
244
245
|
ignore_non_consecutive: bool | None = None,
|
245
246
|
from_cli: bool = False,
|
246
247
|
is_retry: bool = False,
|
@@ -343,7 +344,6 @@ def save_context_core(
|
|
343
344
|
transform_hash, _ = hash_file(source_code_path) # ignore hash_type for now
|
344
345
|
if transform.hash is not None:
|
345
346
|
# check if the hash of the transform source code matches
|
346
|
-
# (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
|
347
347
|
if transform_hash != transform.hash:
|
348
348
|
response = input(
|
349
349
|
f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
|
@@ -361,26 +361,46 @@ def save_context_core(
|
|
361
361
|
transform.source_code = source_code_path.read_text()
|
362
362
|
transform.hash = transform_hash
|
363
363
|
|
364
|
-
# track run environment
|
365
364
|
if run is not None:
|
366
|
-
|
367
|
-
|
365
|
+
base_path = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
|
366
|
+
paths = [base_path / "run_env_pip.txt", base_path / "r_pak_lockfile.json"]
|
367
|
+
existing_paths = [path for path in paths if path.exists()]
|
368
|
+
|
369
|
+
if existing_paths:
|
368
370
|
overwrite_env = True
|
369
371
|
if run.environment_id is not None and from_cli:
|
370
372
|
logger.important("run.environment is already saved, ignoring")
|
371
373
|
overwrite_env = False
|
374
|
+
|
372
375
|
if overwrite_env:
|
373
|
-
|
376
|
+
# Use directory if multiple files exist, otherwise use the single file
|
377
|
+
artifact_path: Path = (
|
378
|
+
base_path if len(existing_paths) > 1 else existing_paths[0]
|
379
|
+
)
|
380
|
+
|
381
|
+
# Set description based on what we're saving
|
382
|
+
if len(existing_paths) == 1:
|
383
|
+
if existing_paths[0].name == "run_env_pip.txt":
|
384
|
+
description = "requirements.txt"
|
385
|
+
elif existing_paths[0].name == "r_pak_lockfile.json":
|
386
|
+
description = "r_pak_lockfile.json"
|
387
|
+
env_hash, _ = hash_file(artifact_path)
|
388
|
+
else:
|
389
|
+
description = "environments"
|
390
|
+
_, env_hash, _, _ = hash_dir(artifact_path)
|
391
|
+
|
374
392
|
artifact = ln.Artifact.objects.filter(hash=env_hash).one_or_none()
|
375
393
|
new_env_artifact = artifact is None
|
394
|
+
|
376
395
|
if new_env_artifact:
|
377
|
-
artifact = ln.Artifact(
|
378
|
-
|
379
|
-
description=
|
396
|
+
artifact = ln.Artifact(
|
397
|
+
artifact_path,
|
398
|
+
description=description,
|
380
399
|
kind="__lamindb_run__",
|
381
400
|
run=False,
|
382
401
|
)
|
383
402
|
artifact.save(upload=True, print_progress=False)
|
403
|
+
|
384
404
|
run.environment = artifact
|
385
405
|
if new_env_artifact:
|
386
406
|
logger.debug(f"saved run.environment: {run.environment}")
|
@@ -394,12 +414,8 @@ def save_context_core(
|
|
394
414
|
if update_finished_at:
|
395
415
|
run.finished_at = datetime.now(timezone.utc)
|
396
416
|
|
397
|
-
# track logs
|
398
|
-
if run is not None and not from_cli and not is_ipynb and not is_r_notebook:
|
399
|
-
save_run_logs(run)
|
400
|
-
|
401
417
|
# track report and set is_consecutive
|
402
|
-
if save_source_code_and_report:
|
418
|
+
if save_source_code_and_report and not skip_save_report:
|
403
419
|
if run is not None:
|
404
420
|
# do not save a run report if executing through nbconvert
|
405
421
|
if report_path is not None and notebook_runner != "nbconvert":
|
@@ -454,7 +470,7 @@ def save_context_core(
|
|
454
470
|
ln.Transform.get(transform_id_prior_to_save).delete()
|
455
471
|
|
456
472
|
# finalize
|
457
|
-
if not from_cli and run is not None:
|
473
|
+
if finished_at and not from_cli and run is not None:
|
458
474
|
run_time = run.finished_at - run.started_at
|
459
475
|
days = run_time.days
|
460
476
|
seconds = run_time.seconds
|
@@ -480,7 +496,7 @@ def save_context_core(
|
|
480
496
|
logger.important(
|
481
497
|
f"go to: https://lamin.ai/{instance_slug}/transform/{transform.uid}"
|
482
498
|
)
|
483
|
-
if not from_cli and save_source_code_and_report:
|
499
|
+
if finished_at and not from_cli and save_source_code_and_report:
|
484
500
|
thing = "notebook" if (is_ipynb or is_r_notebook) else "script"
|
485
501
|
logger.important(
|
486
502
|
f"to update your {thing} from the CLI, run: lamin save {filepath}"
|
lamindb/base/types.py
CHANGED
@@ -27,7 +27,7 @@ from typing import Literal, Union
|
|
27
27
|
import numpy as np
|
28
28
|
import pandas as pd
|
29
29
|
from django.db.models.query_utils import DeferredAttribute as FieldAttr
|
30
|
-
from lamindb_setup.
|
30
|
+
from lamindb_setup.types import UPathStr # noqa: F401
|
31
31
|
|
32
32
|
# need to use Union because __future__.annotations doesn't do the job here <3.10
|
33
33
|
# typing.TypeAlias, >3.10 on but already deprecated
|
@@ -37,7 +37,7 @@ StrField = Union[str, FieldAttr] # typing.TypeAlias
|
|
37
37
|
TransformType = Literal[
|
38
38
|
"pipeline", "notebook", "upload", "script", "function", "linker"
|
39
39
|
]
|
40
|
-
ArtifactKind = Literal["dataset", "model"]
|
40
|
+
ArtifactKind = Literal["dataset", "model", "__lamindb_run__"]
|
41
41
|
|
42
42
|
# below is used for Feature.dtype and Param.dtype
|
43
43
|
Dtype = Literal[
|
@@ -51,10 +51,11 @@ Dtype = Literal[
|
|
51
51
|
"datetime", # datetime
|
52
52
|
"dict", # dictionary
|
53
53
|
"object", # this is a pandas input dtype, we're only using it for complicated types, not for strings
|
54
|
+
"path", # path, validated as str, but specially treated in the UI
|
54
55
|
]
|
55
56
|
"""Data type.
|
56
57
|
|
57
|
-
|
58
|
+
String-serialized representations of common data types.
|
58
59
|
|
59
60
|
Overview
|
60
61
|
========
|
@@ -68,8 +69,9 @@ integer `"int"` `int64 | int32 | int16 | int8 | uint | ...`
|
|
68
69
|
float `"float"` `float64 | float32 | float16 | float8 | ...`
|
69
70
|
string `"str"` `object`
|
70
71
|
datetime `"datetime"` `datetime`
|
71
|
-
date `"date"` `date`
|
72
|
+
date `"date"` `object` (pandera requires an ISO-format string, convert with `df["date"] = df["date"].dt.date`)
|
72
73
|
dictionary `"dict"` `object`
|
74
|
+
path `"path"` `str` (pandas does not have a dedicated path type, validated as `str`)
|
73
75
|
============ ============ =================================================
|
74
76
|
|
75
77
|
Categoricals
|
lamindb/core/_context.py
CHANGED
@@ -9,7 +9,7 @@ import threading
|
|
9
9
|
import traceback
|
10
10
|
from datetime import datetime, timezone
|
11
11
|
from pathlib import Path
|
12
|
-
from typing import TYPE_CHECKING
|
12
|
+
from typing import TYPE_CHECKING, TextIO
|
13
13
|
|
14
14
|
import lamindb_setup as ln_setup
|
15
15
|
from django.db.models import Func, IntegerField, Q
|
@@ -32,10 +32,10 @@ from ..models._is_versioned import (
|
|
32
32
|
increment_base62,
|
33
33
|
)
|
34
34
|
from ._sync_git import get_transform_reference_from_git_repo
|
35
|
-
from ._track_environment import
|
35
|
+
from ._track_environment import track_python_environment
|
36
36
|
|
37
37
|
if TYPE_CHECKING:
|
38
|
-
from lamindb_setup.
|
38
|
+
from lamindb_setup.types import UPathStr
|
39
39
|
|
40
40
|
from lamindb.base.types import TransformType
|
41
41
|
from lamindb.models import Branch, Project, Space
|
@@ -100,19 +100,50 @@ def pretty_pypackages(dependencies: dict) -> str:
|
|
100
100
|
return " ".join(deps_list)
|
101
101
|
|
102
102
|
|
103
|
+
def last_non_empty_r_block(line: str) -> str:
|
104
|
+
for block in reversed(line.split("\r")):
|
105
|
+
if block:
|
106
|
+
return block
|
107
|
+
return ""
|
108
|
+
|
109
|
+
|
103
110
|
class LogStreamHandler:
|
104
|
-
def __init__(self, log_stream, file):
|
111
|
+
def __init__(self, log_stream: TextIO, file: TextIO, use_buffer: bool):
|
105
112
|
self.log_stream = log_stream
|
106
113
|
self.file = file
|
107
114
|
|
108
|
-
|
115
|
+
self._buffer = ""
|
116
|
+
self._use_buffer = use_buffer
|
117
|
+
|
118
|
+
def write(self, data: str) -> int:
|
109
119
|
self.log_stream.write(data)
|
110
|
-
|
111
|
-
self.
|
120
|
+
|
121
|
+
if not self._use_buffer:
|
122
|
+
self.file.write(data)
|
123
|
+
self.file.flush()
|
124
|
+
return len(data)
|
125
|
+
|
126
|
+
self._buffer += data
|
127
|
+
# write only the last part of a line with carriage returns
|
128
|
+
while "\n" in self._buffer:
|
129
|
+
line, self._buffer = self._buffer.split("\n", 1)
|
130
|
+
self.file.write(last_non_empty_r_block(line) + "\n")
|
131
|
+
self.file.flush()
|
132
|
+
|
133
|
+
return len(data)
|
112
134
|
|
113
135
|
def flush(self):
|
114
136
|
self.log_stream.flush()
|
115
|
-
self.file.
|
137
|
+
if not self.file.closed:
|
138
|
+
self.file.flush()
|
139
|
+
|
140
|
+
# .flush is sometimes (in jupyter etc.) called after every .write
|
141
|
+
# this needs to be called only at the end
|
142
|
+
def flush_buffer(self):
|
143
|
+
if not self.file.closed and self._buffer:
|
144
|
+
self.file.write(last_non_empty_r_block(self._buffer))
|
145
|
+
self._buffer = ""
|
146
|
+
self.flush()
|
116
147
|
|
117
148
|
|
118
149
|
class LogStreamTracker:
|
@@ -131,8 +162,14 @@ class LogStreamTracker:
|
|
131
162
|
ln_setup.settings.cache_dir / f"run_logs_{self.run.uid}.txt"
|
132
163
|
)
|
133
164
|
self.log_file = open(self.log_file_path, "w")
|
134
|
-
|
135
|
-
sys.
|
165
|
+
# use buffering for correct handling of carriage returns
|
166
|
+
sys.stdout = LogStreamHandler(
|
167
|
+
self.original_stdout, self.log_file, use_buffer=True
|
168
|
+
)
|
169
|
+
# write everything immediately in stderr
|
170
|
+
sys.stderr = LogStreamHandler(
|
171
|
+
self.original_stderr, self.log_file, use_buffer=False
|
172
|
+
)
|
136
173
|
# handle signals
|
137
174
|
# signal should be used only in the main thread, otherwise
|
138
175
|
# ValueError: signal only works in main thread of the main interpreter
|
@@ -144,43 +181,60 @@ class LogStreamTracker:
|
|
144
181
|
|
145
182
|
def finish(self):
|
146
183
|
if self.original_stdout:
|
184
|
+
getattr(sys.stdout, "flush_buffer", sys.stdout.flush)()
|
185
|
+
sys.stderr.flush()
|
147
186
|
sys.stdout = self.original_stdout
|
148
187
|
sys.stderr = self.original_stderr
|
149
188
|
self.log_file.close()
|
150
189
|
|
151
190
|
def cleanup(self, signo=None, frame=None):
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
self.
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
191
|
+
try:
|
192
|
+
from .._finish import save_run_logs
|
193
|
+
|
194
|
+
if self.original_stdout and not self.is_cleaning_up:
|
195
|
+
self.is_cleaning_up = True
|
196
|
+
getattr(sys.stdout, "flush_buffer", sys.stdout.flush)()
|
197
|
+
sys.stderr.flush()
|
198
|
+
if signo is not None:
|
199
|
+
signal_msg = f"\nProcess terminated by signal {signo} ({signal.Signals(signo).name})\n"
|
200
|
+
if frame:
|
201
|
+
signal_msg += (
|
202
|
+
f"Frame info:\n{''.join(traceback.format_stack(frame))}"
|
203
|
+
)
|
204
|
+
self.log_file.write(signal_msg)
|
205
|
+
self.log_file.flush()
|
206
|
+
self.run._status_code = 2 # aborted
|
207
|
+
else:
|
208
|
+
self.run._status_code = 1 # errored
|
209
|
+
self.run.finished_at = datetime.now(timezone.utc)
|
210
|
+
sys.stdout = self.original_stdout
|
211
|
+
sys.stderr = self.original_stderr
|
212
|
+
self.log_file.close()
|
213
|
+
save_run_logs(self.run, save_run=True)
|
214
|
+
except: # noqa: E722, S110
|
215
|
+
pass
|
168
216
|
|
169
217
|
def handle_exception(self, exc_type, exc_value, exc_traceback):
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
self.log_file
|
174
|
-
|
175
|
-
|
176
|
-
|
218
|
+
try:
|
219
|
+
if not self.is_cleaning_up:
|
220
|
+
error_msg = f"{''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))}"
|
221
|
+
if self.log_file.closed:
|
222
|
+
self.log_file = open(self.log_file_path, "a")
|
223
|
+
else:
|
224
|
+
getattr(sys.stdout, "flush_buffer", sys.stdout.flush)()
|
225
|
+
sys.stderr.flush()
|
226
|
+
self.log_file.write(error_msg)
|
227
|
+
self.log_file.flush()
|
228
|
+
self.cleanup()
|
229
|
+
except: # noqa: E722, S110
|
230
|
+
pass
|
177
231
|
self.original_excepthook(exc_type, exc_value, exc_traceback)
|
178
232
|
|
179
233
|
|
180
234
|
class Context:
|
181
235
|
"""Run context.
|
182
236
|
|
183
|
-
Is the book keeper for :
|
237
|
+
Is the book keeper for :func:`~lamindb.track` and :func:`~lamindb.finish`.
|
184
238
|
"""
|
185
239
|
|
186
240
|
def __init__(self):
|
@@ -190,7 +244,6 @@ class Context:
|
|
190
244
|
self._transform: Transform | None = None
|
191
245
|
self._run: Run | None = None
|
192
246
|
self._path: Path | None = None
|
193
|
-
"""A local path to the script or notebook that's running."""
|
194
247
|
self._project: Project | None = None
|
195
248
|
self._space: Space | None = None
|
196
249
|
self._branch: Branch | None = None
|
@@ -281,8 +334,8 @@ class Context:
|
|
281
334
|
Args:
|
282
335
|
transform: A transform (stem) `uid` (or record). If `None`, auto-creates a `transform` with its `uid`.
|
283
336
|
project: A project (or its `name` or `uid`) for labeling entities.
|
284
|
-
space: A restricted space (or its `name` or `uid`) in which to store
|
285
|
-
Default: the `"
|
337
|
+
space: A restricted space (or its `name` or `uid`) in which to store entities.
|
338
|
+
Default: the `"all"` space. Note that bionty entities ignore this setting and always get written to the `"all"` space.
|
286
339
|
If you want to manually move entities to a different space, set the `.space` field (:doc:`docs:access`).
|
287
340
|
branch: A branch (or its `name` or `uid`) on which to store records.
|
288
341
|
params: A dictionary of parameters to track for the run.
|
@@ -308,6 +361,10 @@ class Context:
|
|
308
361
|
"""
|
309
362
|
from lamindb.models import Branch, Project, Space
|
310
363
|
|
364
|
+
from .._finish import (
|
365
|
+
save_context_core,
|
366
|
+
)
|
367
|
+
|
311
368
|
instance_settings = ln_setup.settings.instance
|
312
369
|
# similar logic here: https://github.com/laminlabs/lamindb/pull/2527
|
313
370
|
# TODO: refactor upon new access management
|
@@ -433,6 +490,7 @@ class Context:
|
|
433
490
|
)
|
434
491
|
if run is not None: # loaded latest run
|
435
492
|
run.started_at = datetime.now(timezone.utc) # update run time
|
493
|
+
run._status_code = -2 # re-started
|
436
494
|
self._logging_message_track += f", re-started Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
|
437
495
|
|
438
496
|
if run is None: # create new run
|
@@ -441,6 +499,7 @@ class Context:
|
|
441
499
|
params=params,
|
442
500
|
)
|
443
501
|
run.started_at = datetime.now(timezone.utc)
|
502
|
+
run._status_code = -1 # started
|
444
503
|
self._logging_message_track += f", started new Run('{run.uid[:8]}...') at {format_field_value(run.started_at)}"
|
445
504
|
# can only determine at ln.finish() if run was consecutive in
|
446
505
|
# interactive session, otherwise, is consecutive
|
@@ -453,7 +512,7 @@ class Context:
|
|
453
512
|
f"{key}={value}" for key, value in params.items()
|
454
513
|
)
|
455
514
|
self._run = run
|
456
|
-
|
515
|
+
track_python_environment(run)
|
457
516
|
if self.project is not None:
|
458
517
|
# to update a potential project link
|
459
518
|
# is only necessary if transform is loaded rather than newly created
|
@@ -491,6 +550,8 @@ class Context:
|
|
491
550
|
logger.important_hint(
|
492
551
|
f'recommendation: to identify the {notebook_or_script} across renames, pass the uid: ln{r_or_python}track("{self.transform.uid[:-4]}"{kwargs_str})'
|
493
552
|
)
|
553
|
+
if self.transform.type == "script":
|
554
|
+
save_context_core(run=run, transform=self.transform, filepath=self._path)
|
494
555
|
|
495
556
|
def _track_source_code(
|
496
557
|
self,
|
@@ -653,6 +714,8 @@ class Context:
|
|
653
714
|
message = ""
|
654
715
|
found_key = False
|
655
716
|
for aux_transform in transforms:
|
717
|
+
# check whether the transform key is in the path
|
718
|
+
# that's not going to be the case for keys that have "/" in them and don't match the folder
|
656
719
|
if aux_transform.key in self._path.as_posix():
|
657
720
|
key = aux_transform.key
|
658
721
|
uid, target_transform, message = self._process_aux_transform(
|
@@ -668,7 +731,7 @@ class Context:
|
|
668
731
|
for transform in transforms
|
669
732
|
]
|
670
733
|
)
|
671
|
-
message = f"ignoring transform{plural_s} with same filename:\n{transforms_str}"
|
734
|
+
message = f"ignoring transform{plural_s} with same filename in different folder:\n{transforms_str}"
|
672
735
|
if message != "":
|
673
736
|
logger.important(message)
|
674
737
|
self.uid, transform = uid, target_transform
|
@@ -715,7 +778,10 @@ class Context:
|
|
715
778
|
f"Please respond with either 1 or 2, not {response}"
|
716
779
|
)
|
717
780
|
if response == "2":
|
718
|
-
transform_hash =
|
781
|
+
aux_transform, transform_hash = (
|
782
|
+
None,
|
783
|
+
None,
|
784
|
+
) # make a new transform
|
719
785
|
if aux_transform is not None:
|
720
786
|
if aux_transform.key.endswith(self._path.name):
|
721
787
|
key = aux_transform.key
|
@@ -740,8 +806,9 @@ class Context:
|
|
740
806
|
and transform.version is not None # type: ignore
|
741
807
|
and self.version != transform.version # type: ignore
|
742
808
|
):
|
743
|
-
raise
|
744
|
-
f"
|
809
|
+
raise ValueError(
|
810
|
+
f"Transform is already tagged with version {transform.version}, but you passed {self.version}\n" # noqa: S608
|
811
|
+
f"If you want to update the transform version, set it outside ln.track(): transform.version = '{self.version}'; transform.save()"
|
745
812
|
)
|
746
813
|
# test whether version was already used for another member of the family
|
747
814
|
if self.uid is not None and len(self.uid) == 16:
|
@@ -850,9 +917,7 @@ class Context:
|
|
850
917
|
`lamin save script.py` or `lamin save notebook.ipynb` → `docs </cli#lamin-save>`__
|
851
918
|
|
852
919
|
"""
|
853
|
-
from
|
854
|
-
save_context_core,
|
855
|
-
)
|
920
|
+
from .._finish import save_context_core, save_run_logs
|
856
921
|
|
857
922
|
if self.run is None:
|
858
923
|
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
|
@@ -865,18 +930,23 @@ class Context:
|
|
865
930
|
self.run.save()
|
866
931
|
# nothing else to do
|
867
932
|
return None
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
933
|
+
self.run._status_code = 0
|
934
|
+
if self.transform.type == "notebook":
|
935
|
+
return_code = save_context_core(
|
936
|
+
run=self.run,
|
937
|
+
transform=self.run.transform,
|
938
|
+
filepath=self._path,
|
939
|
+
finished_at=True,
|
940
|
+
ignore_non_consecutive=ignore_non_consecutive,
|
941
|
+
is_retry=self._is_finish_retry,
|
942
|
+
notebook_runner=self._notebook_runner,
|
943
|
+
)
|
944
|
+
if return_code == "retry":
|
945
|
+
self._is_finish_retry = True
|
946
|
+
return None
|
947
|
+
else:
|
948
|
+
self.run.finished_at = datetime.now(timezone.utc)
|
949
|
+
save_run_logs(self.run, save_run=True)
|
880
950
|
if self.transform.type != "notebook":
|
881
951
|
self._stream_tracker.finish()
|
882
952
|
# reset the context attributes so that somebody who runs `track()` after finish
|
lamindb/core/_settings.py
CHANGED
@@ -4,7 +4,7 @@ import os
|
|
4
4
|
from typing import TYPE_CHECKING
|
5
5
|
|
6
6
|
import lamindb_setup as ln_setup
|
7
|
-
from lamin_utils import logger
|
7
|
+
from lamin_utils import colors, logger
|
8
8
|
from lamindb_setup._set_managed_storage import set_managed_storage
|
9
9
|
from lamindb_setup.core._settings import settings as setup_settings
|
10
10
|
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
@@ -35,7 +35,7 @@ VERBOSITY_TO_STR: dict[int, str] = dict(
|
|
35
35
|
class Settings:
|
36
36
|
"""Settings.
|
37
37
|
|
38
|
-
|
38
|
+
Please use the global `ln.settings` object instead of instantiating this class yourself.
|
39
39
|
"""
|
40
40
|
|
41
41
|
def __init__(self):
|
@@ -43,6 +43,36 @@ class Settings:
|
|
43
43
|
logger.set_verbosity(self._verbosity_int)
|
44
44
|
self._sync_git_repo: str | None = None
|
45
45
|
|
46
|
+
def __repr__(self) -> str: # pragma: no cover
|
47
|
+
cls_name = colors.green(self.__class__.__name__)
|
48
|
+
verbosity_color = colors.yellow if self.verbosity == "warning" else colors.green
|
49
|
+
verbosity_str = verbosity_color(self.verbosity)
|
50
|
+
|
51
|
+
storage_root = self._storage_settings.root_as_str
|
52
|
+
storage_str = colors.italic(storage_root)
|
53
|
+
|
54
|
+
instance_str = colors.italic(self.instance_uid)
|
55
|
+
track_color = colors.green if self.track_run_inputs else colors.yellow
|
56
|
+
track_str = track_color(str(self.track_run_inputs))
|
57
|
+
|
58
|
+
lines = [
|
59
|
+
f"{cls_name}",
|
60
|
+
f" instance: {instance_str}",
|
61
|
+
f" storage: {storage_str}",
|
62
|
+
f" verbosity: {verbosity_str}",
|
63
|
+
f" track_run_inputs: {track_str}",
|
64
|
+
]
|
65
|
+
|
66
|
+
if self.sync_git_repo:
|
67
|
+
repo_name = (
|
68
|
+
self.sync_git_repo.split("/")[-1]
|
69
|
+
if "/" in self.sync_git_repo
|
70
|
+
else self.sync_git_repo
|
71
|
+
)
|
72
|
+
lines.append(f" sync_git_repo: {colors.italic(repo_name)}")
|
73
|
+
|
74
|
+
return "\n".join(lines)
|
75
|
+
|
46
76
|
@property
|
47
77
|
def creation(self) -> CreationSettings:
|
48
78
|
"""SQLRecord creation settings.
|
@@ -61,10 +91,15 @@ class Settings:
|
|
61
91
|
"""
|
62
92
|
return annotation_settings
|
63
93
|
|
94
|
+
# note: this setting should probably be deprecated soon
|
95
|
+
# warnings could then be filtered with a regular warning mechanism
|
64
96
|
track_run_inputs: bool = True
|
65
|
-
"""Track
|
97
|
+
"""Track run inputs (default `True`).
|
66
98
|
|
67
|
-
|
99
|
+
If this setting is true, an artifact is recorded as run input upon `.load()`, `.cache()` & `.open()` provided :func:`~lamindb.track` was called in the current compute (Python, R) session.
|
100
|
+
If :func:`~lamindb.track` was not called, you receive a warning message upon `.load()`, `.cache()` & `.open()`.
|
101
|
+
|
102
|
+
If you switch this setting to `False`, you won't see the warning message anymore and no run inputs will be recorded.
|
68
103
|
|
69
104
|
FAQ: :doc:`/faq/track-run-inputs`
|
70
105
|
"""
|
@@ -150,6 +185,11 @@ class Settings:
|
|
150
185
|
path, kwargs = path_kwargs, {}
|
151
186
|
set_managed_storage(path, **kwargs)
|
152
187
|
|
188
|
+
@property
|
189
|
+
def instance_uid(self) -> str:
|
190
|
+
"""The `uid` of the current instance."""
|
191
|
+
return ln_setup.settings.instance.uid
|
192
|
+
|
153
193
|
@property
|
154
194
|
def cache_dir(self) -> UPath:
|
155
195
|
"""Cache root, a local directory to cache cloud files."""
|
lamindb/core/_track_environment.py
CHANGED
@@ -11,8 +11,11 @@ if TYPE_CHECKING:
|
|
11
11
|
from lamindb.models import Run
|
12
12
|
|
13
13
|
|
14
|
-
def
|
15
|
-
|
14
|
+
def track_python_environment(run: Run) -> None:
|
15
|
+
env_dir = ln_setup.settings.cache_dir / "environments" / f"run_{run.uid}"
|
16
|
+
filepath = env_dir / "run_env_pip.txt"
|
17
|
+
if not env_dir.exists():
|
18
|
+
filepath.parent.mkdir(parents=True)
|
16
19
|
# create a requirements.txt
|
17
20
|
# we don't create a conda environment.yml mostly for its slowness
|
18
21
|
try:
|
lamindb/core/loaders.py
CHANGED
@@ -34,7 +34,7 @@ from ..core._settings import settings
|
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
36
|
from anndata import AnnData
|
37
|
-
from lamindb_setup.
|
37
|
+
from lamindb_setup.types import UPathStr
|
38
38
|
from mudata import MuData
|
39
39
|
|
40
40
|
from lamindb.core.types import ScverseDataStructures
|