lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl
This diff compares the contents of two package versions as published to a supported public registry and is provided for informational purposes only.
- lamindb/__init__.py +39 -32
- lamindb/_artifact.py +95 -64
- lamindb/_can_curate.py +19 -10
- lamindb/_collection.py +51 -49
- lamindb/_feature.py +9 -9
- lamindb/_finish.py +99 -86
- lamindb/_from_values.py +20 -17
- lamindb/_is_versioned.py +2 -1
- lamindb/_parents.py +23 -16
- lamindb/_query_manager.py +3 -3
- lamindb/_query_set.py +85 -18
- lamindb/_record.py +121 -46
- lamindb/_run.py +3 -3
- lamindb/_save.py +14 -8
- lamindb/{_feature_set.py → _schema.py} +34 -31
- lamindb/_storage.py +2 -1
- lamindb/_transform.py +51 -23
- lamindb/_ulabel.py +17 -8
- lamindb/_view.py +15 -14
- lamindb/base/__init__.py +24 -0
- lamindb/base/fields.py +281 -0
- lamindb/base/ids.py +103 -0
- lamindb/base/types.py +51 -0
- lamindb/base/users.py +30 -0
- lamindb/base/validation.py +67 -0
- lamindb/core/__init__.py +19 -14
- lamindb/core/_context.py +297 -228
- lamindb/core/_data.py +44 -49
- lamindb/core/_describe.py +41 -31
- lamindb/core/_django.py +59 -44
- lamindb/core/_feature_manager.py +192 -168
- lamindb/core/_label_manager.py +22 -22
- lamindb/core/_mapped_collection.py +17 -14
- lamindb/core/_settings.py +1 -12
- lamindb/core/_sync_git.py +56 -9
- lamindb/core/_track_environment.py +1 -1
- lamindb/core/datasets/_core.py +5 -6
- lamindb/core/exceptions.py +0 -7
- lamindb/core/fields.py +1 -1
- lamindb/core/loaders.py +18 -2
- lamindb/core/{schema.py → relations.py} +22 -19
- lamindb/core/storage/_anndata_accessor.py +1 -2
- lamindb/core/storage/_backed_access.py +2 -1
- lamindb/core/storage/_tiledbsoma.py +40 -13
- lamindb/core/storage/objects.py +1 -1
- lamindb/core/storage/paths.py +13 -8
- lamindb/core/subsettings/__init__.py +0 -2
- lamindb/core/types.py +2 -23
- lamindb/core/versioning.py +11 -7
- lamindb/{_curate.py → curators/__init__.py} +700 -57
- lamindb/curators/_spatial.py +528 -0
- lamindb/integrations/_vitessce.py +1 -3
- lamindb/migrations/0052_squashed.py +1261 -0
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
- lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
- lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
- lamindb/migrations/0060_alter_artifact__actions.py +22 -0
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
- lamindb/migrations/0062_add_is_latest_field.py +32 -0
- lamindb/migrations/0063_populate_latest_field.py +45 -0
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
- lamindb/migrations/0069_squashed.py +1770 -0
- lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
- lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
- lamindb/migrations/0073_merge_ourprojects.py +945 -0
- lamindb/migrations/0074_lamindbv1_part4.py +374 -0
- lamindb/migrations/0075_lamindbv1_part5.py +276 -0
- lamindb/migrations/0076_lamindbv1_part6.py +621 -0
- lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
- lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
- lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
- lamindb/migrations/__init__.py +0 -0
- lamindb/models.py +4064 -0
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
- lamindb-1.0rc1.dist-info/RECORD +100 -0
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
- lamindb/core/subsettings/_transform_settings.py +0 -21
- lamindb-0.77.2.dist-info/RECORD +0 -63
- {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
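
The renamed files above (`_feature_set.py → _schema.py`, `core/schema.py → core/relations.py`) and the new `lamindb/models.py` and `lamindb/base/` modules go along with registries moving out of `lnschema_core` and into `lamindb` itself, as the per-file diffs below show. A minimal sketch of the import change, using only paths that appear in this diff:

# 0.77.2: registries and types lived in the external lnschema_core package
# from lnschema_core.models import Artifact, Feature, Record
# from lnschema_core.types import FeatureDtype

# 1.0rc1: the same names are imported from lamindb directly
from lamindb.models import Artifact, Feature, Record
from lamindb.base.types import FeatureDtype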
lamindb/_collection.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import warnings
 from collections import defaultdict
 from typing import (
     TYPE_CHECKING,
@@ -13,28 +14,27 @@ import pandas as pd
 from lamin_utils import logger
 from lamindb_setup.core._docs import doc_args
 from lamindb_setup.core.hashing import hash_set
-
+
+from lamindb.models import (
     Collection,
     CollectionArtifact,
-
+    Schema,
 )
-from lnschema_core.types import VisibilityChoice
 
-from . import Artifact, Run
 from ._parents import view_lineage
 from ._record import init_self_from_db, update_attributes
 from ._utils import attach_func_to_class_method
 from .core._data import (
     _track_run_input,
-    add_transform_to_kwargs,
     describe,
     get_run,
-
-
+    save_schema_links,
+    save_staged__schemas_m2m,
 )
 from .core._mapped_collection import MappedCollection
 from .core._settings import settings
 from .core.versioning import process_revises
+from .models import Artifact, Run
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -50,31 +50,31 @@ class CollectionFeatureManager:
     def __init__(self, collection: Collection):
         self._collection = collection
 
-    def
-
+    def _get_staged__schemas_m2m_union(self) -> dict[str, Schema]:
+        links_schema_artifact = Artifact._schemas_m2m.through.objects.filter(
             artifact_id__in=self._collection.artifacts.values_list("id", flat=True)
         )
-
-        for link in
-
-
-        for slot,
-
-            related_name =
-            features_registry = getattr(
+        _schemas_m2m_by_slots = defaultdict(list)
+        for link in links_schema_artifact:
+            _schemas_m2m_by_slots[link.slot].append(link.schema_id)
+        _schemas_m2m_union = {}
+        for slot, schema_ids_slot in _schemas_m2m_by_slots.items():
+            schema_1 = Schema.get(id=schema_ids_slot[0])
+            related_name = schema_1._get_related_name()
+            features_registry = getattr(Schema, related_name).field.model
             # this way of writing the __in statement turned out to be the fastest
             # evaluated on a link table with 16M entries connecting 500 feature sets with
             # 60k genes
             feature_ids = (
-                features_registry.
-
+                features_registry.schemas.through.objects.filter(
+                    schema_id__in=schema_ids_slot
                 )
                 .values(f"{features_registry.__name__.lower()}_id")
                 .distinct()
             )
             features = features_registry.filter(id__in=feature_ids)
-
-        return
+            _schemas_m2m_union[slot] = Schema(features, dtype=schema_1.dtype)
+        return _schemas_m2m_union
 
 
 def __init__(
@@ -95,7 +95,7 @@ def __init__(
     meta_artifact: Artifact | None = (
         kwargs.pop("meta_artifact") if "meta_artifact" in kwargs else None
     )
-
+    key: str | None = kwargs.pop("key") if "key" in kwargs else None
     description: str | None = (
         kwargs.pop("description") if "description" in kwargs else None
    )
@@ -106,27 +106,29 @@ def __init__(
     run: Run | None = kwargs.pop("run") if "run" in kwargs else None
     revises: Collection | None = kwargs.pop("revises") if "revises" in kwargs else None
     version: str | None = kwargs.pop("version") if "version" in kwargs else None
-
-        kwargs.pop("
-        if "visibility" in kwargs
-        else VisibilityChoice.default.value
+    _branch_code: int | None = (
+        kwargs.pop("_branch_code") if "_branch_code" in kwargs else 1
     )
-    if "
-
-
+    if "name" in kwargs:
+        key = kwargs.pop("name")
+        warnings.warn(
+            f"argument `name` will be removed, please pass {key} to `key` instead",
+            FutureWarning,
+            stacklevel=2,
+        )
     if not len(kwargs) == 0:
         raise ValueError(
-            f"Only artifacts,
+            f"Only artifacts, key, run, description, reference, reference_type can be passed, you passed: {kwargs}"
         )
-    provisional_uid, version,
-        revises, version,
+    provisional_uid, version, key, description, revises = process_revises(
+        revises, version, key, description, Collection
     )
     run = get_run(run)
     if isinstance(artifacts, Artifact):
         artifacts = [artifacts]
     else:
         if not hasattr(artifacts, "__getitem__"):
-            raise ValueError("Artifact or
+            raise ValueError("Artifact or list[Artifact] is allowed.")
         assert isinstance(artifacts[0], Artifact) # type: ignore # noqa: S101
     hash = from_artifacts(artifacts) # type: ignore
     if meta_artifact is not None:
@@ -144,7 +146,7 @@ def __init__(
         existing_collection = None
     if existing_collection is not None:
         logger.warning(
-            f"returning existing collection with same hash: {existing_collection}"
+            f"returning existing collection with same hash: {existing_collection}; if you intended to query to track this collection as an input, use: ln.Collection.get()"
         )
         # update the run of the existing collection
         if run is not None:
@@ -157,18 +159,16 @@ def __init__(
            )
            # update the run of the collection with the latest run
            existing_collection.run = run
-            existing_collection.transform = run.transform
        init_self_from_db(collection, existing_collection)
-        update_attributes(collection, {"description": description, "
+        update_attributes(collection, {"description": description, "key": key})
    else:
        kwargs = {}
-        add_transform_to_kwargs(kwargs, run)
        search_names_setting = settings.creation.search_names
-        if revises is not None and
+        if revises is not None and key == revises.key:
            settings.creation.search_names = False
        super(Collection, collection).__init__(
            uid=provisional_uid,
-
+            key=key,
            description=description,
            reference=reference,
            reference_type=reference_type,
@@ -176,7 +176,7 @@ def __init__(
            hash=hash,
            run=run,
            version=version,
-
+            _branch_code=_branch_code,
            revises=revises,
            **kwargs,
        )
@@ -224,7 +224,7 @@ def mapped(
     layers_keys: str | list[str] | None = None,
     obs_keys: str | list[str] | None = None,
     obsm_keys: str | list[str] | None = None,
-    obs_filter:
+    obs_filter: dict[str, str | tuple[str, ...]] | None = None,
     join: Literal["inner", "outer"] | None = "inner",
     encode_labels: bool | list[str] = True,
     unknown_label: str | dict[str, str] | None = None,
@@ -306,12 +306,14 @@ def load(
 
 # docstring handled through attach_func_to_class_method
 def delete(self, permanent: bool | None = None) -> None:
-    # change
-
-    if self.
-        self.
+    # change _branch_code to trash
+    trash__branch_code = -1
+    if self._branch_code > trash__branch_code and permanent is not True:
+        self._branch_code = trash__branch_code
         self.save()
-        logger.warning(
+        logger.warning(
+            f"moved collection to trash (_branch_code = {trash__branch_code})"
+        )
         return
 
     # permanent delete
@@ -333,7 +335,7 @@ def save(self, using: str | None = None) -> Collection:
     if self.meta_artifact is not None:
         self.meta_artifact.save()
     # we don't need to save feature sets again
-
+    save_staged__schemas_m2m(self)
     super(Collection, self).save()
     # we don't allow updating the collection of artifacts
     # if users want to update the set of artifacts, they
@@ -348,7 +350,7 @@ def save(self, using: str | None = None) -> Collection:
         # merely using .artifacts.set(*...) doesn't achieve this
         # we need ignore_conflicts=True so that this won't error if links already exist
         CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
-
+    save_schema_links(self)
     if using is not None:
         logger.warning("using argument is ignored")
     return self
@@ -356,7 +358,7 @@ def save(self, using: str | None = None) -> Collection:
 
 # docstring handled through attach_func_to_class_method
 def restore(self) -> None:
-    self.
+    self._branch_code = 1
     self.save()
 
 
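
The `_collection.py` diff replaces the `name` argument with `key` (keeping `name` as a deprecated alias that raises a `FutureWarning`) and swaps the old `visibility` handling for the internal `_branch_code` field (`1` for default, `-1` for trash). A hedged usage sketch of the new constructor and trash behavior; the artifact query is only a placeholder:

import lamindb as ln

artifacts = list(ln.Artifact.filter(suffix=".h5ad"))  # placeholder query
collection = ln.Collection(artifacts, key="my-datasets")  # 1.0rc1: pass `key`
# ln.Collection(artifacts, name="my-datasets")  # still accepted, but warns with FutureWarning
collection.save()
collection.delete()   # moves the record to trash by setting _branch_code = -1
collection.restore()  # sets _branch_code back to 1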
lamindb/_feature.py
CHANGED
@@ -1,28 +1,28 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, get_args
 
 import lamindb_setup as ln_setup
 import pandas as pd
 from lamin_utils import logger
 from lamindb_setup.core._docs import doc_args
-from lnschema_core.models import Artifact, Feature, Record
-from lnschema_core.types import FeatureDtype
 from pandas.api.types import CategoricalDtype, is_string_dtype
 
+from lamindb.base.types import FeatureDtype
 from lamindb.core.exceptions import ValidationError
+from lamindb.models import Artifact, Feature, Record
 
 from ._query_set import RecordList
 from ._utils import attach_func_to_class_method
-from .core.
-from .core.schema import dict_schema_name_to_model_name
+from .core.relations import dict_module_name_to_model_name
 
 if TYPE_CHECKING:
     from collections.abc import Iterable
 
-    from lnschema_core.types import FieldAttr
     from pandas.core.dtypes.base import ExtensionDtype
 
+    from lamindb.base.types import FieldAttr
+
 
 FEATURE_DTYPES = set(get_args(FeatureDtype))
 
@@ -38,9 +38,9 @@ def get_dtype_str_from_dtype(dtype: Any) -> str:
         raise ValueError(error_message)
     registries_str = ""
     for registry in dtype:
-        if not hasattr(registry, "
+        if not hasattr(registry, "__get_name_with_module__"):
            raise ValueError(error_message)
-        registries_str += registry.
+        registries_str += registry.__get_name_with_module__() + "|"
    dtype_str = f'cat[{registries_str.rstrip("|")}]'
    return dtype_str
 
@@ -89,7 +89,7 @@ def __init__(self, *args, **kwargs):
     if registries_str != "":
         registry_str_list = registries_str.split("|")
         for registry_str in registry_str_list:
-            if registry_str not in
+            if registry_str not in dict_module_name_to_model_name(Artifact):
                raise ValueError(
                    f"'{registry_str}' is an invalid dtype, pass, e.g. `[ln.ULabel, bt.CellType]` or similar"
                )
lamindb/_finish.py
CHANGED
@@ -9,17 +9,14 @@ from lamin_utils import logger
 from lamindb_setup.core.hashing import hash_file
 
 from lamindb.core.exceptions import NotebookNotSaved
+from lamindb.models import Artifact, Run, Transform
 
 if TYPE_CHECKING:
     from pathlib import Path
 
-    from lnschema_core import Run, Transform
 
-
-
-
-def get_r_save_notebook_message() -> str:
-    return f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`"
+def get_save_notebook_message() -> str:
+    return f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `finish()`"
 
 
 def get_shortcut() -> str:
@@ -32,13 +29,29 @@ def get_seconds_since_modified(filepath) -> float:
     return datetime.now().timestamp() - filepath.stat().st_mtime
 
 
+def save_run_logs(run: Run, save_run: bool = False) -> None:
+    logs_path = ln_setup.settings.cache_dir / f"run_logs_{run.uid}.txt"
+    if logs_path.exists():
+        if run.report is not None:
+            logger.important("overwriting run.report")
+        artifact = Artifact(
+            logs_path,
+            description=f"log streams of run {run.uid}",
+            _branch_code=0,
+            run=False,
+        )
+        artifact.save(upload=True, print_progress=False)
+        run.report = artifact
+        if save_run: # defaults to false because is slow
+            run.save()
+
+
 # this is from the get_title function in nbproject
 # should be moved into lamindb sooner or later
 def prepare_notebook(
     nb,
     strip_title: bool = False,
 ) -> str | None:
-    """Strip title from the notebook if requested."""
     title_found = False
     for cell in nb.cells:
         cell.metadata.clear() # strip cell metadata
@@ -85,8 +98,8 @@ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
 
 
 def notebook_to_script(
-    transform: Transform, notebook_path: Path, script_path: Path
-) -> None:
+    transform: Transform, notebook_path: Path, script_path: Path | None = None
+) -> None | str:
     import jupytext
 
     notebook = jupytext.read(notebook_path)
@@ -94,8 +107,11 @@ def notebook_to_script(
     # remove global metadata header
     py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
     # replace title
-    py_content = py_content.replace(f"# # {transform.
-    script_path
+    py_content = py_content.replace(f"# # {transform.description}", "#")
+    if script_path is None:
+        return py_content
+    else:
+        script_path.write_text(py_content)
 
 
 # removes NotebookNotSaved error message from notebook html
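
Per the hunk above, `notebook_to_script` now accepts `script_path=None` and then returns the converted source as a string instead of writing a file. A hedged sketch of both call patterns, assuming an existing `Transform` record named `transform` and placeholder paths:

from pathlib import Path

# write the converted .py next to the notebook, as before (placeholder paths)
notebook_to_script(transform, Path("analysis.ipynb"), Path("analysis.py"))

# new: omit script_path to get the source back as a string
py_content = notebook_to_script(transform, Path("analysis.ipynb"))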
@@ -114,7 +130,7 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
     cleaned_content = re.sub(pattern_title, "", cleaned_content)
     cleaned_content = re.sub(pattern_h1, "", cleaned_content)
     cleaned_content = cleaned_content.replace(
-        f"NotebookNotSaved: {
+        f"NotebookNotSaved: {get_save_notebook_message()}", ""
     )
     cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}")
     cleaned_path.write_text(cleaned_content)
@@ -123,20 +139,19 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
 
 def save_context_core(
     *,
-    run: Run,
+    run: Run | None,
     transform: Transform,
     filepath: Path,
     finished_at: bool = False,
     ignore_non_consecutive: bool | None = None,
     from_cli: bool = False,
 ) -> str | None:
-
+    import lamindb as ln
+    from lamindb.models import (
         format_field_value, # needs to come after lamindb was imported because of CLI use
     )
 
-
-
-    from .core._context import context, is_run_from_ipython
+    from .core._context import context
 
     ln.settings.verbosity = "success"
 
@@ -149,7 +164,7 @@ def save_context_core(
     # for notebooks, we need more work
     if is_ipynb:
         try:
-            import jupytext
+            import jupytext # noqa: F401
             from nbproject.dev import (
                 check_consecutiveness,
                 read_notebook,
@@ -189,23 +204,19 @@ def save_context_core(
         logger.warning(
             f"no {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}"
         )
+    if report_path is not None and not from_cli:
+        if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING:
+            # this can happen when auto-knitting an html with RStudio
+            raise NotebookNotSaved(get_save_notebook_message())
     ln.settings.creation.artifact_silence_missing_run_warning = True
     # track source code
     hash, _ = hash_file(source_code_path) # ignore hash_type for now
-    if
-        transform._source_code_artifact_id is not None
-        or transform.hash is not None # .hash is equivalent to .transform
-    ):
+    if transform.hash is not None:
         # check if the hash of the transform source code matches
         # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
-
-            transform.hash
-            if transform.hash is not None
-            else transform._source_code_artifact.hash
-        )
-        if hash != ref_hash:
+        if hash != transform.hash:
             response = input(
-                f"You are about to overwrite existing source code (hash '{
+                f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
                 f" Proceed? (y/n)"
             )
             if response == "y":
@@ -221,75 +232,77 @@ def save_context_core(
         transform.hash = hash
 
     # track environment
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if run is not None:
+        env_path = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt"
+        if env_path.exists():
+            overwrite_env = True
+            if run.environment_id is not None and from_cli:
+                logger.important("run.environment is already saved, ignoring")
+                overwrite_env = False
+            if overwrite_env:
+                hash, _ = hash_file(env_path)
+                artifact = ln.Artifact.filter(hash=hash, _branch_code=0).one_or_none()
+                new_env_artifact = artifact is None
+                if new_env_artifact:
+                    artifact = ln.Artifact(
+                        env_path,
+                        description="requirements.txt",
+                        _branch_code=0,
+                        run=False,
+                    )
+                    artifact.save(upload=True, print_progress=False)
+                run.environment = artifact
+                if new_env_artifact:
+                    logger.debug(f"saved run.environment: {run.environment}")
 
     # set finished_at
-    if finished_at:
+    if finished_at and run is not None:
         run.finished_at = datetime.now(timezone.utc)
 
+    # track logs
+    if run is not None and not from_cli and not is_ipynb and not is_r_notebook:
+        save_run_logs(run)
+
     # track report and set is_consecutive
-    if
-        if not
-            if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                run.report.save(upload=True, print_progress=False)
+    if run is not None:
+        if report_path is not None:
+            if is_r_notebook:
+                title_text, report_path = clean_r_notebook_html(report_path)
+                if title_text is not None:
+                    transform.description = title_text
+            if run.report_id is not None:
+                hash, _ = hash_file(report_path) # ignore hash_type for now
+                if hash != run.report.hash:
+                    response = input(
+                        f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
+                    )
+                    if response == "y":
+                        run.report.replace(report_path)
+                        run.report.save(upload=True, print_progress=False)
+                    else:
+                        logger.important("keeping old report")
                 else:
-                logger.important("
+                    logger.important("report is already saved")
            else:
-
-
-
-
-
-
-
+                report_file = ln.Artifact(
+                    report_path,
+                    description=f"Report of run {run.uid}",
+                    _branch_code=0, # hidden file
+                    run=False,
+                )
+                report_file.save(upload=True, print_progress=False)
+                run.report = report_file
+                logger.debug(
+                    f"saved transform.latest_run.report: {transform.latest_run.report}"
                )
-
-        run.report = report_file
-        logger.debug(
-            f"saved transform.latest_run.report: {transform.latest_run.report}"
-        )
-    run.is_consecutive = is_consecutive
+        run.is_consecutive = is_consecutive
 
-
-
+    # save both run & transform records if we arrive here
+    run.save()
     transform.save()
 
     # finalize
-    if not from_cli:
+    if not from_cli and run is not None:
         run_time = run.finished_at - run.started_at
         days = run_time.days
         seconds = run_time.seconds