lamindb 0.69.1__py3-none-any.whl → 0.69.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +6 -4
- lamindb/_annotate.py +790 -0
- lamindb/_artifact.py +2 -8
- lamindb/_collection.py +16 -4
- lamindb/_feature.py +11 -9
- lamindb/_finish.py +194 -11
- lamindb/_query_set.py +6 -4
- lamindb/_run.py +3 -1
- lamindb/_save.py +34 -21
- lamindb/core/__init__.py +4 -0
- lamindb/core/_data.py +3 -0
- lamindb/core/_feature_manager.py +4 -3
- lamindb/core/_run_context.py +17 -5
- lamindb/core/storage/_backed_access.py +48 -11
- lamindb/core/storage/file.py +2 -7
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/METADATA +7 -7
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/RECORD +20 -25
- lamindb/validation/__init__.py +0 -19
- lamindb/validation/_anndata_validator.py +0 -130
- lamindb/validation/_lookup.py +0 -38
- lamindb/validation/_register.py +0 -214
- lamindb/validation/_validate.py +0 -131
- lamindb/validation/_validator.py +0 -205
- /lamindb/{_validate.py → _can_validate.py} +0 -0
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/LICENSE +0 -0
- {lamindb-0.69.1.dist-info → lamindb-0.69.3.dist-info}/WHEEL +0 -0
lamindb/_artifact.py
CHANGED
@@ -15,8 +15,7 @@ from lamindb_setup.core.types import UPathStr
|
|
15
15
|
from lamindb_setup.core.upath import (
|
16
16
|
create_path,
|
17
17
|
extract_suffix_from_path,
|
18
|
-
|
19
|
-
get_stat_dir_s3,
|
18
|
+
get_stat_dir_cloud,
|
20
19
|
get_stat_file_cloud,
|
21
20
|
)
|
22
21
|
from lnschema_core import Artifact, Run, Storage
|
@@ -192,10 +191,7 @@ def get_stat_or_artifact(
|
|
192
191
|
if "ETag" in stat: # is file
|
193
192
|
size, hash, hash_type = get_stat_file_cloud(stat)
|
194
193
|
elif path.is_dir():
|
195
|
-
|
196
|
-
size, hash, hash_type, n_objects = get_stat_dir_s3(path)
|
197
|
-
elif path.protocol == "gs":
|
198
|
-
size, hash, hash_type, n_objects = get_stat_dir_gs(path)
|
194
|
+
size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
|
199
195
|
if hash is None:
|
200
196
|
logger.warning(f"did not add hash for {path}")
|
201
197
|
return size, hash, hash_type, n_objects
|
@@ -827,8 +823,6 @@ def load(
|
|
827
823
|
|
828
824
|
# docstring handled through attach_func_to_class_method
|
829
825
|
def stage(self, is_run_input: Optional[bool] = None) -> Path:
|
830
|
-
if self.suffix in {".zrad", ".zarr"}:
|
831
|
-
raise RuntimeError("zarr object can't be staged, please use load() or stream()")
|
832
826
|
_track_run_input(self, is_run_input)
|
833
827
|
|
834
828
|
using_key = settings._using_key
|
lamindb/_collection.py
CHANGED
@@ -24,6 +24,7 @@ from lnschema_core.types import DataLike, VisibilityChoice
|
|
24
24
|
from lamindb._utils import attach_func_to_class_method
|
25
25
|
from lamindb.core._data import _track_run_input
|
26
26
|
from lamindb.core._mapped_collection import MappedCollection
|
27
|
+
from lamindb.core.storage import UPath
|
27
28
|
from lamindb.core.versioning import get_uid_from_old_version, init_uid
|
28
29
|
|
29
30
|
from . import Artifact, Run
|
@@ -339,17 +340,16 @@ def mapped(
|
|
339
340
|
stream: bool = False,
|
340
341
|
is_run_input: Optional[bool] = None,
|
341
342
|
) -> "MappedCollection":
|
342
|
-
_track_run_input(self, is_run_input)
|
343
343
|
path_list = []
|
344
344
|
for artifact in self.artifacts.all():
|
345
345
|
if artifact.suffix not in {".h5ad", ".zrad", ".zarr"}:
|
346
346
|
logger.warning(f"Ignoring artifact with suffix {artifact.suffix}")
|
347
347
|
continue
|
348
|
-
elif not stream
|
348
|
+
elif not stream:
|
349
349
|
path_list.append(artifact.stage())
|
350
350
|
else:
|
351
351
|
path_list.append(artifact.path)
|
352
|
-
|
352
|
+
ds = MappedCollection(
|
353
353
|
path_list,
|
354
354
|
label_keys,
|
355
355
|
join,
|
@@ -359,6 +359,18 @@ def mapped(
|
|
359
359
|
parallel,
|
360
360
|
dtype,
|
361
361
|
)
|
362
|
+
# track only if successful
|
363
|
+
_track_run_input(self, is_run_input)
|
364
|
+
return ds
|
365
|
+
|
366
|
+
|
367
|
+
# docstring handled through attach_func_to_class_method
|
368
|
+
def stage(self, is_run_input: Optional[bool] = None) -> List[UPath]:
|
369
|
+
_track_run_input(self, is_run_input)
|
370
|
+
path_list = []
|
371
|
+
for artifact in self.artifacts.all():
|
372
|
+
path_list.append(artifact.stage())
|
373
|
+
return path_list
|
362
374
|
|
363
375
|
|
364
376
|
# docstring handled through attach_func_to_class_method
|
@@ -467,7 +479,6 @@ def restore(self) -> None:
|
|
467
479
|
@doc_args(Collection.artifacts.__doc__)
|
468
480
|
def artifacts(self) -> QuerySet:
|
469
481
|
"""{}."""
|
470
|
-
_track_run_input(self)
|
471
482
|
return self.unordered_artifacts.order_by("collectionartifact__id")
|
472
483
|
|
473
484
|
|
@@ -476,6 +487,7 @@ METHOD_NAMES = [
|
|
476
487
|
"from_anndata",
|
477
488
|
"from_df",
|
478
489
|
"mapped",
|
490
|
+
"stage",
|
479
491
|
"backed",
|
480
492
|
"load",
|
481
493
|
"delete",
|
lamindb/_feature.py
CHANGED
@@ -117,16 +117,18 @@ def from_df(
|
|
117
117
|
else:
|
118
118
|
types[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
|
119
119
|
|
120
|
-
# silence the
|
120
|
+
# silence the warning "loaded record with exact same name "
|
121
121
|
verbosity = settings.verbosity
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
122
|
+
try:
|
123
|
+
settings.verbosity = "error"
|
124
|
+
|
125
|
+
registry = field.field.model
|
126
|
+
if registry != Feature:
|
127
|
+
raise ValueError("field must be a Feature FieldAttr!")
|
128
|
+
# create records for all features including non-validated
|
129
|
+
features = [Feature(name=name, type=type) for name, type in types.items()]
|
130
|
+
finally:
|
131
|
+
settings.verbosity = verbosity
|
130
132
|
|
131
133
|
assert len(features) == len(df.columns)
|
132
134
|
|
lamindb/_finish.py
CHANGED
@@ -1,8 +1,15 @@
|
|
1
|
+
import os
|
2
|
+
import shutil
|
3
|
+
import subprocess
|
1
4
|
from datetime import datetime, timezone
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Optional
|
2
7
|
|
3
8
|
import lamindb_setup as ln_setup
|
4
9
|
from lamin_utils import logger
|
10
|
+
from lnschema_core import Run, Transform
|
5
11
|
|
12
|
+
from ._query_set import QuerySet
|
6
13
|
from .core._run_context import is_run_from_ipython, run_context
|
7
14
|
|
8
15
|
|
@@ -11,13 +18,16 @@ class CallFinishInLastCell(SystemExit):
|
|
11
18
|
|
12
19
|
|
13
20
|
def finish(i_saved_the_notebook: bool = False):
|
14
|
-
"""Mark
|
21
|
+
"""Mark a tracked run as finished.
|
15
22
|
|
16
|
-
|
17
|
-
"""
|
18
|
-
from lamin_cli._save import save
|
23
|
+
When run in notebooks, save the run report to your default storage location.
|
19
24
|
|
25
|
+
Args:
|
26
|
+
i_saved_the_notebook: Indicate that you saved the notebook in your
|
27
|
+
editor (JupyterLab, VSCode, etc.).
|
28
|
+
"""
|
20
29
|
if is_run_from_ipython:
|
30
|
+
# notebooks
|
21
31
|
from nbproject.dev import read_notebook
|
22
32
|
from nbproject.dev._check_last_cell import check_last_cell
|
23
33
|
|
@@ -26,14 +36,187 @@ def finish(i_saved_the_notebook: bool = False):
|
|
26
36
|
"Save the notebook, pass `i_saved_the_notebook=True`, and re-run this cell."
|
27
37
|
)
|
28
38
|
return None
|
29
|
-
|
30
|
-
if not check_last_cell(
|
39
|
+
notebook_content = read_notebook(run_context.path) # type: ignore
|
40
|
+
if not check_last_cell(notebook_content, "i_saved_the_notebook"):
|
31
41
|
raise CallFinishInLastCell(
|
32
42
|
"Can only finish() from the last code cell of the notebook."
|
33
43
|
)
|
34
|
-
|
35
|
-
|
36
|
-
|
44
|
+
save_run_context_core(
|
45
|
+
run=run_context.run,
|
46
|
+
transform=run_context.transform,
|
47
|
+
filepath=run_context.path,
|
48
|
+
finished_at=True,
|
49
|
+
notebook_content=notebook_content,
|
50
|
+
)
|
51
|
+
else:
|
52
|
+
# scripts
|
53
|
+
run_context.run.finished_at = datetime.now(timezone.utc) # update run time
|
54
|
+
run_context.run.save()
|
55
|
+
|
56
|
+
|
57
|
+
# do not type because we need to be aware of lnschema_core import order
|
58
|
+
def save_run_context_core(
|
59
|
+
*,
|
60
|
+
run: Run,
|
61
|
+
transform: Transform,
|
62
|
+
filepath: Path,
|
63
|
+
transform_family: Optional[QuerySet] = None,
|
64
|
+
is_consecutive: bool = True,
|
65
|
+
finished_at: bool = False,
|
66
|
+
notebook_content=None, # nbproject.Notebook
|
67
|
+
) -> Optional[str]:
|
68
|
+
import lamindb as ln
|
69
|
+
|
70
|
+
ln.settings.verbosity = "success"
|
71
|
+
|
72
|
+
if transform.type == "notebook":
|
73
|
+
try:
|
74
|
+
import nbstripout
|
75
|
+
from nbproject.dev import (
|
76
|
+
check_consecutiveness,
|
77
|
+
read_notebook,
|
78
|
+
)
|
79
|
+
except ImportError:
|
80
|
+
logger.error(
|
81
|
+
"install nbproject & nbstripout: pip install nbproject nbstripout"
|
82
|
+
)
|
83
|
+
return None
|
84
|
+
if notebook_content is None:
|
85
|
+
notebook_content = read_notebook(filepath) # type: ignore
|
86
|
+
is_consecutive = check_consecutiveness(notebook_content)
|
87
|
+
if not is_consecutive:
|
88
|
+
if os.getenv("LAMIN_TESTING") is None:
|
89
|
+
decide = input(
|
90
|
+
" Do you still want to proceed with publishing? (y/n) "
|
91
|
+
)
|
92
|
+
else:
|
93
|
+
decide = "n"
|
94
|
+
if decide != "y":
|
95
|
+
logger.error("Aborted (non-consecutive)!")
|
96
|
+
return "aborted-non-consecutive"
|
37
97
|
|
38
|
-
|
39
|
-
|
98
|
+
# convert the notebook file to html
|
99
|
+
# log_level is set to 40 to silence the nbconvert logging
|
100
|
+
result = subprocess.run(
|
101
|
+
"jupyter nbconvert --to html"
|
102
|
+
f" {filepath.as_posix()} --Application.log_level=40",
|
103
|
+
shell=True,
|
104
|
+
)
|
105
|
+
# move the temporary file into the cache dir in case it's accidentally
|
106
|
+
# in an existing storage location -> we want to move associated
|
107
|
+
# artifacts into default storage and not register them in an existing
|
108
|
+
# location
|
109
|
+
filepath_html = filepath.with_suffix(".html") # current location
|
110
|
+
shutil.move(
|
111
|
+
filepath_html, # type: ignore
|
112
|
+
ln_setup.settings.storage.cache_dir / filepath_html.name,
|
113
|
+
) # move; don't use Path.rename here because of cross-device link error
|
114
|
+
# see https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
|
115
|
+
filepath_html = (
|
116
|
+
ln_setup.settings.storage.cache_dir / filepath_html.name
|
117
|
+
) # adjust location
|
118
|
+
assert result.returncode == 0
|
119
|
+
# copy the notebook file to a temporary file
|
120
|
+
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
|
121
|
+
shutil.copy2(filepath, source_code_path) # copy
|
122
|
+
result = subprocess.run(f"nbstripout {source_code_path}", shell=True)
|
123
|
+
assert result.returncode == 0
|
124
|
+
else:
|
125
|
+
source_code_path = filepath
|
126
|
+
# find initial versions of source codes and html reports
|
127
|
+
initial_report = None
|
128
|
+
initial_source = None
|
129
|
+
if transform_family is None:
|
130
|
+
transform_family = transform.versions
|
131
|
+
if len(transform_family) > 0:
|
132
|
+
for prev_transform in transform_family.order_by("-created_at"):
|
133
|
+
# check for id to avoid query
|
134
|
+
if prev_transform.latest_report_id is not None:
|
135
|
+
# any previous latest report of this transform is OK!
|
136
|
+
initial_report = prev_transform.latest_report
|
137
|
+
if prev_transform.source_code_id is not None:
|
138
|
+
# any previous source code id is OK!
|
139
|
+
initial_source = prev_transform.source_code
|
140
|
+
ln.settings.silence_file_run_transform_warning = True
|
141
|
+
# register the source code
|
142
|
+
if transform.source_code is not None:
|
143
|
+
# check if the hash of the notebook source code matches
|
144
|
+
check_source_code = ln.Artifact(source_code_path, key="dummy")
|
145
|
+
if check_source_code._state.adding:
|
146
|
+
if os.getenv("LAMIN_TESTING") is None:
|
147
|
+
# in test, auto-confirm overwrite
|
148
|
+
response = input(
|
149
|
+
"You try to save a new notebook source code with the same version"
|
150
|
+
f" '{transform.version}'; do you want to replace the content of the"
|
151
|
+
f" existing source code {transform.source_code}? (y/n)"
|
152
|
+
)
|
153
|
+
else:
|
154
|
+
response = "y"
|
155
|
+
if response == "y":
|
156
|
+
transform.source_code.replace(source_code_path)
|
157
|
+
transform.source_code.save()
|
158
|
+
else:
|
159
|
+
logger.warning(
|
160
|
+
"Please create a new version of the notebook via `lamin track"
|
161
|
+
" <filepath>` and re-run the notebook"
|
162
|
+
)
|
163
|
+
return "rerun-the-notebook"
|
164
|
+
else:
|
165
|
+
source_code = ln.Artifact(
|
166
|
+
source_code_path,
|
167
|
+
description=f"Source of transform {transform.uid}",
|
168
|
+
version=transform.version,
|
169
|
+
is_new_version_of=initial_source,
|
170
|
+
visibility=0, # hidden file
|
171
|
+
run=False,
|
172
|
+
)
|
173
|
+
source_code.save()
|
174
|
+
transform.source_code = source_code
|
175
|
+
logger.success(f"saved transform.source_code: {transform.source_code}")
|
176
|
+
# track environment
|
177
|
+
filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
|
178
|
+
if filepath_env.exists():
|
179
|
+
artifact = ln.Artifact(
|
180
|
+
filepath_env,
|
181
|
+
description="requirements.txt",
|
182
|
+
visibility=0,
|
183
|
+
run=False,
|
184
|
+
)
|
185
|
+
if artifact._state.adding:
|
186
|
+
artifact.save()
|
187
|
+
run.environment = artifact
|
188
|
+
logger.success(f"saved run.environment: {run.environment}")
|
189
|
+
# save report file
|
190
|
+
if not transform.type == "notebook":
|
191
|
+
run.save()
|
192
|
+
else:
|
193
|
+
if run.report_id is not None:
|
194
|
+
logger.warning(
|
195
|
+
"there is already an existing report for this run, replacing it"
|
196
|
+
)
|
197
|
+
run.report.replace(filepath_html)
|
198
|
+
run.report.save()
|
199
|
+
else:
|
200
|
+
report_file = ln.Artifact(
|
201
|
+
filepath_html,
|
202
|
+
description=f"Report of run {run.uid}",
|
203
|
+
is_new_version_of=initial_report,
|
204
|
+
visibility=0, # hidden file
|
205
|
+
run=False,
|
206
|
+
)
|
207
|
+
report_file.save()
|
208
|
+
run.report = report_file
|
209
|
+
run.is_consecutive = is_consecutive
|
210
|
+
if finished_at:
|
211
|
+
run.finished_at = datetime.now(timezone.utc)
|
212
|
+
run.save()
|
213
|
+
transform.latest_report = run.report
|
214
|
+
transform.save()
|
215
|
+
if transform.type == "notebook":
|
216
|
+
logger.success(f"saved transform.latest_report: {transform.latest_report}")
|
217
|
+
identifier = ln_setup.settings.instance.slug
|
218
|
+
logger.success(f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}")
|
219
|
+
# because run & transform changed, update the global run_context
|
220
|
+
run_context.run = run
|
221
|
+
run_context.transform = transform
|
222
|
+
return None
|
lamindb/_query_set.py
CHANGED
@@ -12,6 +12,7 @@ from lnschema_core.models import (
|
|
12
12
|
IsTree,
|
13
13
|
IsVersioned,
|
14
14
|
Registry,
|
15
|
+
Run,
|
15
16
|
Transform,
|
16
17
|
)
|
17
18
|
from lnschema_core.types import ListLike, StrField
|
@@ -165,7 +166,8 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
165
166
|
|
166
167
|
def delete(self, *args, **kwargs):
|
167
168
|
"""Delete all records in the query set."""
|
168
|
-
|
169
|
+
# both Transform & Run might reference artifacts
|
170
|
+
if self.model in {Artifact, Collection, Transform, Run}:
|
169
171
|
for record in self:
|
170
172
|
record.delete(*args, **kwargs)
|
171
173
|
else:
|
@@ -241,7 +243,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
241
243
|
self, values: ListLike, field: Optional[Union[str, StrField]] = None, **kwargs
|
242
244
|
):
|
243
245
|
"""{}."""
|
244
|
-
from .
|
246
|
+
from ._can_validate import _validate
|
245
247
|
|
246
248
|
return _validate(cls=self, values=values, field=field, **kwargs)
|
247
249
|
|
@@ -250,7 +252,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
250
252
|
self, values: ListLike, field: Optional[Union[str, StrField]] = None, **kwargs
|
251
253
|
):
|
252
254
|
"""{}."""
|
253
|
-
from .
|
255
|
+
from ._can_validate import _inspect
|
254
256
|
|
255
257
|
return _inspect(cls=self, values=values, field=field, **kwargs)
|
256
258
|
|
@@ -259,7 +261,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
259
261
|
self, values: Iterable, field: Optional[Union[str, StrField]] = None, **kwargs
|
260
262
|
):
|
261
263
|
"""{}."""
|
262
|
-
from .
|
264
|
+
from ._can_validate import _standardize
|
263
265
|
|
264
266
|
return _standardize(cls=self, values=values, field=field, **kwargs)
|
265
267
|
|
lamindb/_run.py
CHANGED
@@ -42,7 +42,9 @@ def delete_run_artifacts(run: Run) -> None:
|
|
42
42
|
if environment is not None or report is not None:
|
43
43
|
run.save()
|
44
44
|
if environment is not None:
|
45
|
-
environment
|
45
|
+
# only delete if there are no other runs attached to this environment
|
46
|
+
if environment.environment_of.count() == 0:
|
47
|
+
environment.delete(permanent=True)
|
46
48
|
if report is not None:
|
47
49
|
report.delete(permanent=True)
|
48
50
|
|
lamindb/_save.py
CHANGED
@@ -10,7 +10,7 @@ import lamindb_setup
|
|
10
10
|
from django.db import transaction
|
11
11
|
from django.utils.functional import partition
|
12
12
|
from lamin_utils import logger
|
13
|
-
from lamindb_setup.core.upath import print_hook
|
13
|
+
from lamindb_setup.core.upath import UPath, print_hook
|
14
14
|
from lnschema_core.models import Artifact, Registry
|
15
15
|
|
16
16
|
from lamindb.core._settings import settings
|
@@ -141,13 +141,15 @@ def check_and_attempt_upload(
|
|
141
141
|
# a local env it will have a _local_filepath and needs to be uploaded
|
142
142
|
if hasattr(artifact, "_local_filepath"):
|
143
143
|
try:
|
144
|
-
upload_artifact(
|
144
|
+
storage_path = upload_artifact(
|
145
|
+
artifact, using_key, access_token=access_token
|
146
|
+
)
|
145
147
|
except Exception as exception:
|
146
148
|
logger.warning(f"could not upload artifact: {artifact}")
|
147
149
|
return exception
|
148
150
|
# copies (if on-disk) or moves the temporary file (if in-memory) to the cache
|
149
151
|
if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
|
150
|
-
copy_or_move_to_cache(artifact)
|
152
|
+
copy_or_move_to_cache(artifact, storage_path)
|
151
153
|
# after successful upload, we should remove the attribute so that another call
|
152
154
|
# call to save won't upload again, the user should call replace() then
|
153
155
|
del artifact._local_filepath
|
@@ -155,35 +157,44 @@ def check_and_attempt_upload(
|
|
155
157
|
return None
|
156
158
|
|
157
159
|
|
158
|
-
def copy_or_move_to_cache(artifact: Artifact):
|
160
|
+
def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
|
159
161
|
local_path = artifact._local_filepath
|
160
162
|
|
161
|
-
# in-memory zarr
|
162
|
-
if local_path is None or not local_path.
|
163
|
+
# some in-memory cases (zarr for now)
|
164
|
+
if local_path is None or not local_path.exists():
|
163
165
|
return None
|
164
166
|
|
165
167
|
local_path = local_path.resolve()
|
166
|
-
|
168
|
+
is_dir = local_path.is_dir()
|
169
|
+
cache_dir = settings._storage_settings.cache_dir
|
167
170
|
|
168
|
-
#
|
171
|
+
# just delete from the cache dir if a local instance
|
169
172
|
if not lamindb_setup.settings.storage.is_cloud:
|
170
173
|
if cache_dir in local_path.parents:
|
171
|
-
|
174
|
+
if is_dir:
|
175
|
+
shutil.rmtree(local_path)
|
176
|
+
else:
|
177
|
+
local_path.unlink()
|
172
178
|
return None
|
173
179
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
180
|
+
cache_path = settings._storage_settings.cloud_to_local_no_update(storage_path)
|
181
|
+
if local_path != cache_path:
|
182
|
+
cache_path.parent.mkdir(parents=True, exist_ok=True)
|
183
|
+
if cache_dir in local_path.parents:
|
184
|
+
local_path.replace(cache_path)
|
185
|
+
else:
|
186
|
+
if is_dir:
|
187
|
+
shutil.copytree(local_path, cache_path)
|
188
|
+
else:
|
189
|
+
shutil.copy(local_path, cache_path)
|
184
190
|
# make sure that the cached version is older than the cloud one
|
185
191
|
mts = datetime.now().timestamp() + 1.0
|
186
|
-
|
192
|
+
if is_dir:
|
193
|
+
files = (file for file in cache_path.rglob("*") if file.is_file())
|
194
|
+
for file in files:
|
195
|
+
os.utime(file, times=(mts, mts))
|
196
|
+
else:
|
197
|
+
os.utime(cache_path, times=(mts, mts))
|
187
198
|
|
188
199
|
|
189
200
|
# This is also used within Artifact.save()
|
@@ -264,7 +275,7 @@ def prepare_error_message(records, stored_artifacts, exception) -> str:
|
|
264
275
|
|
265
276
|
def upload_artifact(
|
266
277
|
artifact, using_key: Optional[str] = None, access_token: Optional[str] = None
|
267
|
-
) ->
|
278
|
+
) -> UPath:
|
268
279
|
"""Store and add file and its linked entries."""
|
269
280
|
# can't currently use filepath_from_artifact here because it resolves to ._local_filepath
|
270
281
|
storage_key = auto_storage_key_from_artifact(artifact)
|
@@ -283,3 +294,5 @@ def upload_artifact(
|
|
283
294
|
elif hasattr(artifact, "_to_store") and artifact._to_store:
|
284
295
|
logger.save(msg)
|
285
296
|
store_artifact(artifact._local_filepath, storage_path)
|
297
|
+
|
298
|
+
return storage_path
|
lamindb/core/__init__.py
CHANGED
@@ -14,6 +14,9 @@ Registries:
|
|
14
14
|
LabelManager
|
15
15
|
IsTree
|
16
16
|
IsVersioned
|
17
|
+
DataFrameAnnotator
|
18
|
+
AnnDataAnnotator
|
19
|
+
AnnotateLookup
|
17
20
|
CanValidate
|
18
21
|
HasParents
|
19
22
|
InspectResult
|
@@ -50,6 +53,7 @@ from lnschema_core.models import (
|
|
50
53
|
Registry,
|
51
54
|
)
|
52
55
|
|
56
|
+
from lamindb._annotate import AnnDataAnnotator, AnnotateLookup, DataFrameAnnotator
|
53
57
|
from lamindb._query_manager import QueryManager
|
54
58
|
from lamindb._query_set import QuerySet, RecordsList
|
55
59
|
from lamindb.core._feature_manager import FeatureManager
|
lamindb/core/_data.py
CHANGED
@@ -46,6 +46,9 @@ def get_run(run: Optional[Run]) -> Optional[Run]:
|
|
46
46
|
run = run_context.run
|
47
47
|
if run is None and not settings.silence_file_run_transform_warning:
|
48
48
|
logger.warning(WARNING_RUN_TRANSFORM)
|
49
|
+
# suppress run by passing False
|
50
|
+
elif not run:
|
51
|
+
run = None
|
49
52
|
return run
|
50
53
|
|
51
54
|
|
lamindb/core/_feature_manager.py
CHANGED
@@ -219,7 +219,7 @@ class FeatureManager:
|
|
219
219
|
slot = "columns" if slot is None else slot
|
220
220
|
self._add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
|
221
221
|
|
222
|
-
def add_from_df(self):
|
222
|
+
def add_from_df(self, field: FieldAttr = Feature.name, **kwargs):
|
223
223
|
"""Add features from DataFrame."""
|
224
224
|
if isinstance(self._host, Artifact):
|
225
225
|
assert self._host.accessor == "DataFrame"
|
@@ -228,11 +228,12 @@ class FeatureManager:
|
|
228
228
|
assert self._host.artifact.accessor == "DataFrame"
|
229
229
|
|
230
230
|
# parse and register features
|
231
|
+
registry = field.field.model
|
231
232
|
df = self._host.load()
|
232
|
-
features =
|
233
|
+
features = registry.from_values(df.columns, field=field, **kwargs)
|
233
234
|
if len(features) == 0:
|
234
235
|
logger.error(
|
235
|
-
"no validated features found in DataFrame! please register features first
|
236
|
+
"no validated features found in DataFrame! please register features first!"
|
236
237
|
)
|
237
238
|
return
|
238
239
|
|
lamindb/core/_run_context.py
CHANGED
@@ -42,6 +42,10 @@ class MissingTransformSettings(SystemExit):
|
|
42
42
|
pass
|
43
43
|
|
44
44
|
|
45
|
+
class UpdateTransformSettings(SystemExit):
|
46
|
+
pass
|
47
|
+
|
48
|
+
|
45
49
|
def get_uid_ext(version: str) -> str:
|
46
50
|
from lamin_utils._base62 import encodebytes
|
47
51
|
|
@@ -131,7 +135,7 @@ def update_stem_uid_or_version(
|
|
131
135
|
f'ln.settings.transform.stem_uid = "{new_stem_uid}"\nln.settings.transform.version ='
|
132
136
|
f' "{new_version}"\n'
|
133
137
|
)
|
134
|
-
raise
|
138
|
+
raise UpdateTransformSettings(
|
135
139
|
f"Please update your transform settings as follows:\n{new_metadata}"
|
136
140
|
)
|
137
141
|
return updated, new_stem_uid, new_version
|
@@ -326,15 +330,18 @@ class run_context:
|
|
326
330
|
)
|
327
331
|
if run is not None: # loaded latest run
|
328
332
|
run.started_at = datetime.now(timezone.utc) # update run time
|
329
|
-
run.save()
|
330
333
|
logger.important(f"loaded: {run}")
|
331
334
|
|
332
335
|
if run is None: # create new run
|
333
336
|
run = Run(
|
334
337
|
transform=cls.transform,
|
335
338
|
)
|
336
|
-
run.save()
|
337
339
|
logger.important(f"saved: {run}")
|
340
|
+
# can only determine at ln.finish() if run was consecutive in
|
341
|
+
# interactive session, otherwise, is consecutive
|
342
|
+
run.is_consecutive = True if is_run_from_ipython else None
|
343
|
+
# need to save in all cases
|
344
|
+
run.save()
|
338
345
|
cls.run = run
|
339
346
|
|
340
347
|
from ._track_environment import track_environment
|
@@ -343,9 +350,14 @@ class run_context:
|
|
343
350
|
|
344
351
|
if not is_run_from_ipython and cls.path is not None:
|
345
352
|
# upload run source code & environment
|
346
|
-
from
|
353
|
+
from lamindb._finish import save_run_context_core
|
347
354
|
|
348
|
-
|
355
|
+
save_run_context_core(
|
356
|
+
run=cls.run,
|
357
|
+
transform=cls.transform,
|
358
|
+
filepath=cls.path,
|
359
|
+
is_consecutive=True,
|
360
|
+
)
|
349
361
|
return None
|
350
362
|
|
351
363
|
@classmethod
|