lamindb 0.71.0__py3-none-any.whl → 0.71.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +26 -35
- lamindb/_finish.py +44 -32
- lamindb/_query_set.py +1 -21
- lamindb/_run.py +2 -0
- lamindb/_save.py +7 -22
- lamindb/_transform.py +5 -11
- lamindb/core/__init__.py +0 -2
- lamindb/core/_data.py +1 -1
- lamindb/core/_feature_manager.py +6 -5
- lamindb/core/_run_context.py +47 -29
- lamindb/core/_settings.py +2 -2
- lamindb/core/_sync_git.py +6 -2
- lamindb/core/datasets/_core.py +2 -2
- lamindb/core/storage/__init__.py +2 -2
- lamindb/core/storage/_valid_suffixes.py +4 -2
- lamindb/core/storage/objects.py +10 -3
- lamindb/core/storage/paths.py +1 -1
- lamindb/core/versioning.py +18 -4
- {lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/METADATA +5 -5
- {lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/RECORD +23 -24
- lamindb/core/_view_tree.py +0 -116
- {lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/LICENSE +0 -0
- {lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_artifact.py
CHANGED
@@ -21,7 +21,6 @@ from lamindb_setup.core.upath import (
     get_stat_file_cloud,
 )
 from lnschema_core import Artifact, Run, Storage
-from lnschema_core.models import IsTree
 from lnschema_core.types import (
     VisibilityChoice,
 )
@@ -35,8 +34,7 @@ from lamindb.core.storage import (
     delete_storage,
     infer_suffix,
     load_to_memory,
-    size_adata,
-    write_to_file,
+    write_to_disk,
 )
 from lamindb.core.storage.paths import (
     auto_storage_key_from_artifact,
@@ -173,8 +171,7 @@ def process_data(
         # Alex: I don't understand the line below
         if path.suffixes == []:
             path = path.with_suffix(suffix)
-
-        write_to_file(data, path)
+        write_to_disk(data, path)
         use_existing_storage_key = False
     else:
         raise NotImplementedError(
@@ -194,16 +191,13 @@ def get_stat_or_artifact(
     n_objects = None
     if settings.upon_file_create_skip_size_hash:
         return None, None, None, n_objects
-    if suffix == ".zarr" and memory_rep is not None and isinstance(memory_rep, AnnData):
-        size = size_adata(memory_rep)
-        return size, None, None, n_objects
     stat = path.stat()  # one network request
     if not isinstance(path, LocalPathClasses):
         size, hash, hash_type = None, None, None
         if stat is not None:
             if "ETag" in stat:  # is file
                 size, hash, hash_type = get_stat_file_cloud(stat)
-            elif
+            elif stat["type"] == "directory":
                 size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
         if hash is None:
             logger.warning(f"did not add hash for {path}")
@@ -589,7 +583,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
         init_self_from_db(artifact, kwargs_or_artifact)
         # adding "key" here is dangerous because key might be auto-populated
         update_attributes(artifact, {"description": description})
-        if artifact.key != key:
+        if artifact.key != key and key is not None:
             logger.warning(
                 f"key {artifact.key} on existing artifact differs from passed key {key}"
             )
@@ -914,11 +908,25 @@ def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs)
 
 # docstring handled through attach_func_to_class_method
 def cache(self, is_run_input: bool | None = None) -> Path:
-    _track_run_input(self, is_run_input)
-
     using_key = settings._using_key
     filepath = filepath_from_artifact(self, using_key=using_key)
-
+    try:
+        cache_path = setup_settings.instance.storage.cloud_to_local(
+            filepath, print_progress=True
+        )
+    except Exception as e:
+        if not isinstance(filepath, LocalPathClasses):
+            cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
+                filepath
+            )
+            if cache_path.is_file():
+                cache_path.unlink(missing_ok=True)
+            elif cache_path.is_dir():
+                shutil.rmtree(cache_path)
+        raise e
+    # only call if sync is successfull
+    _track_run_input(self, is_run_input)
+    return cache_path
 
 
 # docstring handled through attach_func_to_class_method
@@ -1003,6 +1011,11 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
         local_path = self.path
         self.storage_id = setup_settings.instance.storage.id
         self._local_filepath = local_path
+        # switch to virtual storage key upon upload
+        # the local filepath is already cached at that point
+        self.key_is_virtual = True
+        # ensure that the artifact is uploaded
+        self._to_store = True
 
     self._save_skip_storage(**kwargs)
 
@@ -1045,27 +1058,6 @@ def path(self) -> Path | UPath:
     return filepath_from_artifact(self, using_key)
 
 
-@classmethod  # type: ignore
-@doc_args(IsTree.view_tree.__doc__)
-def view_tree(
-    cls,
-    level: int = -1,
-    limit_to_directories: bool = False,
-    length_limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    """{}."""
-    from lamindb.core._view_tree import view_tree as _view_tree
-
-    _view_tree(
-        cls=cls,
-        level=level,
-        limit_to_directories=limit_to_directories,
-        length_limit=length_limit,
-        max_files_per_dir_per_type=max_files_per_dir_per_type,
-    )
-
-
 # docstring handled through attach_func_to_class_method
 def restore(self) -> None:
     self.visibility = VisibilityChoice.default.value
@@ -1085,7 +1077,6 @@ METHOD_NAMES = [
     "replace",
     "from_dir",
     "restore",
-    "view_tree",
 ]
 
 if ln_setup._TESTING:
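The reworked cache() above syncs through cloud_to_local, removes a partially synced file or directory on failure, and registers the run input only after a successful sync. A minimal usage sketch (the filter query is hypothetical):

    import lamindb as ln

    artifact = ln.Artifact.filter(description="my dataset").first()  # hypothetical lookup
    local_path = artifact.cache()  # downloads if needed; cleans up partial downloads on error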
lamindb/_finish.py
CHANGED
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
 
 import lamindb_setup as ln_setup
 from lamin_utils import logger
+from lamindb_setup.core.hashing import hash_file
 from lnschema_core.types import TransformType
 
 from .core._run_context import is_run_from_ipython, run_context
@@ -35,7 +36,7 @@ def get_seconds_since_modified(filepath) -> float:
 def finish():
     """Mark a tracked run as finished.
 
-
+    Saves source code and, for notebooks, a run report to your default storage location.
     """
     if run_context.path is None:
         raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
@@ -47,16 +48,12 @@ def finish():
         raise NotebookNotSaved(
             "Please save the notebook in your editor right before running `ln.finish()`"
         )
-
-
-
-
-
-
-    else:  # scripts
-        # save_run_context_core was already called during ln.track()
-        run_context.run.finished_at = datetime.now(timezone.utc)  # update run time
-        run_context.run.save()
+    save_run_context_core(
+        run=run_context.run,
+        transform=run_context.transform,
+        filepath=run_context.path,
+        finished_at=True,
+    )
 
 
 def save_run_context_core(
@@ -138,15 +135,17 @@ def save_run_context_core(
     if prev_transform.source_code_id is not None:
         prev_source = prev_transform.source_code
     ln.settings.silence_file_run_transform_warning = True
-
-
-
-
-
+
+    # track source code
+    if transform.source_code_id is not None:
+        # check if the hash of the transform source code matches
+        # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
+        hash, _ = hash_file(source_code_path)  # ignore hash_type for now
+        if hash != transform.source_code.hash:
             if os.getenv("LAMIN_TESTING") is None:
                 # in test, auto-confirm overwrite
                 response = input(
-                    f"You are about to overwrite existing source code (hash {transform.source_code.hash}) for transform version"
+                    f"You are about to replace (overwrite) existing source code (hash '{transform.source_code.hash}') for transform version"
                     f" '{transform.version}'. Proceed? (y/n)"
                 )
             else:
@@ -154,6 +153,9 @@ def save_run_context_core(
             if response == "y":
                 transform.source_code.replace(source_code_path)
                 transform.source_code.save(upload=True)
+                logger.success(
+                    f"replaced transform.source_code: {transform.source_code}"
+                )
             else:
                 logger.warning("Please re-run `ln.track()` to make a new version")
                 return "rerun-the-notebook"
@@ -169,21 +171,32 @@ def save_run_context_core(
         source_code.save(upload=True)
         transform.source_code = source_code
         logger.success(f"saved transform.source_code: {transform.source_code}")
+
     # track environment
     filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
     if filepath_env.exists():
-
-
-
-
-
-
-
+        hash, _ = hash_file(filepath_env)
+        artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
+        new_env_artifact = artifact is None
+        if new_env_artifact:
+            artifact = ln.Artifact(
+                filepath_env,
+                description="requirements.txt",
+                visibility=0,
+                run=False,
+            )
         artifact.save(upload=True)
         run.environment = artifact
-
-
+        if new_env_artifact:
+            logger.success(f"saved run.environment: {run.environment}")
+
+    # set finished_at
+    if finished_at:
+        run.finished_at = datetime.now(timezone.utc)
+
+    # track report and set is_consecutive
     if not transform.type == TransformType.notebook:
+        run.is_consecutive = True
         run.save()
     else:
         if run.report_id is not None:
@@ -203,16 +216,15 @@ def save_run_context_core(
         report_file.save(upload=True)
         run.report = report_file
         run.is_consecutive = is_consecutive
-        if finished_at:
-            run.finished_at = datetime.now(timezone.utc)
         run.save()
         transform.latest_report = run.report
-    transform.save()
-    if transform.type == TransformType.notebook:
         logger.success(f"saved transform.latest_report: {transform.latest_report}")
-
+    transform.save()
+
+    # finalize
+    if ln_setup.settings.instance.is_on_hub:
         identifier = ln_setup.settings.instance.slug
-        logger.
+        logger.important(
             f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
         )
     # because run & transform changed, update the global run_context
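With this change, ln.finish() routes scripts and notebooks alike through save_run_context_core(), which now hashes the environment file to avoid saving a duplicate requirements artifact and sets run.finished_at itself. The user-facing flow stays the same:

    import lamindb as ln

    ln.track()   # registers transform & run; for scripts, also saves source code
    # ... your analysis ...
    ln.finish()  # saves source code (and, for notebooks, the report), sets finished_at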
lamindb/_query_set.py
CHANGED
@@ -11,7 +11,6 @@ from lnschema_core.models import (
     Artifact,
     CanValidate,
     Collection,
-    IsTree,
     IsVersioned,
     Registry,
     Run,
@@ -83,7 +82,7 @@ class RecordsList(UserList):
         return one_helper(self)
 
 
-class QuerySet(models.QuerySet, CanValidate, IsTree):
+class QuerySet(models.QuerySet, CanValidate):
     """Sets of records returned by queries.
 
     See Also:
@@ -265,25 +264,6 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
 
         return _standardize(cls=self, values=values, field=field, **kwargs)
 
-    @doc_args(IsTree.view_tree.__doc__)
-    def view_tree(
-        self,
-        level: int = -1,
-        limit_to_directories: bool = False,
-        length_limit: int = 1000,
-        max_files_per_dir_per_type: int = 7,
-    ) -> None:
-        """{}."""
-        from .core._view_tree import view_tree as _view_tree
-
-        _view_tree(
-            cls=self,
-            level=level,
-            limit_to_directories=limit_to_directories,
-            length_limit=length_limit,
-            max_files_per_dir_per_type=max_files_per_dir_per_type,
-        )
-
 
 def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
     # evaluating length can be very costly, hence, the try-except block
lamindb/_run.py
CHANGED
@@ -13,6 +13,7 @@ def __init__(run: Run, *args, **kwargs):
     transform: Transform = None
     if "transform" in kwargs or len(args) == 1:
         transform = kwargs.pop("transform") if len(args) == 0 else args[0]
+    params: str | None = kwargs.pop("params") if "params" in kwargs else None
     reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
     reference_type: str | None = (
         kwargs.pop("reference_type") if "reference_type" in kwargs else None
@@ -25,6 +26,7 @@ def __init__(run: Run, *args, **kwargs):
         transform=transform,
         reference=reference,
         reference_type=reference_type,
+        json=params,
     )
 
 
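Run() now accepts a params mapping that is stored on the record's json field. A sketch, assuming a saved pipeline transform:

    import lamindb as ln

    transform = ln.Transform(name="my pipeline", type="pipeline")  # assumption: a pipeline transform
    transform.save()
    run = ln.Run(transform, params={"learning_rate": 1e-3, "downsample": True})
    run.save()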
lamindb/_save.py
CHANGED
@@ -6,13 +6,13 @@ import traceback
 from collections import defaultdict
 from datetime import datetime
 from functools import partial
-from typing import Iterable, overload
+from typing import TYPE_CHECKING, Iterable, overload
 
 import lamindb_setup
 from django.db import transaction
 from django.utils.functional import partition
 from lamin_utils import logger
-from lamindb_setup.core.upath import
+from lamindb_setup.core.upath import print_hook
 from lnschema_core.models import Artifact, Registry
 
 from lamindb.core._settings import settings
@@ -23,12 +23,8 @@ from lamindb.core.storage.paths import (
     store_file_or_folder,
 )
 
-
-from
-except ImportError:
-
-    def write_adata_zarr(filepath):  # type: ignore
-        raise ImportError("Please install zarr: pip install zarr")
+if TYPE_CHECKING:
+    from lamindb_setup.core.upath import UPath
 
 
 def save(
@@ -162,7 +158,7 @@ def check_and_attempt_upload(
 def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
     local_path = artifact._local_filepath
 
-    #
+    # in-memory cases
     if local_path is None or not local_path.exists():
         return None
 
@@ -284,18 +280,7 @@ def upload_artifact(
     storage_path = attempt_accessing_path(
         artifact, storage_key, using_key=using_key, access_token=access_token
     )
-
-
-        artifact.suffix == ".zarr"
-        and hasattr(artifact, "_memory_rep")
-        and artifact._memory_rep is not None
-    ):
-        logger.save(msg)
-        print_progress = partial(
-            print_hook, objectname=storage_path.name, action="uploading"
-        )
-        write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
-    elif hasattr(artifact, "_to_store") and artifact._to_store:
-        logger.save(msg)
+    if hasattr(artifact, "_to_store") and artifact._to_store:
+        logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
     store_file_or_folder(artifact._local_filepath, storage_path)
     return storage_path
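upload_artifact() no longer special-cases writing an in-memory AnnData to zarr during upload; zarr is now written at creation time via write_to_disk(), and upload simply stores the cached local file whenever the private _to_store flag is set, which Artifact.save() sets as shown in the _artifact.py diff. From the user's side nothing changes:

    import lamindb as ln

    artifact = ln.Artifact("data.parquet", description="a table")  # assumes a local file
    artifact.save(upload=True)  # logs "storing artifact '<uid>' at '<storage path>'"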
lamindb/_transform.py
CHANGED
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
-from lnschema_core.models import
+from lnschema_core.models import Run, Transform
 from lnschema_core.types import TransformType
 
 from ._run import delete_run_artifacts
-from .core.versioning import
+from .core.versioning import process_is_new_version_of
 
 
 def __init__(transform: Transform, *args, **kwargs):
@@ -32,15 +32,9 @@ def __init__(transform: Transform, *args, **kwargs):
             "Only name, key, version, type, is_new_version_of, reference, "
             f"reference_type can be passed, but you passed: {kwargs}"
         )
-
-
-
-        if not isinstance(is_new_version_of, Transform):
-            raise TypeError("is_new_version_of has to be of type ln.Transform")
-        new_uid, version = get_uid_from_old_version(is_new_version_of, version)
-        if name is None:
-            name = is_new_version_of.name
-
+    new_uid, version, name = process_is_new_version_of(
+        is_new_version_of, version, name, Transform
+    )
     # this is only because the user-facing constructor allows passing an id
     # most others don't
     if uid is None:
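The removed type-check/uid/name boilerplate now lives in process_is_new_version_of() (see lamindb/core/versioning.py below); behavior is unchanged. A sketch of the versioning API this supports:

    import lamindb as ln

    v1 = ln.Transform(name="qc pipeline", version="1")
    v1.save()
    v2 = ln.Transform(is_new_version_of=v1)  # reuses the stem uid and name, bumps the version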
lamindb/core/__init__.py
CHANGED
@@ -12,7 +12,6 @@ Registries:
    Data
    FeatureManager
    LabelManager
-   IsTree
    IsVersioned
    CanValidate
    HasParents
@@ -55,7 +54,6 @@ from lnschema_core.models import (
     CanValidate,
     Data,
     HasParents,
-    IsTree,
     IsVersioned,
     Registry,
 )
lamindb/core/_data.py
CHANGED
@@ -345,7 +345,7 @@ def add_labels(
                 f" {old_feature_set}"
             )
             old_feature_set.delete()
-            self.features.
+            self.features.add_feature_set(feature_set, slot="external")
         logger.save(
             f"linked new feature '{feature.name}' together with new feature set"
             f" {feature_set}"
lamindb/core/_feature_manager.py
CHANGED
@@ -236,7 +236,7 @@ class FeatureManager:
             and self._host.artifact.accessor == "DataFrame"
         ):
             slot = "columns" if slot is None else slot
-            self.
+            self.add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
 
     def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
         """Add features from DataFrame."""
@@ -325,7 +325,7 @@ class FeatureManager:
         self._host._feature_sets = feature_sets
         self._host.save()
 
-    def
+    def add_feature_set(self, feature_set: FeatureSet, slot: str):
         """Add new feature set to a slot.
 
         Args:
@@ -405,7 +405,8 @@ class FeatureManager:
                     f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
                 )
                 continue
-            #
-
+            # make sure the uid matches if featureset is composed of same features
+            if feature_set_self.hash == feature_set.hash:
+                feature_set_self.uid = feature_set.uid
             logger.info(f"saving {slot} featureset: {feature_set_self}")
-            self._host.features.
+            self._host.features.add_feature_set(feature_set_self, slot)
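The renamed add_feature_set() is the entry point for attaching a FeatureSet to a slot, and the transfer path now reuses the source uid when the hashes (i.e., the member features) match. A sketch, assuming saved features and a DataFrame-backed artifact:

    import lamindb as ln

    features = ln.Feature.filter(name__in=["age", "tissue"]).list()  # hypothetical features
    feature_set = ln.FeatureSet(features=features)
    artifact = ln.Artifact.filter(description="a table").first()     # hypothetical lookup
    artifact.features.add_feature_set(feature_set, slot="columns")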
lamindb/core/_run_context.py
CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path, PurePath
 from typing import TYPE_CHECKING
 
 from lamin_utils import logger
+from lamindb_setup.core.hashing import hash_file
 from lnschema_core import Run, Transform, ids
 from lnschema_core.types import TransformType
 from lnschema_core.users import current_user_id
@@ -175,6 +176,17 @@ def raise_transform_settings_error() -> None:
     )
 
 
+def pretty_pypackages(dependencies: dict) -> str:
+    deps_list = []
+    for pkg, ver in dependencies.items():
+        if ver != "":
+            deps_list.append(pkg + f"=={ver}")
+        else:
+            deps_list.append(pkg)
+    deps_list.sort()
+    return " ".join(deps_list)
+
+
 class run_context:
     """Global run context."""
 
@@ -189,6 +201,7 @@ class run_context:
     def _track(
         cls,
         *,
+        params: dict | None = None,
         transform: Transform | None = None,
         new_run: bool | None = None,
         path: str | None = None,
@@ -204,6 +217,7 @@ class run_context:
             whether the script exists in the git repository and add a link.
 
         Args:
+            params: A dictionary of parameters to track for the run.
             transform: Can be of type `"pipeline"` or `"notebook"`
                 (:class:`~lamindb.core.types.TransformType`).
             new_run: If `False`, loads latest run of transform
@@ -298,11 +312,13 @@ class run_context:
             )
             if run is not None:  # loaded latest run
                 run.started_at = datetime.now(timezone.utc)  # update run time
+                run.json = params  # update run params
                 logger.important(f"loaded: {run}")
 
         if run is None:  # create new run
             run = Run(
                 transform=cls.transform,
+                params=params,
             )
             logger.important(f"saved: {run}")
         # can only determine at ln.finish() if run was consecutive in
@@ -315,16 +331,6 @@ class run_context:
         from ._track_environment import track_environment
 
         track_environment(run)
-
-        if not is_run_from_ipython and cls.path is not None:
-            # upload run source code & environment
-            from lamindb._finish import save_run_context_core
-
-            save_run_context_core(
-                run=cls.run,
-                transform=cls.transform,
-                filepath=cls.path,
-            )
         return None
 
     @classmethod
@@ -386,17 +392,12 @@ class run_context:
         # log imported python packages
         if not path_str.startswith("/fileId="):
             try:
-                from nbproject.dev._metadata_display import DisplayMeta
                 from nbproject.dev._pypackage import infer_pypackages
 
-
-                    filepath=path_str,
-                    metadata_only=True,
-                )
-                dm = DisplayMeta(metadata)
+                nb = nbproject.dev.read_notebook(path_str)
                 logger.important(
                     "notebook imports:"
-                    f" {
+                    f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
                 )
             except Exception:
                 logger.debug("inferring imported packages failed")
@@ -451,19 +452,36 @@ class run_context:
             transform.save()
             logger.important(f"updated: {transform}")
         # check whether the notebook source code was already saved
-        if
-
-
-
-
-
-
-
-
-
+        if transform.source_code_id is not None:
+            response = None
+            if is_run_from_ipython:
+                if os.getenv("LAMIN_TESTING") is None:
+                    response = input(
+                        "You already saved source code for this notebook."
+                        " Bump the version before a new run? (y/n)"
+                    )
+                else:
+                    response = "y"
             else:
-
-
+                hash, _ = hash_file(cls.path)  # ignore hash_type for now
+                if hash != transform.source_code.hash:
+                    # only if hashes don't match, we need user input
+                    if os.getenv("LAMIN_TESTING") is None:
+                        response = input(
+                            "You already saved source code for this script and meanwhile modified it without bumping a version."
+                            " Bump the version before a new run? (y/n)"
+                        )
+                    else:
+                        response = "y"
+                else:
+                    logger.important(f"loaded: {transform}")
+            if response is not None:
+                # if a script is re-run and hashes match, we don't need user input
+                if response == "y":
+                    update_stem_uid_or_version(stem_uid, version, bump_version=True)
+                else:
+                    # we want a new stem_uid in this case, hence raise the error
+                    raise_transform_settings_error()
         else:
             logger.important(f"loaded: {transform}")
         cls.transform = transform
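ln.track() now accepts params (stored on the run), and for scripts it defers source-code saving to ln.finish(), prompting for a version bump only when the on-disk hash no longer matches the saved source code. The new pretty_pypackages() helper formats inferred notebook dependencies:

    import lamindb as ln

    ln.track(params={"seed": 0, "n_epochs": 10})  # params land on the run's json field

    from lamindb.core._run_context import pretty_pypackages
    pretty_pypackages({"pandas": "2.2.0", "scipy": ""})  # -> 'pandas==2.2.0 scipy'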
lamindb/core/_settings.py
CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, Mapping
 
 import lamindb_setup as ln_setup
 from lamin_utils import logger
-from lamindb_setup.
+from lamindb_setup._set_managed_storage import set_managed_storage
 from lamindb_setup.core._settings import settings as setup_settings
 from lamindb_setup.core._settings_instance import sanitize_git_repo_url
 
@@ -147,7 +147,7 @@ class Settings:
             path, kwargs = path_kwargs
         else:
             path, kwargs = path_kwargs, {}
-
+        set_managed_storage(path, **kwargs)
 
     @property
     def storage_local(self) -> Path:
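Assigning to ln.settings.storage now delegates to lamindb_setup's set_managed_storage and accepts either a plain path or a (path, kwargs) tuple. A sketch (bucket and kwargs are hypothetical):

    import lamindb as ln

    ln.settings.storage = "s3://my-bucket"
    ln.settings.storage = "s3://my-bucket", {"profile": "my-aws-profile"}  # path plus filesystem kwargs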
lamindb/core/_sync_git.py
CHANGED
@@ -61,11 +61,15 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
         capture_output=True,
         cwd=repo_dir,
     )
-
+    # we just care to find one commit
+    # hence, we split by new line ("\n") and use the first one
+    commit_hash = result.stdout.decode().split("\n")[0]
     if commit_hash == "" or result.returncode == 1:
         return None
     else:
-        assert
+        assert (
+            len(commit_hash) == 40
+        ), f"commit hash |{commit_hash}| is not 40 characters long"
         return commit_hash
 
 
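git log --find-object can report several commits touching the same blob; the fix keeps only the first line of stdout before asserting it looks like a full 40-character SHA-1. A standalone sketch of that parsing (flags are illustrative, not necessarily the library's exact invocation):

    import subprocess

    blob_hash = "0123456789abcdef0123456789abcdef01234567"  # hypothetical blob hash
    result = subprocess.run(
        ["git", "log", "--format=%H", f"--find-object={blob_hash}"],
        capture_output=True,
    )
    commit_hash = result.stdout.decode().split("\n")[0]  # first matching commit only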
lamindb/core/datasets/_core.py
CHANGED
@@ -161,8 +161,8 @@ def anndata_mouse_sc_lymph_node(
     adata.obs.columns = (
         adata.obs.columns.str.replace("Sample Characteristic", "")
         .str.replace("Factor Value ", "Factor Value:", regex=True)
-        .str.replace("Factor Value
-        .str.replace(" Ontology Term
+        .str.replace("Factor Value\\[", "Factor Value:", regex=True)
+        .str.replace(" Ontology Term\\[", "ontology_id:", regex=True)
         .str.strip("[]")
         .str.replace("organism part", "tissue")
         .str.replace("organism", "organism")
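The escapes matter because these replacements run with regex=True, where an unescaped [ opens a character class instead of matching a literal bracket. A minimal illustration:

    import pandas as pd

    cols = pd.Index(["Factor Value[age]"])
    cols.str.replace("Factor Value\\[", "Factor Value:", regex=True)
    # -> Index(['Factor Value:age]'], dtype='object'); the later .str.strip("[]") drops the stray bracket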
lamindb/core/storage/__init__.py
CHANGED
@@ -10,6 +10,6 @@ from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
 
 from ._anndata_sizes import size_adata
 from ._backed_access import AnnDataAccessor, BackedAccessor
-from ._valid_suffixes import VALID_SUFFIXES
-from .objects import infer_suffix,
+from ._valid_suffixes import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
+from .objects import infer_suffix, write_to_disk
 from .paths import delete_storage, load_to_memory
lamindb/core/storage/_valid_suffixes.py
CHANGED
@@ -1,3 +1,5 @@
-from lamindb_setup.core.upath import VALID_SUFFIXES
+from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
 
-
+# add new composite suffixes like so
+VALID_COMPOSITE_SUFFIXES.update({".vitessce.json"})
+# can do the same for simple valid suffixes
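Registering .vitessce.json as a composite suffix keeps multi-part extensions from being truncated to .json during suffix inference. Roughly, resolution can work like this (the helper is illustrative, not the library's code):

    from pathlib import PurePosixPath

    VALID_COMPOSITE_SUFFIXES = {".vitessce.json", ".anndata.zarr"}

    def resolve_suffix(path: str) -> str:
        # prefer a registered composite suffix over the plain last suffix
        full = "".join(PurePosixPath(path).suffixes)
        return full if full in VALID_COMPOSITE_SUFFIXES else PurePosixPath(path).suffix

    resolve_suffix("config.vitessce.json")  # -> '.vitessce.json'
    resolve_suffix("table.json")            # -> '.json'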
lamindb/core/storage/objects.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from pathlib import PurePosixPath
 from typing import TYPE_CHECKING
 
 from anndata import AnnData
@@ -21,7 +22,7 @@ def infer_suffix(dmem, adata_format: str | None = None):
     """Infer LaminDB storage file suffix from a data object."""
     if isinstance(dmem, AnnData):
         if adata_format is not None:
-            if adata_format not in
+            if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
                 raise ValueError(
                     "Error when specifying AnnData storage format, it should be"
                     f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
@@ -40,9 +41,15 @@ def infer_suffix(dmem, adata_format: str | None = None):
     raise NotImplementedError
 
 
-def
+def write_to_disk(dmem, filepath: UPathStr):
     if isinstance(dmem, AnnData):
-
+        suffix = PurePosixPath(filepath).suffix
+        if suffix == ".h5ad":
+            dmem.write_h5ad(filepath)
+        elif suffix == ".zarr":
+            dmem.write_zarr(filepath)
+        else:
+            raise NotImplementedError
     elif isinstance(dmem, DataFrame):
         dmem.to_parquet(filepath)
     else:
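write_to_disk() replaces write_to_file() and dispatches on the target suffix, so an in-memory AnnData can be written as either .h5ad or .zarr at creation time (the latter assumes zarr is installed). Usage sketch:

    import anndata as ad
    import numpy as np
    from lamindb.core.storage import write_to_disk

    adata = ad.AnnData(np.ones((3, 2)))
    write_to_disk(adata, "example.h5ad")  # dispatches to adata.write_h5ad
    write_to_disk(adata, "example.zarr")  # dispatches to adata.write_zarr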
lamindb/core/storage/paths.py
CHANGED
@@ -140,7 +140,7 @@ def delete_storage(storagepath: Path):
     if not storagepath.is_relative_to(settings.storage):  # type: ignore
         allow_delete = False
         if setup_settings.instance.keep_artifacts_local:
-            allow_delete = storagepath.is_relative_to(
+            allow_delete = storagepath.is_relative_to(  # type: ignore
                 setup_settings.instance.storage_local.root
             )
     if not allow_delete:
lamindb/core/versioning.py
CHANGED
@@ -42,10 +42,7 @@ def init_uid(
     if is_new_version_of is not None:
         stem_uid = is_new_version_of.stem_uid
     else:
-
-            stem_uid = ids.base62_16()
-        elif n_full_id == 16:
-            stem_uid = ids.base62_12()
+        stem_uid = ids.base62(n_full_id - 4)
     if version is not None:
         if not isinstance(version, str):
             raise ValueError(
@@ -90,3 +87,20 @@ def get_new_path_from_uid(old_path: UPath, old_uid: str, new_uid: str):
     # for cloud path, the rename target must be the last part of the path
     new_path = old_path.name.replace(old_uid, new_uid)
     return new_path
+
+
+def process_is_new_version_of(
+    is_new_version_of: IsVersioned,
+    version: str | None,
+    name: str | None,
+    type: type[IsVersioned],
+) -> tuple[str, str, str]:
+    if is_new_version_of is not None and not isinstance(is_new_version_of, type):
+        raise TypeError(f"is_new_version_of has to be of type {type}")
+    if is_new_version_of is None:
+        uid = init_uid(version=version, n_full_id=type._len_stem_uid)
+    else:
+        uid, version = get_uid_from_old_version(is_new_version_of, version)
+        if name is None:
+            name = is_new_version_of.name
+    return uid, version, name
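init_uid() now derives the stem-uid length arithmetically (full uid length minus the 4-character version suffix) instead of hard-coding the two cases, and process_is_new_version_of() centralizes the uid/version/name handoff used by the Transform constructor above. A sketch of the arithmetic:

    # a minimal sketch of the semantics, not the library's exact code
    def stem_uid_length(n_full_id: int) -> int:
        return n_full_id - 4

    assert stem_uid_length(20) == 16  # was ids.base62_16()
    assert stem_uid_length(16) == 12  # was ids.base62_12()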
{lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.71.0
+Version: 0.71.2
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: lnschema_core==0.66.
-Requires-Dist: lamindb_setup==0.71.
+Requires-Dist: lnschema_core==0.66.4
+Requires-Dist: lamindb_setup==0.71.3
 Requires-Dist: lamin_utils==0.13.2
-Requires-Dist: lamin_cli==0.13.
+Requires-Dist: lamin_cli==0.13.1
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
@@ -37,7 +37,7 @@ Requires-Dist: faker-biology ; extra == "dev"
 Requires-Dist: django-schema-graph ; extra == "erdiagram"
 Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
 Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
-Requires-Dist: nbproject==0.10.
+Requires-Dist: nbproject==0.10.2 ; extra == "jupyter"
 Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
 Requires-Dist: nbconvert ; extra == "jupyter"
 Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
{lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/RECORD
CHANGED
@@ -1,55 +1,54 @@
-lamindb/__init__.py,sha256=
+lamindb/__init__.py,sha256=GKrW6unkqBBwwpxTXjuUv-5k4c4unimsV-vGFSWt68I,2182
 lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
-lamindb/_artifact.py,sha256=
+lamindb/_artifact.py,sha256=8uBW-dhuWyBUQGs728sAPCnuhTic-NKjSbaneF07aMo,40106
 lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
 lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
 lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
 lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
 lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
-lamindb/_finish.py,sha256=
+lamindb/_finish.py,sha256=6GwhqrC-x-JdFd16i7-uyhCWeQgGKxr25aSsSXPZt4g,8598
 lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
 lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
 lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
 lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
-lamindb/_query_set.py,sha256=
+lamindb/_query_set.py,sha256=n0owd74cTzGz6-mIv8SlDz0wcyRz7Xw3Ke1LhE8UlIg,10784
 lamindb/_registry.py,sha256=fmX-BUnan3Y0WrEAx3qNwRYCIJwJgjoKnRnpgcXujEI,19358
-lamindb/_run.py,sha256=
-lamindb/_save.py,sha256=
+lamindb/_run.py,sha256=We50MUeGH778begutDGoNFM-n5_81_BfMCnZS1bdkt0,1937
+lamindb/_save.py,sha256=_7r3TUV3B6Hp75r5O_ymu3fKWyBHbGa5vmE_pxrtsVI,10923
 lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
-lamindb/_transform.py,sha256=
+lamindb/_transform.py,sha256=E9C7psuOnsNrUQpWRuGgEUM8_pc7YhDn7n4ieHzB4X0,3169
 lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
 lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
 lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
-lamindb/core/__init__.py,sha256=
-lamindb/core/_data.py,sha256=
-lamindb/core/_feature_manager.py,sha256=
+lamindb/core/__init__.py,sha256=TI9_1Jtpwae_cUPQ3-U0RRPH5c3GBA-gLhHvlAk_Nlo,1213
+lamindb/core/_data.py,sha256=Lico6-Vx15bNpGLl1bqFqEsh62pD4YKOOBnmahse1tI,17673
+lamindb/core/_feature_manager.py,sha256=uTzZZ7-qqEAmdwi48Holy2j5VGTgmoQxhb21r6mLShI,15824
 lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
 lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
-lamindb/core/_run_context.py,sha256=
-lamindb/core/_settings.py,sha256=
-lamindb/core/_sync_git.py,sha256=
+lamindb/core/_run_context.py,sha256=7iCCOB2z154puBI7ZKzcaEZ5l6_9S8aSYBOBJI65lyc,17117
+lamindb/core/_settings.py,sha256=rW1KfEXfT56XErwcnSuQxaCytpOy1kJ-u7tVmkmNmxY,6131
+lamindb/core/_sync_git.py,sha256=06Te35UZj2QBaHNcc59VSC9vJgcFct7Z2sK78NLkZBs,4119
 lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
 lamindb/core/_transform_settings.py,sha256=eV96QKX9jOojjzF-a0oo0wXQsMXN2F6QV7orE06oFC8,161
-lamindb/core/_view_tree.py,sha256=PTwmKZSQL2UhKuSdV5Wp7o1JDjv1qwgsVCj3ThkbKb8,3447
 lamindb/core/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
 lamindb/core/fields.py,sha256=Jgi_XI-iTe6cT7oD8FV_JqEpjN1Q9rZWwL8VLtj4jkA,164
 lamindb/core/types.py,sha256=xeQF2x40p2pR9eIVQrXT74RrS810z2fbjmTRTSQUqPM,230
-lamindb/core/versioning.py,sha256=
+lamindb/core/versioning.py,sha256=T9d28erodCUmFlRA7InralbRoffdniPQxBE7qWqs2u8,3601
 lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
-lamindb/core/datasets/_core.py,sha256=
+lamindb/core/datasets/_core.py,sha256=9bcDfVfMZ1h1WAS88ZBjy-R91xbP2KIm_ofHguXAKpY,20177
 lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
-lamindb/core/storage/__init__.py,sha256=
+lamindb/core/storage/__init__.py,sha256=5LUFQKRr2BX24d-yWBezhTXBV83sShcOvPj5Y5u6qIg,441
 lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
 lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA-zLy59uDs,24536
-lamindb/core/storage/_valid_suffixes.py,sha256=
+lamindb/core/storage/_valid_suffixes.py,sha256=J08aglC9oo35pzahj0SQXW9IHib8Asp4dc11co-2uys,212
 lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
-lamindb/core/storage/objects.py,sha256=
-lamindb/core/storage/paths.py,sha256=
+lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
+lamindb/core/storage/paths.py,sha256=JTtiTlAMICH4gkw7iZNwTRfNTT0WxrBoKiag_7E9g4I,7882
 lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
 lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
 lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
 lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
-lamindb-0.71.
-lamindb-0.71.
-lamindb-0.71.
-lamindb-0.71.
+lamindb-0.71.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.71.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lamindb-0.71.2.dist-info/METADATA,sha256=l49_xPwqfUDB6jUvUQoAVeQu8Tj3JUNCfTPB9cqOq_Y,2674
+lamindb-0.71.2.dist-info/RECORD,,
lamindb/core/_view_tree.py
DELETED
@@ -1,116 +0,0 @@
-from __future__ import annotations
-
-from collections import defaultdict
-from pathlib import Path
-from typing import Iterable
-
-from lamindb_setup import settings as setup_settings
-from lnschema_core.models import Artifact, Storage
-
-
-def view_tree(
-    cls,
-    level: int = -1,
-    limit_to_directories: bool = False,
-    length_limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    """{}."""
-    if cls.__class__.__name__ == "QuerySet":
-        print("queryset")
-        qs = cls
-        storage_ids = qs.list("storage_id")
-    elif cls == Artifact:
-        print("file")
-        qs = cls.filter(storage_id=setup_settings.storage.id).all()
-        storage_ids = Storage.filter().list("id")
-    else:
-        print("else")
-        return
-    storages = Storage.filter().all()
-    storage_roots = {
-        storage_id: storages.get(id=storage_id).root for storage_id in storage_ids
-    }
-    keys = set()
-    for artifact in qs:
-        root = storage_roots.get(artifact.storage_id, "")
-        keys.add(f"{root}/{artifact.key}")
-
-    _view_tree(
-        keys=keys,
-        level=level,
-        only_dirs=limit_to_directories,
-        limit=length_limit,
-        max_files_per_dir_per_type=max_files_per_dir_per_type,
-    )
-
-
-def _view_tree(
-    keys: Iterable[str],
-    *,
-    level: int = -1,
-    only_dirs: bool = False,
-    limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    # Create a nested dictionary from keys
-    def tree():
-        return defaultdict(tree)
-
-    root = tree()
-
-    n_files = 0
-    n_directories = 0
-    suffixes = set()
-
-    for key in keys:
-        parts = key.split("/")
-        node = root
-        for part in parts:
-            node = node[part]
-        if node == {}:
-            n_files += 1
-            suffix = Path(part).suffix
-            if suffix:
-                suffixes.add(suffix)
-        else:
-            n_directories += 1
-
-    # Function to print the tree
-    def print_tree(node, prefix="", depth=0, count=None, n_files_per_dir_per_type=None):
-        if count is None:
-            count = [0]
-        if n_files_per_dir_per_type is None:
-            n_files_per_dir_per_type = defaultdict(int)
-
-        if level != -1 and depth > level:
-            return
-        for name, child in node.items():
-            if count[0] >= limit:
-                return
-            if only_dirs and child == {}:
-                continue
-            suffix = Path(name).suffix
-            n_files_per_dir_per_type[suffix] += 1
-            if (
-                depth > 0
-                and n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type
-            ):
-                continue
-            new_prefix = prefix + ("├── " if name != list(node.keys())[-1] else "└── ")
-            print(new_prefix + name)
-            count[0] += 1
-            if child:
-                print_tree(
-                    child,
-                    prefix + ("│   " if name != list(node.keys())[-1] else "    "),
-                    depth + 1,
-                    count,
-                    (
-                        defaultdict(int) if depth == 0 else n_files_per_dir_per_type
-                    ),  # Reset the counter for each directory
-                )
-
-    suffix_message = f" with suffixes {', '.join(suffixes)}" if n_files > 0 else ""
-    print(f"{n_directories} directories, {n_files} files{suffix_message}")
-    print_tree(root)
{lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/LICENSE
File without changes
{lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/WHEEL
File without changes