lamindb 0.70.4__py3-none-any.whl → 0.71.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +2 -1
- lamindb/_artifact.py +56 -47
- lamindb/_finish.py +54 -38
- lamindb/_query_set.py +1 -21
- lamindb/_registry.py +3 -2
- lamindb/_save.py +7 -20
- lamindb/core/__init__.py +0 -2
- lamindb/core/_data.py +1 -1
- lamindb/core/_feature_manager.py +6 -5
- lamindb/core/_run_context.py +44 -71
- lamindb/core/_settings.py +21 -10
- lamindb/core/_sync_git.py +6 -2
- lamindb/core/datasets/_core.py +2 -2
- lamindb/core/storage/__init__.py +2 -2
- lamindb/core/storage/_valid_suffixes.py +4 -2
- lamindb/core/storage/objects.py +10 -3
- lamindb/core/storage/paths.py +8 -5
- {lamindb-0.70.4.dist-info → lamindb-0.71.1.dist-info}/METADATA +7 -10
- {lamindb-0.70.4.dist-info → lamindb-0.71.1.dist-info}/RECORD +21 -22
- lamindb/core/_view_tree.py +0 -116
- {lamindb-0.70.4.dist-info → lamindb-0.71.1.dist-info}/LICENSE +0 -0
- {lamindb-0.70.4.dist-info → lamindb-0.71.1.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_artifact.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import shutil
|
3
4
|
from pathlib import Path, PurePath, PurePosixPath
|
4
5
|
from typing import TYPE_CHECKING, Any, Mapping
|
5
6
|
|
@@ -9,9 +10,9 @@ import pandas as pd
|
|
9
10
|
from anndata import AnnData
|
10
11
|
from lamin_utils import colors, logger
|
11
12
|
from lamindb_setup import settings as setup_settings
|
12
|
-
from lamindb_setup._init_instance import
|
13
|
-
from lamindb_setup.core import StorageSettings
|
13
|
+
from lamindb_setup._init_instance import register_storage_in_instance
|
14
14
|
from lamindb_setup.core._docs import doc_args
|
15
|
+
from lamindb_setup.core._settings_storage import init_storage
|
15
16
|
from lamindb_setup.core.hashing import b16_to_b64, hash_file, hash_md5s_from_dir
|
16
17
|
from lamindb_setup.core.upath import (
|
17
18
|
create_path,
|
@@ -20,7 +21,6 @@ from lamindb_setup.core.upath import (
|
|
20
21
|
get_stat_file_cloud,
|
21
22
|
)
|
22
23
|
from lnschema_core import Artifact, Run, Storage
|
23
|
-
from lnschema_core.models import IsTree
|
24
24
|
from lnschema_core.types import (
|
25
25
|
VisibilityChoice,
|
26
26
|
)
|
@@ -34,8 +34,7 @@ from lamindb.core.storage import (
|
|
34
34
|
delete_storage,
|
35
35
|
infer_suffix,
|
36
36
|
load_to_memory,
|
37
|
-
|
38
|
-
write_to_file,
|
37
|
+
write_to_disk,
|
39
38
|
)
|
40
39
|
from lamindb.core.storage.paths import (
|
41
40
|
auto_storage_key_from_artifact,
|
@@ -100,12 +99,9 @@ def process_pathlike(
|
|
100
99
|
# for the storage root: the bucket
|
101
100
|
if not isinstance(filepath, LocalPathClasses):
|
102
101
|
# for a cloud path, new_root is always the bucket name
|
103
|
-
# we should check this assumption
|
104
102
|
new_root = list(filepath.parents)[-1]
|
105
|
-
|
106
|
-
|
107
|
-
storage_settings = StorageSettings(new_root_str)
|
108
|
-
storage_record = register_storage(storage_settings)
|
103
|
+
storage_settings = init_storage(new_root)
|
104
|
+
storage_record = register_storage_in_instance(storage_settings)
|
109
105
|
use_existing_storage_key = True
|
110
106
|
return storage_record, use_existing_storage_key
|
111
107
|
# if the filepath is local
|
@@ -175,8 +171,7 @@ def process_data(
|
|
175
171
|
# Alex: I don't understand the line below
|
176
172
|
if path.suffixes == []:
|
177
173
|
path = path.with_suffix(suffix)
|
178
|
-
|
179
|
-
write_to_file(data, path)
|
174
|
+
write_to_disk(data, path)
|
180
175
|
use_existing_storage_key = False
|
181
176
|
else:
|
182
177
|
raise NotImplementedError(
|
@@ -196,16 +191,13 @@ def get_stat_or_artifact(
|
|
196
191
|
n_objects = None
|
197
192
|
if settings.upon_file_create_skip_size_hash:
|
198
193
|
return None, None, None, n_objects
|
199
|
-
if suffix == ".zarr" and memory_rep is not None and isinstance(memory_rep, AnnData):
|
200
|
-
size = size_adata(memory_rep)
|
201
|
-
return size, None, None, n_objects
|
202
194
|
stat = path.stat() # one network request
|
203
195
|
if not isinstance(path, LocalPathClasses):
|
204
196
|
size, hash, hash_type = None, None, None
|
205
197
|
if stat is not None:
|
206
198
|
if "ETag" in stat: # is file
|
207
199
|
size, hash, hash_type = get_stat_file_cloud(stat)
|
208
|
-
elif
|
200
|
+
elif stat["type"] == "directory":
|
209
201
|
size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
|
210
202
|
if hash is None:
|
211
203
|
logger.warning(f"did not add hash for {path}")
|
@@ -545,11 +537,13 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
545
537
|
skip_check_exists = (
|
546
538
|
kwargs.pop("skip_check_exists") if "skip_check_exists" in kwargs else False
|
547
539
|
)
|
548
|
-
default_storage
|
549
|
-
kwargs.pop("default_storage")
|
550
|
-
|
551
|
-
|
552
|
-
|
540
|
+
if "default_storage" in kwargs:
|
541
|
+
default_storage = kwargs.pop("default_storage")
|
542
|
+
else:
|
543
|
+
if setup_settings.instance.keep_artifacts_local:
|
544
|
+
default_storage = setup_settings.instance.storage_local.record
|
545
|
+
else:
|
546
|
+
default_storage = setup_settings.instance.storage.record
|
553
547
|
using_key = (
|
554
548
|
kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
|
555
549
|
)
|
@@ -589,7 +583,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
589
583
|
init_self_from_db(artifact, kwargs_or_artifact)
|
590
584
|
# adding "key" here is dangerous because key might be auto-populated
|
591
585
|
update_attributes(artifact, {"description": description})
|
592
|
-
if artifact.key != key:
|
586
|
+
if artifact.key != key and key is not None:
|
593
587
|
logger.warning(
|
594
588
|
f"key {artifact.key} on existing artifact differs from passed key {key}"
|
595
589
|
)
|
@@ -914,11 +908,25 @@ def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs)
|
|
914
908
|
|
915
909
|
# docstring handled through attach_func_to_class_method
|
916
910
|
def cache(self, is_run_input: bool | None = None) -> Path:
|
917
|
-
_track_run_input(self, is_run_input)
|
918
|
-
|
919
911
|
using_key = settings._using_key
|
920
912
|
filepath = filepath_from_artifact(self, using_key=using_key)
|
921
|
-
|
913
|
+
try:
|
914
|
+
cache_path = setup_settings.instance.storage.cloud_to_local(
|
915
|
+
filepath, print_progress=True
|
916
|
+
)
|
917
|
+
except Exception as e:
|
918
|
+
if not isinstance(filepath, LocalPathClasses):
|
919
|
+
cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
|
920
|
+
filepath
|
921
|
+
)
|
922
|
+
if cache_path.is_file():
|
923
|
+
cache_path.unlink(missing_ok=True)
|
924
|
+
elif cache_path.is_dir():
|
925
|
+
shutil.rmtree(cache_path)
|
926
|
+
raise e
|
927
|
+
# only call if sync is successfull
|
928
|
+
_track_run_input(self, is_run_input)
|
929
|
+
return cache_path
|
922
930
|
|
923
931
|
|
924
932
|
# docstring handled through attach_func_to_class_method
|
@@ -995,7 +1003,19 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
|
|
995
1003
|
|
996
1004
|
# docstring handled through attach_func_to_class_method
|
997
1005
|
def save(self, upload: bool | None = None, **kwargs) -> None:
|
1006
|
+
state_was_adding = self._state.adding
|
998
1007
|
access_token = kwargs.pop("access_token", None)
|
1008
|
+
local_path = None
|
1009
|
+
if upload and setup_settings.instance.keep_artifacts_local:
|
1010
|
+
# switch local storage location to cloud
|
1011
|
+
local_path = self.path
|
1012
|
+
self.storage_id = setup_settings.instance.storage.id
|
1013
|
+
self._local_filepath = local_path
|
1014
|
+
# switch to virtual storage key upon upload
|
1015
|
+
# the local filepath is already cached at that point
|
1016
|
+
self.key_is_virtual = True
|
1017
|
+
# ensure that the artifact is uploaded
|
1018
|
+
self._to_store = True
|
999
1019
|
|
1000
1020
|
self._save_skip_storage(**kwargs)
|
1001
1021
|
|
@@ -1011,6 +1031,17 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
|
|
1011
1031
|
exception = check_and_attempt_clearing(self, using_key)
|
1012
1032
|
if exception is not None:
|
1013
1033
|
raise RuntimeError(exception)
|
1034
|
+
if local_path is not None and not state_was_adding:
|
1035
|
+
# only move the local artifact to cache if it was not newly created
|
1036
|
+
local_path_cache = ln_setup.settings.storage.cache_dir / local_path.name
|
1037
|
+
# don't use Path.rename here because of cross-device link error
|
1038
|
+
# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
|
1039
|
+
shutil.move(
|
1040
|
+
local_path, # type: ignore
|
1041
|
+
local_path_cache,
|
1042
|
+
)
|
1043
|
+
logger.important(f"moved local artifact to cache: {local_path_cache}")
|
1044
|
+
return self
|
1014
1045
|
|
1015
1046
|
|
1016
1047
|
def _save_skip_storage(file, **kwargs) -> None:
|
@@ -1027,27 +1058,6 @@ def path(self) -> Path | UPath:
|
|
1027
1058
|
return filepath_from_artifact(self, using_key)
|
1028
1059
|
|
1029
1060
|
|
1030
|
-
@classmethod # type: ignore
|
1031
|
-
@doc_args(IsTree.view_tree.__doc__)
|
1032
|
-
def view_tree(
|
1033
|
-
cls,
|
1034
|
-
level: int = -1,
|
1035
|
-
limit_to_directories: bool = False,
|
1036
|
-
length_limit: int = 1000,
|
1037
|
-
max_files_per_dir_per_type: int = 7,
|
1038
|
-
) -> None:
|
1039
|
-
"""{}."""
|
1040
|
-
from lamindb.core._view_tree import view_tree as _view_tree
|
1041
|
-
|
1042
|
-
_view_tree(
|
1043
|
-
cls=cls,
|
1044
|
-
level=level,
|
1045
|
-
limit_to_directories=limit_to_directories,
|
1046
|
-
length_limit=length_limit,
|
1047
|
-
max_files_per_dir_per_type=max_files_per_dir_per_type,
|
1048
|
-
)
|
1049
|
-
|
1050
|
-
|
1051
1061
|
# docstring handled through attach_func_to_class_method
|
1052
1062
|
def restore(self) -> None:
|
1053
1063
|
self.visibility = VisibilityChoice.default.value
|
@@ -1067,7 +1077,6 @@ METHOD_NAMES = [
|
|
1067
1077
|
"replace",
|
1068
1078
|
"from_dir",
|
1069
1079
|
"restore",
|
1070
|
-
"view_tree",
|
1071
1080
|
]
|
1072
1081
|
|
1073
1082
|
if ln_setup._TESTING:
|
lamindb/_finish.py
CHANGED
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
|
|
8
8
|
|
9
9
|
import lamindb_setup as ln_setup
|
10
10
|
from lamin_utils import logger
|
11
|
+
from lamindb_setup.core.hashing import hash_file
|
11
12
|
from lnschema_core.types import TransformType
|
12
13
|
|
13
14
|
from .core._run_context import is_run_from_ipython, run_context
|
@@ -35,7 +36,7 @@ def get_seconds_since_modified(filepath) -> float:
|
|
35
36
|
def finish():
|
36
37
|
"""Mark a tracked run as finished.
|
37
38
|
|
38
|
-
|
39
|
+
Saves source code and, for notebooks, a run report to your default storage location.
|
39
40
|
"""
|
40
41
|
if run_context.path is None:
|
41
42
|
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
|
@@ -47,16 +48,12 @@ def finish():
|
|
47
48
|
raise NotebookNotSaved(
|
48
49
|
"Please save the notebook in your editor right before running `ln.finish()`"
|
49
50
|
)
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
else: # scripts
|
57
|
-
# save_run_context_core was already called during ln.track()
|
58
|
-
run_context.run.finished_at = datetime.now(timezone.utc) # update run time
|
59
|
-
run_context.run.save()
|
51
|
+
save_run_context_core(
|
52
|
+
run=run_context.run,
|
53
|
+
transform=run_context.transform,
|
54
|
+
filepath=run_context.path,
|
55
|
+
finished_at=True,
|
56
|
+
)
|
60
57
|
|
61
58
|
|
62
59
|
def save_run_context_core(
|
@@ -121,7 +118,11 @@ def save_run_context_core(
|
|
121
118
|
# first, copy the notebook file to a temporary file in the cache
|
122
119
|
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
|
123
120
|
shutil.copy2(filepath, source_code_path) # copy
|
124
|
-
subprocess.run(
|
121
|
+
subprocess.run(
|
122
|
+
f"nbstripout '{source_code_path}' --extra-keys='metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension'",
|
123
|
+
shell=True,
|
124
|
+
check=True,
|
125
|
+
)
|
125
126
|
# find initial versions of source codes and html reports
|
126
127
|
prev_report = None
|
127
128
|
prev_source = None
|
@@ -134,22 +135,27 @@ def save_run_context_core(
|
|
134
135
|
if prev_transform.source_code_id is not None:
|
135
136
|
prev_source = prev_transform.source_code
|
136
137
|
ln.settings.silence_file_run_transform_warning = True
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
138
|
+
|
139
|
+
# track source code
|
140
|
+
if transform.source_code_id is not None:
|
141
|
+
# check if the hash of the transform source code matches
|
142
|
+
# (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
|
143
|
+
hash, _ = hash_file(source_code_path) # ignore hash_type for now
|
144
|
+
if hash != transform.source_code.hash:
|
142
145
|
if os.getenv("LAMIN_TESTING") is None:
|
143
146
|
# in test, auto-confirm overwrite
|
144
147
|
response = input(
|
145
|
-
f"You are about to overwrite existing source code (hash {transform.source_code.hash}) for transform version"
|
148
|
+
f"You are about to replace (overwrite) existing source code (hash '{transform.source_code.hash}') for transform version"
|
146
149
|
f" '{transform.version}'. Proceed? (y/n)"
|
147
150
|
)
|
148
151
|
else:
|
149
152
|
response = "y"
|
150
153
|
if response == "y":
|
151
154
|
transform.source_code.replace(source_code_path)
|
152
|
-
transform.source_code.save()
|
155
|
+
transform.source_code.save(upload=True)
|
156
|
+
logger.success(
|
157
|
+
f"replaced transform.source_code: {transform.source_code}"
|
158
|
+
)
|
153
159
|
else:
|
154
160
|
logger.warning("Please re-run `ln.track()` to make a new version")
|
155
161
|
return "rerun-the-notebook"
|
@@ -162,24 +168,35 @@ def save_run_context_core(
|
|
162
168
|
visibility=0, # hidden file
|
163
169
|
run=False,
|
164
170
|
)
|
165
|
-
source_code.save()
|
171
|
+
source_code.save(upload=True)
|
166
172
|
transform.source_code = source_code
|
167
173
|
logger.success(f"saved transform.source_code: {transform.source_code}")
|
174
|
+
|
168
175
|
# track environment
|
169
176
|
filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
|
170
177
|
if filepath_env.exists():
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
178
|
+
hash, _ = hash_file(filepath_env)
|
179
|
+
artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
|
180
|
+
new_env_artifact = artifact is None
|
181
|
+
if new_env_artifact:
|
182
|
+
artifact = ln.Artifact(
|
183
|
+
filepath_env,
|
184
|
+
description="requirements.txt",
|
185
|
+
visibility=0,
|
186
|
+
run=False,
|
187
|
+
)
|
188
|
+
artifact.save(upload=True)
|
179
189
|
run.environment = artifact
|
180
|
-
|
181
|
-
|
190
|
+
if new_env_artifact:
|
191
|
+
logger.success(f"saved run.environment: {run.environment}")
|
192
|
+
|
193
|
+
# set finished_at
|
194
|
+
if finished_at:
|
195
|
+
run.finished_at = datetime.now(timezone.utc)
|
196
|
+
|
197
|
+
# track report and set is_consecutive
|
182
198
|
if not transform.type == TransformType.notebook:
|
199
|
+
run.is_consecutive = True
|
183
200
|
run.save()
|
184
201
|
else:
|
185
202
|
if run.report_id is not None:
|
@@ -187,7 +204,7 @@ def save_run_context_core(
|
|
187
204
|
"there is already an existing report for this run, replacing it"
|
188
205
|
)
|
189
206
|
run.report.replace(filepath_html)
|
190
|
-
run.report.save()
|
207
|
+
run.report.save(upload=True)
|
191
208
|
else:
|
192
209
|
report_file = ln.Artifact(
|
193
210
|
filepath_html,
|
@@ -196,19 +213,18 @@ def save_run_context_core(
|
|
196
213
|
visibility=0, # hidden file
|
197
214
|
run=False,
|
198
215
|
)
|
199
|
-
report_file.save()
|
216
|
+
report_file.save(upload=True)
|
200
217
|
run.report = report_file
|
201
218
|
run.is_consecutive = is_consecutive
|
202
|
-
if finished_at:
|
203
|
-
run.finished_at = datetime.now(timezone.utc)
|
204
219
|
run.save()
|
205
220
|
transform.latest_report = run.report
|
206
|
-
transform.save()
|
207
|
-
if transform.type == TransformType.notebook:
|
208
221
|
logger.success(f"saved transform.latest_report: {transform.latest_report}")
|
209
|
-
|
222
|
+
transform.save()
|
223
|
+
|
224
|
+
# finalize
|
225
|
+
if ln_setup.settings.instance.is_on_hub:
|
210
226
|
identifier = ln_setup.settings.instance.slug
|
211
|
-
logger.
|
227
|
+
logger.important(
|
212
228
|
f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
|
213
229
|
)
|
214
230
|
# because run & transform changed, update the global run_context
|
lamindb/_query_set.py
CHANGED
@@ -11,7 +11,6 @@ from lnschema_core.models import (
|
|
11
11
|
Artifact,
|
12
12
|
CanValidate,
|
13
13
|
Collection,
|
14
|
-
IsTree,
|
15
14
|
IsVersioned,
|
16
15
|
Registry,
|
17
16
|
Run,
|
@@ -83,7 +82,7 @@ class RecordsList(UserList):
|
|
83
82
|
return one_helper(self)
|
84
83
|
|
85
84
|
|
86
|
-
class QuerySet(models.QuerySet, CanValidate
|
85
|
+
class QuerySet(models.QuerySet, CanValidate):
|
87
86
|
"""Sets of records returned by queries.
|
88
87
|
|
89
88
|
See Also:
|
@@ -265,25 +264,6 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
265
264
|
|
266
265
|
return _standardize(cls=self, values=values, field=field, **kwargs)
|
267
266
|
|
268
|
-
@doc_args(IsTree.view_tree.__doc__)
|
269
|
-
def view_tree(
|
270
|
-
self,
|
271
|
-
level: int = -1,
|
272
|
-
limit_to_directories: bool = False,
|
273
|
-
length_limit: int = 1000,
|
274
|
-
max_files_per_dir_per_type: int = 7,
|
275
|
-
) -> None:
|
276
|
-
"""{}."""
|
277
|
-
from .core._view_tree import view_tree as _view_tree
|
278
|
-
|
279
|
-
_view_tree(
|
280
|
-
cls=self,
|
281
|
-
level=level,
|
282
|
-
limit_to_directories=limit_to_directories,
|
283
|
-
length_limit=length_limit,
|
284
|
-
max_files_per_dir_per_type=max_files_per_dir_per_type,
|
285
|
-
)
|
286
|
-
|
287
267
|
|
288
268
|
def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
|
289
269
|
# evaluating length can be very costly, hence, the try-except block
|
lamindb/_registry.py
CHANGED
@@ -113,7 +113,7 @@ def __init__(orm: Registry, *args, **kwargs):
|
|
113
113
|
logger.warning(
|
114
114
|
f"loaded {orm.__class__.__name__} record with same"
|
115
115
|
f" name{version_comment}: '{kwargs['name']}' "
|
116
|
-
"(disable via ln.settings.upon_create_search_names)"
|
116
|
+
"(disable via `ln.settings.upon_create_search_names`)"
|
117
117
|
)
|
118
118
|
init_self_from_db(orm, existing_record)
|
119
119
|
return None
|
@@ -498,7 +498,7 @@ def transfer_to_default_db(
|
|
498
498
|
|
499
499
|
|
500
500
|
# docstring handled through attach_func_to_class_method
|
501
|
-
def save(self, *args, **kwargs) ->
|
501
|
+
def save(self, *args, **kwargs) -> Registry:
|
502
502
|
using_key = None
|
503
503
|
if "using" in kwargs:
|
504
504
|
using_key = kwargs["using"]
|
@@ -540,6 +540,7 @@ def save(self, *args, **kwargs) -> None:
|
|
540
540
|
self.features._add_from(self_on_db, **add_from_kwargs)
|
541
541
|
logger.info("transfer labels")
|
542
542
|
self.labels.add_from(self_on_db, **add_from_kwargs)
|
543
|
+
return self
|
543
544
|
|
544
545
|
|
545
546
|
METHOD_NAMES = [
|
lamindb/_save.py
CHANGED
@@ -6,13 +6,13 @@ import traceback
|
|
6
6
|
from collections import defaultdict
|
7
7
|
from datetime import datetime
|
8
8
|
from functools import partial
|
9
|
-
from typing import Iterable, overload
|
9
|
+
from typing import TYPE_CHECKING, Iterable, overload
|
10
10
|
|
11
11
|
import lamindb_setup
|
12
12
|
from django.db import transaction
|
13
13
|
from django.utils.functional import partition
|
14
14
|
from lamin_utils import logger
|
15
|
-
from lamindb_setup.core.upath import
|
15
|
+
from lamindb_setup.core.upath import print_hook
|
16
16
|
from lnschema_core.models import Artifact, Registry
|
17
17
|
|
18
18
|
from lamindb.core._settings import settings
|
@@ -23,12 +23,8 @@ from lamindb.core.storage.paths import (
|
|
23
23
|
store_file_or_folder,
|
24
24
|
)
|
25
25
|
|
26
|
-
|
27
|
-
from
|
28
|
-
except ImportError:
|
29
|
-
|
30
|
-
def write_adata_zarr(filepath): # type: ignore
|
31
|
-
raise ImportError("Please install zarr: pip install zarr")
|
26
|
+
if TYPE_CHECKING:
|
27
|
+
from lamindb_setup.core.upath import UPath
|
32
28
|
|
33
29
|
|
34
30
|
def save(
|
@@ -162,7 +158,7 @@ def check_and_attempt_upload(
|
|
162
158
|
def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
|
163
159
|
local_path = artifact._local_filepath
|
164
160
|
|
165
|
-
#
|
161
|
+
# in-memory cases
|
166
162
|
if local_path is None or not local_path.exists():
|
167
163
|
return None
|
168
164
|
|
@@ -284,16 +280,7 @@ def upload_artifact(
|
|
284
280
|
storage_path = attempt_accessing_path(
|
285
281
|
artifact, storage_key, using_key=using_key, access_token=access_token
|
286
282
|
)
|
287
|
-
|
288
|
-
|
289
|
-
artifact.suffix == ".zarr"
|
290
|
-
and hasattr(artifact, "_memory_rep")
|
291
|
-
and artifact._memory_rep is not None
|
292
|
-
):
|
293
|
-
logger.save(msg)
|
294
|
-
print_progress = partial(print_hook, filepath=storage_path, action="uploading")
|
295
|
-
write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
|
296
|
-
elif hasattr(artifact, "_to_store") and artifact._to_store:
|
297
|
-
logger.save(msg)
|
283
|
+
if hasattr(artifact, "_to_store") and artifact._to_store:
|
284
|
+
logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
|
298
285
|
store_file_or_folder(artifact._local_filepath, storage_path)
|
299
286
|
return storage_path
|
lamindb/core/__init__.py
CHANGED
@@ -12,7 +12,6 @@ Registries:
|
|
12
12
|
Data
|
13
13
|
FeatureManager
|
14
14
|
LabelManager
|
15
|
-
IsTree
|
16
15
|
IsVersioned
|
17
16
|
CanValidate
|
18
17
|
HasParents
|
@@ -55,7 +54,6 @@ from lnschema_core.models import (
|
|
55
54
|
CanValidate,
|
56
55
|
Data,
|
57
56
|
HasParents,
|
58
|
-
IsTree,
|
59
57
|
IsVersioned,
|
60
58
|
Registry,
|
61
59
|
)
|
lamindb/core/_data.py
CHANGED
@@ -345,7 +345,7 @@ def add_labels(
|
|
345
345
|
f" {old_feature_set}"
|
346
346
|
)
|
347
347
|
old_feature_set.delete()
|
348
|
-
self.features.
|
348
|
+
self.features.add_feature_set(feature_set, slot="external")
|
349
349
|
logger.save(
|
350
350
|
f"linked new feature '{feature.name}' together with new feature set"
|
351
351
|
f" {feature_set}"
|
lamindb/core/_feature_manager.py
CHANGED
@@ -236,7 +236,7 @@ class FeatureManager:
|
|
236
236
|
and self._host.artifact.accessor == "DataFrame"
|
237
237
|
):
|
238
238
|
slot = "columns" if slot is None else slot
|
239
|
-
self.
|
239
|
+
self.add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
|
240
240
|
|
241
241
|
def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
|
242
242
|
"""Add features from DataFrame."""
|
@@ -325,7 +325,7 @@ class FeatureManager:
|
|
325
325
|
self._host._feature_sets = feature_sets
|
326
326
|
self._host.save()
|
327
327
|
|
328
|
-
def
|
328
|
+
def add_feature_set(self, feature_set: FeatureSet, slot: str):
|
329
329
|
"""Add new feature set to a slot.
|
330
330
|
|
331
331
|
Args:
|
@@ -405,7 +405,8 @@ class FeatureManager:
|
|
405
405
|
f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
|
406
406
|
)
|
407
407
|
continue
|
408
|
-
#
|
409
|
-
|
408
|
+
# make sure the uid matches if featureset is composed of same features
|
409
|
+
if feature_set_self.hash == feature_set.hash:
|
410
|
+
feature_set_self.uid = feature_set.uid
|
410
411
|
logger.info(f"saving {slot} featureset: {feature_set_self}")
|
411
|
-
self._host.features.
|
412
|
+
self._host.features.add_feature_set(feature_set_self, slot)
|
lamindb/core/_run_context.py
CHANGED
@@ -3,16 +3,12 @@ from __future__ import annotations
|
|
3
3
|
import builtins
|
4
4
|
import hashlib
|
5
5
|
import os
|
6
|
-
import re
|
7
|
-
import subprocess
|
8
|
-
import sys
|
9
6
|
from datetime import datetime, timezone
|
10
7
|
from pathlib import Path, PurePath
|
11
|
-
from typing import TYPE_CHECKING
|
8
|
+
from typing import TYPE_CHECKING
|
12
9
|
|
13
10
|
from lamin_utils import logger
|
14
|
-
from lamindb_setup import
|
15
|
-
from lamindb_setup.core import InstanceSettings
|
11
|
+
from lamindb_setup.core.hashing import hash_file
|
16
12
|
from lnschema_core import Run, Transform, ids
|
17
13
|
from lnschema_core.types import TransformType
|
18
14
|
from lnschema_core.users import current_user_id
|
@@ -59,42 +55,6 @@ def get_uid_ext(version: str) -> str:
|
|
59
55
|
return encodebytes(hashlib.md5(version.encode()).digest())[:4]
|
60
56
|
|
61
57
|
|
62
|
-
def get_stem_uid_and_version_from_file(file_path: Path) -> tuple[str, str]:
|
63
|
-
# line-by-line matching might be faster, but let's go with this for now
|
64
|
-
with open(file_path) as file:
|
65
|
-
content = file.read()
|
66
|
-
|
67
|
-
if file_path.suffix == ".py":
|
68
|
-
stem_uid_pattern = re.compile(
|
69
|
-
r'\.transform\.stem_uid\s*=\s*["\']([^"\']+)["\']'
|
70
|
-
)
|
71
|
-
version_pattern = re.compile(r'\.transform\.version\s*=\s*["\']([^"\']+)["\']')
|
72
|
-
elif file_path.suffix == ".ipynb":
|
73
|
-
stem_uid_pattern = re.compile(
|
74
|
-
r'\.transform\.stem_uid\s*=\s*\\["\']([^"\']+)\\["\']'
|
75
|
-
)
|
76
|
-
version_pattern = re.compile(
|
77
|
-
r'\.transform\.version\s*=\s*\\["\']([^"\']+)\\["\']'
|
78
|
-
)
|
79
|
-
else:
|
80
|
-
raise ValueError("Only .py and .ipynb files are supported.")
|
81
|
-
|
82
|
-
# Search for matches in the entire file content
|
83
|
-
stem_uid_match = stem_uid_pattern.search(content)
|
84
|
-
version_match = version_pattern.search(content)
|
85
|
-
|
86
|
-
# Extract values if matches are found
|
87
|
-
stem_uid = stem_uid_match.group(1) if stem_uid_match else None
|
88
|
-
version = version_match.group(1) if version_match else None
|
89
|
-
|
90
|
-
if stem_uid is None or version is None:
|
91
|
-
raise SystemExit(
|
92
|
-
f"ln.settings.transform.stem_uid and ln.settings.transform.version aren't set in {file_path}\n"
|
93
|
-
"Call ln.track() and copy/paste the output into the notebook"
|
94
|
-
)
|
95
|
-
return stem_uid, version
|
96
|
-
|
97
|
-
|
98
58
|
def update_stem_uid_or_version(
|
99
59
|
stem_uid: str,
|
100
60
|
version: str,
|
@@ -216,6 +176,17 @@ def raise_transform_settings_error() -> None:
|
|
216
176
|
)
|
217
177
|
|
218
178
|
|
179
|
+
def pretty_pypackages(dependencies: dict) -> str:
|
180
|
+
deps_list = []
|
181
|
+
for pkg, ver in dependencies.items():
|
182
|
+
if ver != "":
|
183
|
+
deps_list.append(pkg + f"=={ver}")
|
184
|
+
else:
|
185
|
+
deps_list.append(pkg)
|
186
|
+
deps_list.sort()
|
187
|
+
return " ".join(deps_list)
|
188
|
+
|
189
|
+
|
219
190
|
class run_context:
|
220
191
|
"""Global run context."""
|
221
192
|
|
@@ -356,16 +327,6 @@ class run_context:
|
|
356
327
|
from ._track_environment import track_environment
|
357
328
|
|
358
329
|
track_environment(run)
|
359
|
-
|
360
|
-
if not is_run_from_ipython and cls.path is not None:
|
361
|
-
# upload run source code & environment
|
362
|
-
from lamindb._finish import save_run_context_core
|
363
|
-
|
364
|
-
save_run_context_core(
|
365
|
-
run=cls.run,
|
366
|
-
transform=cls.transform,
|
367
|
-
filepath=cls.path,
|
368
|
-
)
|
369
330
|
return None
|
370
331
|
|
371
332
|
@classmethod
|
@@ -427,17 +388,12 @@ class run_context:
|
|
427
388
|
# log imported python packages
|
428
389
|
if not path_str.startswith("/fileId="):
|
429
390
|
try:
|
430
|
-
from nbproject.dev._metadata_display import DisplayMeta
|
431
391
|
from nbproject.dev._pypackage import infer_pypackages
|
432
392
|
|
433
|
-
|
434
|
-
filepath=path_str,
|
435
|
-
metadata_only=True,
|
436
|
-
)
|
437
|
-
dm = DisplayMeta(metadata)
|
393
|
+
nb = nbproject.dev.read_notebook(path_str)
|
438
394
|
logger.important(
|
439
395
|
"notebook imports:"
|
440
|
-
f" {
|
396
|
+
f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
|
441
397
|
)
|
442
398
|
except Exception:
|
443
399
|
logger.debug("inferring imported packages failed")
|
@@ -492,19 +448,36 @@ class run_context:
|
|
492
448
|
transform.save()
|
493
449
|
logger.important(f"updated: {transform}")
|
494
450
|
# check whether the notebook source code was already saved
|
495
|
-
if
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
451
|
+
if transform.source_code_id is not None:
|
452
|
+
response = None
|
453
|
+
if is_run_from_ipython:
|
454
|
+
if os.getenv("LAMIN_TESTING") is None:
|
455
|
+
response = input(
|
456
|
+
"You already saved source code for this notebook."
|
457
|
+
" Bump the version before a new run? (y/n)"
|
458
|
+
)
|
459
|
+
else:
|
460
|
+
response = "y"
|
505
461
|
else:
|
506
|
-
|
507
|
-
|
462
|
+
hash, _ = hash_file(cls.path) # ignore hash_type for now
|
463
|
+
if hash != transform.source_code.hash:
|
464
|
+
# only if hashes don't match, we need user input
|
465
|
+
if os.getenv("LAMIN_TESTING") is None:
|
466
|
+
response = input(
|
467
|
+
"You already saved source code for this script and meanwhile modified it without bumping a version."
|
468
|
+
" Bump the version before a new run? (y/n)"
|
469
|
+
)
|
470
|
+
else:
|
471
|
+
response = "y"
|
472
|
+
else:
|
473
|
+
logger.important(f"loaded: {transform}")
|
474
|
+
if response is not None:
|
475
|
+
# if a script is re-run and hashes match, we don't need user input
|
476
|
+
if response == "y":
|
477
|
+
update_stem_uid_or_version(stem_uid, version, bump_version=True)
|
478
|
+
else:
|
479
|
+
# we want a new stem_uid in this case, hence raise the error
|
480
|
+
raise_transform_settings_error()
|
508
481
|
else:
|
509
482
|
logger.important(f"loaded: {transform}")
|
510
483
|
cls.transform = transform
|
lamindb/core/_settings.py
CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, Mapping
|
|
5
5
|
|
6
6
|
import lamindb_setup as ln_setup
|
7
7
|
from lamin_utils import logger
|
8
|
-
from lamindb_setup.
|
8
|
+
from lamindb_setup._set_managed_storage import set_managed_storage
|
9
9
|
from lamindb_setup.core._settings import settings as setup_settings
|
10
10
|
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
11
11
|
|
@@ -92,11 +92,11 @@ class Settings:
|
|
92
92
|
self.__using_key = value
|
93
93
|
|
94
94
|
@property
|
95
|
-
def _storage_settings(self) -> ln_setup.
|
95
|
+
def _storage_settings(self) -> ln_setup.core.StorageSettings:
|
96
96
|
if self._using_storage is None:
|
97
97
|
storage_settings = ln_setup.settings.storage
|
98
98
|
else:
|
99
|
-
storage_settings = ln_setup.
|
99
|
+
storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
|
100
100
|
return storage_settings
|
101
101
|
|
102
102
|
@property
|
@@ -127,7 +127,7 @@ class Settings:
|
|
127
127
|
|
128
128
|
Examples:
|
129
129
|
|
130
|
-
You can
|
130
|
+
You can switch to another managed storage location via:
|
131
131
|
|
132
132
|
>>> ln.settings.storage = "s3://some-bucket"
|
133
133
|
|
@@ -143,14 +143,27 @@ class Settings:
|
|
143
143
|
|
144
144
|
@storage.setter
|
145
145
|
def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
|
146
|
-
logger.warning(
|
147
|
-
"you'll no longer be able to set arbitrary storage locations soon"
|
148
|
-
)
|
149
146
|
if isinstance(path_kwargs, tuple):
|
150
147
|
path, kwargs = path_kwargs
|
151
148
|
else:
|
152
149
|
path, kwargs = path_kwargs, {}
|
153
|
-
|
150
|
+
set_managed_storage(path, **kwargs)
|
151
|
+
|
152
|
+
@property
|
153
|
+
def storage_local(self) -> Path:
|
154
|
+
"""An additional local default storage (a path to its root).
|
155
|
+
|
156
|
+
Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
|
157
|
+
|
158
|
+
Guide: :doc:`faq/keep-artifacts-local`
|
159
|
+
|
160
|
+
Shortcut for: `ln.setup.settings.instance.storage_local.root`
|
161
|
+
"""
|
162
|
+
return ln_setup.settings.instance.storage_local.root
|
163
|
+
|
164
|
+
@storage_local.setter
|
165
|
+
def storage_local(self, local_root: Path):
|
166
|
+
ln_setup.settings.instance.storage_local = local_root
|
154
167
|
|
155
168
|
@property
|
156
169
|
def verbosity(self) -> str:
|
@@ -162,8 +175,6 @@ class Settings:
|
|
162
175
|
- 'info': 💡 also show info messages
|
163
176
|
- 'hint': 💡 also show hint messages
|
164
177
|
- 'debug': 🐛 also show detailed debug messages
|
165
|
-
|
166
|
-
This is based on Scanpy's and Django's verbosity setting.
|
167
178
|
"""
|
168
179
|
return VERBOSITY_TO_STR[self._verbosity_int]
|
169
180
|
|
lamindb/core/_sync_git.py
CHANGED
@@ -61,11 +61,15 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
|
|
61
61
|
capture_output=True,
|
62
62
|
cwd=repo_dir,
|
63
63
|
)
|
64
|
-
|
64
|
+
# we just care to find one commit
|
65
|
+
# hence, we split by new line ("\n") and use the first one
|
66
|
+
commit_hash = result.stdout.decode().split("\n")[0]
|
65
67
|
if commit_hash == "" or result.returncode == 1:
|
66
68
|
return None
|
67
69
|
else:
|
68
|
-
assert
|
70
|
+
assert (
|
71
|
+
len(commit_hash) == 40
|
72
|
+
), f"commit hash |{commit_hash}| is not 40 characters long"
|
69
73
|
return commit_hash
|
70
74
|
|
71
75
|
|
lamindb/core/datasets/_core.py
CHANGED
@@ -161,8 +161,8 @@ def anndata_mouse_sc_lymph_node(
|
|
161
161
|
adata.obs.columns = (
|
162
162
|
adata.obs.columns.str.replace("Sample Characteristic", "")
|
163
163
|
.str.replace("Factor Value ", "Factor Value:", regex=True)
|
164
|
-
.str.replace("Factor Value
|
165
|
-
.str.replace(" Ontology Term
|
164
|
+
.str.replace("Factor Value\\[", "Factor Value:", regex=True)
|
165
|
+
.str.replace(" Ontology Term\\[", "ontology_id:", regex=True)
|
166
166
|
.str.strip("[]")
|
167
167
|
.str.replace("organism part", "tissue")
|
168
168
|
.str.replace("organism", "organism")
|
lamindb/core/storage/__init__.py
CHANGED
@@ -10,6 +10,6 @@ from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
|
|
10
10
|
|
11
11
|
from ._anndata_sizes import size_adata
|
12
12
|
from ._backed_access import AnnDataAccessor, BackedAccessor
|
13
|
-
from ._valid_suffixes import VALID_SUFFIXES
|
14
|
-
from .objects import infer_suffix,
|
13
|
+
from ._valid_suffixes import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
|
14
|
+
from .objects import infer_suffix, write_to_disk
|
15
15
|
from .paths import delete_storage, load_to_memory
|
@@ -1,3 +1,5 @@
|
|
1
|
-
from lamindb_setup.core.upath import VALID_SUFFIXES
|
1
|
+
from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
|
2
2
|
|
3
|
-
|
3
|
+
# add new composite suffixes like so
|
4
|
+
VALID_COMPOSITE_SUFFIXES.update({".vitessce.json"})
|
5
|
+
# can do the same for simple valid suffixes
|
lamindb/core/storage/objects.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
from pathlib import PurePosixPath
|
3
4
|
from typing import TYPE_CHECKING
|
4
5
|
|
5
6
|
from anndata import AnnData
|
@@ -21,7 +22,7 @@ def infer_suffix(dmem, adata_format: str | None = None):
|
|
21
22
|
"""Infer LaminDB storage file suffix from a data object."""
|
22
23
|
if isinstance(dmem, AnnData):
|
23
24
|
if adata_format is not None:
|
24
|
-
if adata_format not in
|
25
|
+
if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
|
25
26
|
raise ValueError(
|
26
27
|
"Error when specifying AnnData storage format, it should be"
|
27
28
|
f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
|
@@ -40,9 +41,15 @@ def infer_suffix(dmem, adata_format: str | None = None):
|
|
40
41
|
raise NotImplementedError
|
41
42
|
|
42
43
|
|
43
|
-
def
|
44
|
+
def write_to_disk(dmem, filepath: UPathStr):
|
44
45
|
if isinstance(dmem, AnnData):
|
45
|
-
|
46
|
+
suffix = PurePosixPath(filepath).suffix
|
47
|
+
if suffix == ".h5ad":
|
48
|
+
dmem.write_h5ad(filepath)
|
49
|
+
elif suffix == ".zarr":
|
50
|
+
dmem.write_zarr(filepath)
|
51
|
+
else:
|
52
|
+
raise NotImplementedError
|
46
53
|
elif isinstance(dmem, DataFrame):
|
47
54
|
dmem.to_parquet(filepath)
|
48
55
|
else:
|
lamindb/core/storage/paths.py
CHANGED
@@ -75,9 +75,6 @@ def attempt_accessing_path(
|
|
75
75
|
settings.storage, access_token=access_token
|
76
76
|
)
|
77
77
|
else:
|
78
|
-
logger.debug(
|
79
|
-
"artifact.path is slightly slower for files outside default storage"
|
80
|
-
)
|
81
78
|
if artifact._state.db not in ("default", None) and using_key is None:
|
82
79
|
storage = (
|
83
80
|
Storage.using(artifact._state.db).filter(id=artifact.storage_id).one()
|
@@ -141,8 +138,14 @@ def delete_storage(storagepath: Path):
|
|
141
138
|
# replace with check_path_is_child_of_root but this needs to first be debugged
|
142
139
|
# if not check_path_is_child_of_root(storagepath, settings.storage):
|
143
140
|
if not storagepath.is_relative_to(settings.storage): # type: ignore
|
144
|
-
|
145
|
-
|
141
|
+
allow_delete = False
|
142
|
+
if setup_settings.instance.keep_artifacts_local:
|
143
|
+
allow_delete = storagepath.is_relative_to( # type: ignore
|
144
|
+
setup_settings.instance.storage_local.root
|
145
|
+
)
|
146
|
+
if not allow_delete:
|
147
|
+
logger.warning("couldn't delete files outside of default storage")
|
148
|
+
return "did-not-delete"
|
146
149
|
# only delete files in the default storage
|
147
150
|
if storagepath.is_file():
|
148
151
|
storagepath.unlink()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.71.1
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
12
|
+
Requires-Dist: lnschema_core==0.66.3
|
13
|
+
Requires-Dist: lamindb_setup==0.71.3
|
14
14
|
Requires-Dist: lamin_utils==0.13.2
|
15
|
-
Requires-Dist: lamin_cli==0.
|
15
|
+
Requires-Dist: lamin_cli==0.13.1
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -23,10 +23,7 @@ Requires-Dist: fsspec
|
|
23
23
|
Requires-Dist: pandas
|
24
24
|
Requires-Dist: graphviz
|
25
25
|
Requires-Dist: psycopg2-binary
|
26
|
-
Requires-Dist:
|
27
|
-
Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
|
28
|
-
Requires-Dist: s3fs==2023.12.2 ; extra == "aws"
|
29
|
-
Requires-Dist: fsspec[s3]==2023.12.2 ; extra == "aws"
|
26
|
+
Requires-Dist: lamindb_setup[aws] ; extra == "aws"
|
30
27
|
Requires-Dist: bionty==0.42.9 ; extra == "bionty"
|
31
28
|
Requires-Dist: pandas<2 ; extra == "dev"
|
32
29
|
Requires-Dist: pre-commit ; extra == "dev"
|
@@ -39,8 +36,8 @@ Requires-Dist: nbproject_test>=0.5.1 ; extra == "dev"
|
|
39
36
|
Requires-Dist: faker-biology ; extra == "dev"
|
40
37
|
Requires-Dist: django-schema-graph ; extra == "erdiagram"
|
41
38
|
Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
|
42
|
-
Requires-Dist:
|
43
|
-
Requires-Dist: nbproject==0.10.
|
39
|
+
Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
|
40
|
+
Requires-Dist: nbproject==0.10.2 ; extra == "jupyter"
|
44
41
|
Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
|
45
42
|
Requires-Dist: nbconvert ; extra == "jupyter"
|
46
43
|
Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
|
@@ -1,55 +1,54 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=PbZGEkozIsD8RM3XLafkYGo4iPJy7FJFdzVr0VW7Zy0,2182
|
2
2
|
lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
|
3
|
-
lamindb/_artifact.py,sha256=
|
3
|
+
lamindb/_artifact.py,sha256=8uBW-dhuWyBUQGs728sAPCnuhTic-NKjSbaneF07aMo,40106
|
4
4
|
lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
|
5
5
|
lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
|
6
6
|
lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
|
7
7
|
lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
|
8
8
|
lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
|
9
|
-
lamindb/_finish.py,sha256=
|
9
|
+
lamindb/_finish.py,sha256=6GwhqrC-x-JdFd16i7-uyhCWeQgGKxr25aSsSXPZt4g,8598
|
10
10
|
lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
|
11
11
|
lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
|
12
12
|
lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
|
13
13
|
lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
|
14
|
-
lamindb/_query_set.py,sha256=
|
15
|
-
lamindb/_registry.py,sha256
|
14
|
+
lamindb/_query_set.py,sha256=n0owd74cTzGz6-mIv8SlDz0wcyRz7Xw3Ke1LhE8UlIg,10784
|
15
|
+
lamindb/_registry.py,sha256=fmX-BUnan3Y0WrEAx3qNwRYCIJwJgjoKnRnpgcXujEI,19358
|
16
16
|
lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
|
17
|
-
lamindb/_save.py,sha256=
|
17
|
+
lamindb/_save.py,sha256=_7r3TUV3B6Hp75r5O_ymu3fKWyBHbGa5vmE_pxrtsVI,10923
|
18
18
|
lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
|
19
19
|
lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
|
20
20
|
lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
|
21
21
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
22
22
|
lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
|
23
|
-
lamindb/core/__init__.py,sha256=
|
24
|
-
lamindb/core/_data.py,sha256=
|
25
|
-
lamindb/core/_feature_manager.py,sha256=
|
23
|
+
lamindb/core/__init__.py,sha256=TI9_1Jtpwae_cUPQ3-U0RRPH5c3GBA-gLhHvlAk_Nlo,1213
|
24
|
+
lamindb/core/_data.py,sha256=Lico6-Vx15bNpGLl1bqFqEsh62pD4YKOOBnmahse1tI,17673
|
25
|
+
lamindb/core/_feature_manager.py,sha256=uTzZZ7-qqEAmdwi48Holy2j5VGTgmoQxhb21r6mLShI,15824
|
26
26
|
lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
|
27
27
|
lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
|
28
|
-
lamindb/core/_run_context.py,sha256=
|
29
|
-
lamindb/core/_settings.py,sha256=
|
30
|
-
lamindb/core/_sync_git.py,sha256=
|
28
|
+
lamindb/core/_run_context.py,sha256=3Pa9DQRR9_OZTMJyezi4p_ZIdL6JsKnQ8gM57whFpMo,16926
|
29
|
+
lamindb/core/_settings.py,sha256=rW1KfEXfT56XErwcnSuQxaCytpOy1kJ-u7tVmkmNmxY,6131
|
30
|
+
lamindb/core/_sync_git.py,sha256=06Te35UZj2QBaHNcc59VSC9vJgcFct7Z2sK78NLkZBs,4119
|
31
31
|
lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
|
32
32
|
lamindb/core/_transform_settings.py,sha256=eV96QKX9jOojjzF-a0oo0wXQsMXN2F6QV7orE06oFC8,161
|
33
|
-
lamindb/core/_view_tree.py,sha256=PTwmKZSQL2UhKuSdV5Wp7o1JDjv1qwgsVCj3ThkbKb8,3447
|
34
33
|
lamindb/core/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
|
35
34
|
lamindb/core/fields.py,sha256=Jgi_XI-iTe6cT7oD8FV_JqEpjN1Q9rZWwL8VLtj4jkA,164
|
36
35
|
lamindb/core/types.py,sha256=xeQF2x40p2pR9eIVQrXT74RrS810z2fbjmTRTSQUqPM,230
|
37
36
|
lamindb/core/versioning.py,sha256=DsEHpCueNwhRiIaRH5-O8H_1fJVNtWslCRx30YiIS5o,3080
|
38
37
|
lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
|
39
|
-
lamindb/core/datasets/_core.py,sha256=
|
38
|
+
lamindb/core/datasets/_core.py,sha256=9bcDfVfMZ1h1WAS88ZBjy-R91xbP2KIm_ofHguXAKpY,20177
|
40
39
|
lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
|
41
|
-
lamindb/core/storage/__init__.py,sha256=
|
40
|
+
lamindb/core/storage/__init__.py,sha256=5LUFQKRr2BX24d-yWBezhTXBV83sShcOvPj5Y5u6qIg,441
|
42
41
|
lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
|
43
42
|
lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA-zLy59uDs,24536
|
44
|
-
lamindb/core/storage/_valid_suffixes.py,sha256=
|
43
|
+
lamindb/core/storage/_valid_suffixes.py,sha256=J08aglC9oo35pzahj0SQXW9IHib8Asp4dc11co-2uys,212
|
45
44
|
lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
|
46
|
-
lamindb/core/storage/objects.py,sha256=
|
47
|
-
lamindb/core/storage/paths.py,sha256=
|
45
|
+
lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
|
46
|
+
lamindb/core/storage/paths.py,sha256=JTtiTlAMICH4gkw7iZNwTRfNTT0WxrBoKiag_7E9g4I,7882
|
48
47
|
lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
|
49
48
|
lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
|
50
49
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
51
50
|
lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
|
52
|
-
lamindb-0.
|
53
|
-
lamindb-0.
|
54
|
-
lamindb-0.
|
55
|
-
lamindb-0.
|
51
|
+
lamindb-0.71.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
52
|
+
lamindb-0.71.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
53
|
+
lamindb-0.71.1.dist-info/METADATA,sha256=fmFFlU4FrVwO0ON6JVCs8qCh8_HLWyt9WyYs_zyIZgo,2674
|
54
|
+
lamindb-0.71.1.dist-info/RECORD,,
|
lamindb/core/_view_tree.py
DELETED
@@ -1,116 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from collections import defaultdict
|
4
|
-
from pathlib import Path
|
5
|
-
from typing import Iterable
|
6
|
-
|
7
|
-
from lamindb_setup import settings as setup_settings
|
8
|
-
from lnschema_core.models import Artifact, Storage
|
9
|
-
|
10
|
-
|
11
|
-
def view_tree(
|
12
|
-
cls,
|
13
|
-
level: int = -1,
|
14
|
-
limit_to_directories: bool = False,
|
15
|
-
length_limit: int = 1000,
|
16
|
-
max_files_per_dir_per_type: int = 7,
|
17
|
-
) -> None:
|
18
|
-
"""{}."""
|
19
|
-
if cls.__class__.__name__ == "QuerySet":
|
20
|
-
print("queryset")
|
21
|
-
qs = cls
|
22
|
-
storage_ids = qs.list("storage_id")
|
23
|
-
elif cls == Artifact:
|
24
|
-
print("file")
|
25
|
-
qs = cls.filter(storage_id=setup_settings.storage.id).all()
|
26
|
-
storage_ids = Storage.filter().list("id")
|
27
|
-
else:
|
28
|
-
print("else")
|
29
|
-
return
|
30
|
-
storages = Storage.filter().all()
|
31
|
-
storage_roots = {
|
32
|
-
storage_id: storages.get(id=storage_id).root for storage_id in storage_ids
|
33
|
-
}
|
34
|
-
keys = set()
|
35
|
-
for artifact in qs:
|
36
|
-
root = storage_roots.get(artifact.storage_id, "")
|
37
|
-
keys.add(f"{root}/{artifact.key}")
|
38
|
-
|
39
|
-
_view_tree(
|
40
|
-
keys=keys,
|
41
|
-
level=level,
|
42
|
-
only_dirs=limit_to_directories,
|
43
|
-
limit=length_limit,
|
44
|
-
max_files_per_dir_per_type=max_files_per_dir_per_type,
|
45
|
-
)
|
46
|
-
|
47
|
-
|
48
|
-
def _view_tree(
|
49
|
-
keys: Iterable[str],
|
50
|
-
*,
|
51
|
-
level: int = -1,
|
52
|
-
only_dirs: bool = False,
|
53
|
-
limit: int = 1000,
|
54
|
-
max_files_per_dir_per_type: int = 7,
|
55
|
-
) -> None:
|
56
|
-
# Create a nested dictionary from keys
|
57
|
-
def tree():
|
58
|
-
return defaultdict(tree)
|
59
|
-
|
60
|
-
root = tree()
|
61
|
-
|
62
|
-
n_files = 0
|
63
|
-
n_directories = 0
|
64
|
-
suffixes = set()
|
65
|
-
|
66
|
-
for key in keys:
|
67
|
-
parts = key.split("/")
|
68
|
-
node = root
|
69
|
-
for part in parts:
|
70
|
-
node = node[part]
|
71
|
-
if node == {}:
|
72
|
-
n_files += 1
|
73
|
-
suffix = Path(part).suffix
|
74
|
-
if suffix:
|
75
|
-
suffixes.add(suffix)
|
76
|
-
else:
|
77
|
-
n_directories += 1
|
78
|
-
|
79
|
-
# Function to print the tree
|
80
|
-
def print_tree(node, prefix="", depth=0, count=None, n_files_per_dir_per_type=None):
|
81
|
-
if count is None:
|
82
|
-
count = [0]
|
83
|
-
if n_files_per_dir_per_type is None:
|
84
|
-
n_files_per_dir_per_type = defaultdict(int)
|
85
|
-
|
86
|
-
if level != -1 and depth > level:
|
87
|
-
return
|
88
|
-
for name, child in node.items():
|
89
|
-
if count[0] >= limit:
|
90
|
-
return
|
91
|
-
if only_dirs and child == {}:
|
92
|
-
continue
|
93
|
-
suffix = Path(name).suffix
|
94
|
-
n_files_per_dir_per_type[suffix] += 1
|
95
|
-
if (
|
96
|
-
depth > 0
|
97
|
-
and n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type
|
98
|
-
):
|
99
|
-
continue
|
100
|
-
new_prefix = prefix + ("├── " if name != list(node.keys())[-1] else "└── ")
|
101
|
-
print(new_prefix + name)
|
102
|
-
count[0] += 1
|
103
|
-
if child:
|
104
|
-
print_tree(
|
105
|
-
child,
|
106
|
-
prefix + ("│ " if name != list(node.keys())[-1] else " "),
|
107
|
-
depth + 1,
|
108
|
-
count,
|
109
|
-
(
|
110
|
-
defaultdict(int) if depth == 0 else n_files_per_dir_per_type
|
111
|
-
), # Reset the counter for each directory
|
112
|
-
)
|
113
|
-
|
114
|
-
suffix_message = f" with suffixes {', '.join(suffixes)}" if n_files > 0 else ""
|
115
|
-
print(f"{n_directories} directories, {n_files} files{suffix_message}")
|
116
|
-
print_tree(root)
|
File without changes
|
File without changes
|