lamindb 0.71.0__py3-none-any.whl → 0.71.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_artifact.py +26 -35
- lamindb/_finish.py +44 -32
- lamindb/_query_set.py +1 -21
- lamindb/_save.py +7 -22
- lamindb/core/__init__.py +0 -2
- lamindb/core/_data.py +1 -1
- lamindb/core/_feature_manager.py +6 -5
- lamindb/core/_run_context.py +43 -29
- lamindb/core/_settings.py +2 -2
- lamindb/core/_sync_git.py +6 -2
- lamindb/core/datasets/_core.py +2 -2
- lamindb/core/storage/__init__.py +2 -2
- lamindb/core/storage/_valid_suffixes.py +4 -2
- lamindb/core/storage/objects.py +10 -3
- lamindb/core/storage/paths.py +1 -1
- {lamindb-0.71.0.dist-info → lamindb-0.71.1.dist-info}/METADATA +5 -5
- {lamindb-0.71.0.dist-info → lamindb-0.71.1.dist-info}/RECORD +20 -21
- lamindb/core/_view_tree.py +0 -116
- {lamindb-0.71.0.dist-info → lamindb-0.71.1.dist-info}/LICENSE +0 -0
- {lamindb-0.71.0.dist-info → lamindb-0.71.1.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_artifact.py
CHANGED
@@ -21,7 +21,6 @@ from lamindb_setup.core.upath import (
|
|
21
21
|
get_stat_file_cloud,
|
22
22
|
)
|
23
23
|
from lnschema_core import Artifact, Run, Storage
|
24
|
-
from lnschema_core.models import IsTree
|
25
24
|
from lnschema_core.types import (
|
26
25
|
VisibilityChoice,
|
27
26
|
)
|
@@ -35,8 +34,7 @@ from lamindb.core.storage import (
|
|
35
34
|
delete_storage,
|
36
35
|
infer_suffix,
|
37
36
|
load_to_memory,
|
38
|
-
|
39
|
-
write_to_file,
|
37
|
+
write_to_disk,
|
40
38
|
)
|
41
39
|
from lamindb.core.storage.paths import (
|
42
40
|
auto_storage_key_from_artifact,
|
@@ -173,8 +171,7 @@ def process_data(
|
|
173
171
|
# Alex: I don't understand the line below
|
174
172
|
if path.suffixes == []:
|
175
173
|
path = path.with_suffix(suffix)
|
176
|
-
|
177
|
-
write_to_file(data, path)
|
174
|
+
write_to_disk(data, path)
|
178
175
|
use_existing_storage_key = False
|
179
176
|
else:
|
180
177
|
raise NotImplementedError(
|
@@ -194,16 +191,13 @@ def get_stat_or_artifact(
|
|
194
191
|
n_objects = None
|
195
192
|
if settings.upon_file_create_skip_size_hash:
|
196
193
|
return None, None, None, n_objects
|
197
|
-
if suffix == ".zarr" and memory_rep is not None and isinstance(memory_rep, AnnData):
|
198
|
-
size = size_adata(memory_rep)
|
199
|
-
return size, None, None, n_objects
|
200
194
|
stat = path.stat() # one network request
|
201
195
|
if not isinstance(path, LocalPathClasses):
|
202
196
|
size, hash, hash_type = None, None, None
|
203
197
|
if stat is not None:
|
204
198
|
if "ETag" in stat: # is file
|
205
199
|
size, hash, hash_type = get_stat_file_cloud(stat)
|
206
|
-
elif
|
200
|
+
elif stat["type"] == "directory":
|
207
201
|
size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
|
208
202
|
if hash is None:
|
209
203
|
logger.warning(f"did not add hash for {path}")
|
@@ -589,7 +583,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
|
|
589
583
|
init_self_from_db(artifact, kwargs_or_artifact)
|
590
584
|
# adding "key" here is dangerous because key might be auto-populated
|
591
585
|
update_attributes(artifact, {"description": description})
|
592
|
-
if artifact.key != key:
|
586
|
+
if artifact.key != key and key is not None:
|
593
587
|
logger.warning(
|
594
588
|
f"key {artifact.key} on existing artifact differs from passed key {key}"
|
595
589
|
)
|
@@ -914,11 +908,25 @@ def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs)
|
|
914
908
|
|
915
909
|
# docstring handled through attach_func_to_class_method
|
916
910
|
def cache(self, is_run_input: bool | None = None) -> Path:
|
917
|
-
_track_run_input(self, is_run_input)
|
918
|
-
|
919
911
|
using_key = settings._using_key
|
920
912
|
filepath = filepath_from_artifact(self, using_key=using_key)
|
921
|
-
|
913
|
+
try:
|
914
|
+
cache_path = setup_settings.instance.storage.cloud_to_local(
|
915
|
+
filepath, print_progress=True
|
916
|
+
)
|
917
|
+
except Exception as e:
|
918
|
+
if not isinstance(filepath, LocalPathClasses):
|
919
|
+
cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
|
920
|
+
filepath
|
921
|
+
)
|
922
|
+
if cache_path.is_file():
|
923
|
+
cache_path.unlink(missing_ok=True)
|
924
|
+
elif cache_path.is_dir():
|
925
|
+
shutil.rmtree(cache_path)
|
926
|
+
raise e
|
927
|
+
# only call if sync is successfull
|
928
|
+
_track_run_input(self, is_run_input)
|
929
|
+
return cache_path
|
922
930
|
|
923
931
|
|
924
932
|
# docstring handled through attach_func_to_class_method
|
@@ -1003,6 +1011,11 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
|
|
1003
1011
|
local_path = self.path
|
1004
1012
|
self.storage_id = setup_settings.instance.storage.id
|
1005
1013
|
self._local_filepath = local_path
|
1014
|
+
# switch to virtual storage key upon upload
|
1015
|
+
# the local filepath is already cached at that point
|
1016
|
+
self.key_is_virtual = True
|
1017
|
+
# ensure that the artifact is uploaded
|
1018
|
+
self._to_store = True
|
1006
1019
|
|
1007
1020
|
self._save_skip_storage(**kwargs)
|
1008
1021
|
|
@@ -1045,27 +1058,6 @@ def path(self) -> Path | UPath:
|
|
1045
1058
|
return filepath_from_artifact(self, using_key)
|
1046
1059
|
|
1047
1060
|
|
1048
|
-
@classmethod # type: ignore
|
1049
|
-
@doc_args(IsTree.view_tree.__doc__)
|
1050
|
-
def view_tree(
|
1051
|
-
cls,
|
1052
|
-
level: int = -1,
|
1053
|
-
limit_to_directories: bool = False,
|
1054
|
-
length_limit: int = 1000,
|
1055
|
-
max_files_per_dir_per_type: int = 7,
|
1056
|
-
) -> None:
|
1057
|
-
"""{}."""
|
1058
|
-
from lamindb.core._view_tree import view_tree as _view_tree
|
1059
|
-
|
1060
|
-
_view_tree(
|
1061
|
-
cls=cls,
|
1062
|
-
level=level,
|
1063
|
-
limit_to_directories=limit_to_directories,
|
1064
|
-
length_limit=length_limit,
|
1065
|
-
max_files_per_dir_per_type=max_files_per_dir_per_type,
|
1066
|
-
)
|
1067
|
-
|
1068
|
-
|
1069
1061
|
# docstring handled through attach_func_to_class_method
|
1070
1062
|
def restore(self) -> None:
|
1071
1063
|
self.visibility = VisibilityChoice.default.value
|
@@ -1085,7 +1077,6 @@ METHOD_NAMES = [
|
|
1085
1077
|
"replace",
|
1086
1078
|
"from_dir",
|
1087
1079
|
"restore",
|
1088
|
-
"view_tree",
|
1089
1080
|
]
|
1090
1081
|
|
1091
1082
|
if ln_setup._TESTING:
|
lamindb/_finish.py
CHANGED
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
|
|
8
8
|
|
9
9
|
import lamindb_setup as ln_setup
|
10
10
|
from lamin_utils import logger
|
11
|
+
from lamindb_setup.core.hashing import hash_file
|
11
12
|
from lnschema_core.types import TransformType
|
12
13
|
|
13
14
|
from .core._run_context import is_run_from_ipython, run_context
|
@@ -35,7 +36,7 @@ def get_seconds_since_modified(filepath) -> float:
|
|
35
36
|
def finish():
|
36
37
|
"""Mark a tracked run as finished.
|
37
38
|
|
38
|
-
|
39
|
+
Saves source code and, for notebooks, a run report to your default storage location.
|
39
40
|
"""
|
40
41
|
if run_context.path is None:
|
41
42
|
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
|
@@ -47,16 +48,12 @@ def finish():
|
|
47
48
|
raise NotebookNotSaved(
|
48
49
|
"Please save the notebook in your editor right before running `ln.finish()`"
|
49
50
|
)
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
else: # scripts
|
57
|
-
# save_run_context_core was already called during ln.track()
|
58
|
-
run_context.run.finished_at = datetime.now(timezone.utc) # update run time
|
59
|
-
run_context.run.save()
|
51
|
+
save_run_context_core(
|
52
|
+
run=run_context.run,
|
53
|
+
transform=run_context.transform,
|
54
|
+
filepath=run_context.path,
|
55
|
+
finished_at=True,
|
56
|
+
)
|
60
57
|
|
61
58
|
|
62
59
|
def save_run_context_core(
|
@@ -138,15 +135,17 @@ def save_run_context_core(
|
|
138
135
|
if prev_transform.source_code_id is not None:
|
139
136
|
prev_source = prev_transform.source_code
|
140
137
|
ln.settings.silence_file_run_transform_warning = True
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
138
|
+
|
139
|
+
# track source code
|
140
|
+
if transform.source_code_id is not None:
|
141
|
+
# check if the hash of the transform source code matches
|
142
|
+
# (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
|
143
|
+
hash, _ = hash_file(source_code_path) # ignore hash_type for now
|
144
|
+
if hash != transform.source_code.hash:
|
146
145
|
if os.getenv("LAMIN_TESTING") is None:
|
147
146
|
# in test, auto-confirm overwrite
|
148
147
|
response = input(
|
149
|
-
f"You are about to overwrite existing source code (hash {transform.source_code.hash}) for transform version"
|
148
|
+
f"You are about to replace (overwrite) existing source code (hash '{transform.source_code.hash}') for transform version"
|
150
149
|
f" '{transform.version}'. Proceed? (y/n)"
|
151
150
|
)
|
152
151
|
else:
|
@@ -154,6 +153,9 @@ def save_run_context_core(
|
|
154
153
|
if response == "y":
|
155
154
|
transform.source_code.replace(source_code_path)
|
156
155
|
transform.source_code.save(upload=True)
|
156
|
+
logger.success(
|
157
|
+
f"replaced transform.source_code: {transform.source_code}"
|
158
|
+
)
|
157
159
|
else:
|
158
160
|
logger.warning("Please re-run `ln.track()` to make a new version")
|
159
161
|
return "rerun-the-notebook"
|
@@ -169,21 +171,32 @@ def save_run_context_core(
|
|
169
171
|
source_code.save(upload=True)
|
170
172
|
transform.source_code = source_code
|
171
173
|
logger.success(f"saved transform.source_code: {transform.source_code}")
|
174
|
+
|
172
175
|
# track environment
|
173
176
|
filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
|
174
177
|
if filepath_env.exists():
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
178
|
+
hash, _ = hash_file(filepath_env)
|
179
|
+
artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
|
180
|
+
new_env_artifact = artifact is None
|
181
|
+
if new_env_artifact:
|
182
|
+
artifact = ln.Artifact(
|
183
|
+
filepath_env,
|
184
|
+
description="requirements.txt",
|
185
|
+
visibility=0,
|
186
|
+
run=False,
|
187
|
+
)
|
182
188
|
artifact.save(upload=True)
|
183
189
|
run.environment = artifact
|
184
|
-
|
185
|
-
|
190
|
+
if new_env_artifact:
|
191
|
+
logger.success(f"saved run.environment: {run.environment}")
|
192
|
+
|
193
|
+
# set finished_at
|
194
|
+
if finished_at:
|
195
|
+
run.finished_at = datetime.now(timezone.utc)
|
196
|
+
|
197
|
+
# track report and set is_consecutive
|
186
198
|
if not transform.type == TransformType.notebook:
|
199
|
+
run.is_consecutive = True
|
187
200
|
run.save()
|
188
201
|
else:
|
189
202
|
if run.report_id is not None:
|
@@ -203,16 +216,15 @@ def save_run_context_core(
|
|
203
216
|
report_file.save(upload=True)
|
204
217
|
run.report = report_file
|
205
218
|
run.is_consecutive = is_consecutive
|
206
|
-
if finished_at:
|
207
|
-
run.finished_at = datetime.now(timezone.utc)
|
208
219
|
run.save()
|
209
220
|
transform.latest_report = run.report
|
210
|
-
transform.save()
|
211
|
-
if transform.type == TransformType.notebook:
|
212
221
|
logger.success(f"saved transform.latest_report: {transform.latest_report}")
|
213
|
-
|
222
|
+
transform.save()
|
223
|
+
|
224
|
+
# finalize
|
225
|
+
if ln_setup.settings.instance.is_on_hub:
|
214
226
|
identifier = ln_setup.settings.instance.slug
|
215
|
-
logger.
|
227
|
+
logger.important(
|
216
228
|
f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
|
217
229
|
)
|
218
230
|
# because run & transform changed, update the global run_context
|
lamindb/_query_set.py
CHANGED
@@ -11,7 +11,6 @@ from lnschema_core.models import (
|
|
11
11
|
Artifact,
|
12
12
|
CanValidate,
|
13
13
|
Collection,
|
14
|
-
IsTree,
|
15
14
|
IsVersioned,
|
16
15
|
Registry,
|
17
16
|
Run,
|
@@ -83,7 +82,7 @@ class RecordsList(UserList):
|
|
83
82
|
return one_helper(self)
|
84
83
|
|
85
84
|
|
86
|
-
class QuerySet(models.QuerySet, CanValidate
|
85
|
+
class QuerySet(models.QuerySet, CanValidate):
|
87
86
|
"""Sets of records returned by queries.
|
88
87
|
|
89
88
|
See Also:
|
@@ -265,25 +264,6 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
265
264
|
|
266
265
|
return _standardize(cls=self, values=values, field=field, **kwargs)
|
267
266
|
|
268
|
-
@doc_args(IsTree.view_tree.__doc__)
|
269
|
-
def view_tree(
|
270
|
-
self,
|
271
|
-
level: int = -1,
|
272
|
-
limit_to_directories: bool = False,
|
273
|
-
length_limit: int = 1000,
|
274
|
-
max_files_per_dir_per_type: int = 7,
|
275
|
-
) -> None:
|
276
|
-
"""{}."""
|
277
|
-
from .core._view_tree import view_tree as _view_tree
|
278
|
-
|
279
|
-
_view_tree(
|
280
|
-
cls=self,
|
281
|
-
level=level,
|
282
|
-
limit_to_directories=limit_to_directories,
|
283
|
-
length_limit=length_limit,
|
284
|
-
max_files_per_dir_per_type=max_files_per_dir_per_type,
|
285
|
-
)
|
286
|
-
|
287
267
|
|
288
268
|
def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
|
289
269
|
# evaluating length can be very costly, hence, the try-except block
|
lamindb/_save.py
CHANGED
@@ -6,13 +6,13 @@ import traceback
|
|
6
6
|
from collections import defaultdict
|
7
7
|
from datetime import datetime
|
8
8
|
from functools import partial
|
9
|
-
from typing import Iterable, overload
|
9
|
+
from typing import TYPE_CHECKING, Iterable, overload
|
10
10
|
|
11
11
|
import lamindb_setup
|
12
12
|
from django.db import transaction
|
13
13
|
from django.utils.functional import partition
|
14
14
|
from lamin_utils import logger
|
15
|
-
from lamindb_setup.core.upath import
|
15
|
+
from lamindb_setup.core.upath import print_hook
|
16
16
|
from lnschema_core.models import Artifact, Registry
|
17
17
|
|
18
18
|
from lamindb.core._settings import settings
|
@@ -23,12 +23,8 @@ from lamindb.core.storage.paths import (
|
|
23
23
|
store_file_or_folder,
|
24
24
|
)
|
25
25
|
|
26
|
-
|
27
|
-
from
|
28
|
-
except ImportError:
|
29
|
-
|
30
|
-
def write_adata_zarr(filepath): # type: ignore
|
31
|
-
raise ImportError("Please install zarr: pip install zarr")
|
26
|
+
if TYPE_CHECKING:
|
27
|
+
from lamindb_setup.core.upath import UPath
|
32
28
|
|
33
29
|
|
34
30
|
def save(
|
@@ -162,7 +158,7 @@ def check_and_attempt_upload(
|
|
162
158
|
def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
|
163
159
|
local_path = artifact._local_filepath
|
164
160
|
|
165
|
-
#
|
161
|
+
# in-memory cases
|
166
162
|
if local_path is None or not local_path.exists():
|
167
163
|
return None
|
168
164
|
|
@@ -284,18 +280,7 @@ def upload_artifact(
|
|
284
280
|
storage_path = attempt_accessing_path(
|
285
281
|
artifact, storage_key, using_key=using_key, access_token=access_token
|
286
282
|
)
|
287
|
-
|
288
|
-
|
289
|
-
artifact.suffix == ".zarr"
|
290
|
-
and hasattr(artifact, "_memory_rep")
|
291
|
-
and artifact._memory_rep is not None
|
292
|
-
):
|
293
|
-
logger.save(msg)
|
294
|
-
print_progress = partial(
|
295
|
-
print_hook, objectname=storage_path.name, action="uploading"
|
296
|
-
)
|
297
|
-
write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
|
298
|
-
elif hasattr(artifact, "_to_store") and artifact._to_store:
|
299
|
-
logger.save(msg)
|
283
|
+
if hasattr(artifact, "_to_store") and artifact._to_store:
|
284
|
+
logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
|
300
285
|
store_file_or_folder(artifact._local_filepath, storage_path)
|
301
286
|
return storage_path
|
lamindb/core/__init__.py
CHANGED
@@ -12,7 +12,6 @@ Registries:
|
|
12
12
|
Data
|
13
13
|
FeatureManager
|
14
14
|
LabelManager
|
15
|
-
IsTree
|
16
15
|
IsVersioned
|
17
16
|
CanValidate
|
18
17
|
HasParents
|
@@ -55,7 +54,6 @@ from lnschema_core.models import (
|
|
55
54
|
CanValidate,
|
56
55
|
Data,
|
57
56
|
HasParents,
|
58
|
-
IsTree,
|
59
57
|
IsVersioned,
|
60
58
|
Registry,
|
61
59
|
)
|
lamindb/core/_data.py
CHANGED
@@ -345,7 +345,7 @@ def add_labels(
|
|
345
345
|
f" {old_feature_set}"
|
346
346
|
)
|
347
347
|
old_feature_set.delete()
|
348
|
-
self.features.
|
348
|
+
self.features.add_feature_set(feature_set, slot="external")
|
349
349
|
logger.save(
|
350
350
|
f"linked new feature '{feature.name}' together with new feature set"
|
351
351
|
f" {feature_set}"
|
lamindb/core/_feature_manager.py
CHANGED
@@ -236,7 +236,7 @@ class FeatureManager:
|
|
236
236
|
and self._host.artifact.accessor == "DataFrame"
|
237
237
|
):
|
238
238
|
slot = "columns" if slot is None else slot
|
239
|
-
self.
|
239
|
+
self.add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
|
240
240
|
|
241
241
|
def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
|
242
242
|
"""Add features from DataFrame."""
|
@@ -325,7 +325,7 @@ class FeatureManager:
|
|
325
325
|
self._host._feature_sets = feature_sets
|
326
326
|
self._host.save()
|
327
327
|
|
328
|
-
def
|
328
|
+
def add_feature_set(self, feature_set: FeatureSet, slot: str):
|
329
329
|
"""Add new feature set to a slot.
|
330
330
|
|
331
331
|
Args:
|
@@ -405,7 +405,8 @@ class FeatureManager:
|
|
405
405
|
f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
|
406
406
|
)
|
407
407
|
continue
|
408
|
-
#
|
409
|
-
|
408
|
+
# make sure the uid matches if featureset is composed of same features
|
409
|
+
if feature_set_self.hash == feature_set.hash:
|
410
|
+
feature_set_self.uid = feature_set.uid
|
410
411
|
logger.info(f"saving {slot} featureset: {feature_set_self}")
|
411
|
-
self._host.features.
|
412
|
+
self._host.features.add_feature_set(feature_set_self, slot)
|
lamindb/core/_run_context.py
CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path, PurePath
|
|
8
8
|
from typing import TYPE_CHECKING
|
9
9
|
|
10
10
|
from lamin_utils import logger
|
11
|
+
from lamindb_setup.core.hashing import hash_file
|
11
12
|
from lnschema_core import Run, Transform, ids
|
12
13
|
from lnschema_core.types import TransformType
|
13
14
|
from lnschema_core.users import current_user_id
|
@@ -175,6 +176,17 @@ def raise_transform_settings_error() -> None:
|
|
175
176
|
)
|
176
177
|
|
177
178
|
|
179
|
+
def pretty_pypackages(dependencies: dict) -> str:
|
180
|
+
deps_list = []
|
181
|
+
for pkg, ver in dependencies.items():
|
182
|
+
if ver != "":
|
183
|
+
deps_list.append(pkg + f"=={ver}")
|
184
|
+
else:
|
185
|
+
deps_list.append(pkg)
|
186
|
+
deps_list.sort()
|
187
|
+
return " ".join(deps_list)
|
188
|
+
|
189
|
+
|
178
190
|
class run_context:
|
179
191
|
"""Global run context."""
|
180
192
|
|
@@ -315,16 +327,6 @@ class run_context:
|
|
315
327
|
from ._track_environment import track_environment
|
316
328
|
|
317
329
|
track_environment(run)
|
318
|
-
|
319
|
-
if not is_run_from_ipython and cls.path is not None:
|
320
|
-
# upload run source code & environment
|
321
|
-
from lamindb._finish import save_run_context_core
|
322
|
-
|
323
|
-
save_run_context_core(
|
324
|
-
run=cls.run,
|
325
|
-
transform=cls.transform,
|
326
|
-
filepath=cls.path,
|
327
|
-
)
|
328
330
|
return None
|
329
331
|
|
330
332
|
@classmethod
|
@@ -386,17 +388,12 @@ class run_context:
|
|
386
388
|
# log imported python packages
|
387
389
|
if not path_str.startswith("/fileId="):
|
388
390
|
try:
|
389
|
-
from nbproject.dev._metadata_display import DisplayMeta
|
390
391
|
from nbproject.dev._pypackage import infer_pypackages
|
391
392
|
|
392
|
-
|
393
|
-
filepath=path_str,
|
394
|
-
metadata_only=True,
|
395
|
-
)
|
396
|
-
dm = DisplayMeta(metadata)
|
393
|
+
nb = nbproject.dev.read_notebook(path_str)
|
397
394
|
logger.important(
|
398
395
|
"notebook imports:"
|
399
|
-
f" {
|
396
|
+
f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
|
400
397
|
)
|
401
398
|
except Exception:
|
402
399
|
logger.debug("inferring imported packages failed")
|
@@ -451,19 +448,36 @@ class run_context:
|
|
451
448
|
transform.save()
|
452
449
|
logger.important(f"updated: {transform}")
|
453
450
|
# check whether the notebook source code was already saved
|
454
|
-
if
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
451
|
+
if transform.source_code_id is not None:
|
452
|
+
response = None
|
453
|
+
if is_run_from_ipython:
|
454
|
+
if os.getenv("LAMIN_TESTING") is None:
|
455
|
+
response = input(
|
456
|
+
"You already saved source code for this notebook."
|
457
|
+
" Bump the version before a new run? (y/n)"
|
458
|
+
)
|
459
|
+
else:
|
460
|
+
response = "y"
|
464
461
|
else:
|
465
|
-
|
466
|
-
|
462
|
+
hash, _ = hash_file(cls.path) # ignore hash_type for now
|
463
|
+
if hash != transform.source_code.hash:
|
464
|
+
# only if hashes don't match, we need user input
|
465
|
+
if os.getenv("LAMIN_TESTING") is None:
|
466
|
+
response = input(
|
467
|
+
"You already saved source code for this script and meanwhile modified it without bumping a version."
|
468
|
+
" Bump the version before a new run? (y/n)"
|
469
|
+
)
|
470
|
+
else:
|
471
|
+
response = "y"
|
472
|
+
else:
|
473
|
+
logger.important(f"loaded: {transform}")
|
474
|
+
if response is not None:
|
475
|
+
# if a script is re-run and hashes match, we don't need user input
|
476
|
+
if response == "y":
|
477
|
+
update_stem_uid_or_version(stem_uid, version, bump_version=True)
|
478
|
+
else:
|
479
|
+
# we want a new stem_uid in this case, hence raise the error
|
480
|
+
raise_transform_settings_error()
|
467
481
|
else:
|
468
482
|
logger.important(f"loaded: {transform}")
|
469
483
|
cls.transform = transform
|
lamindb/core/_settings.py
CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, Mapping
|
|
5
5
|
|
6
6
|
import lamindb_setup as ln_setup
|
7
7
|
from lamin_utils import logger
|
8
|
-
from lamindb_setup.
|
8
|
+
from lamindb_setup._set_managed_storage import set_managed_storage
|
9
9
|
from lamindb_setup.core._settings import settings as setup_settings
|
10
10
|
from lamindb_setup.core._settings_instance import sanitize_git_repo_url
|
11
11
|
|
@@ -147,7 +147,7 @@ class Settings:
|
|
147
147
|
path, kwargs = path_kwargs
|
148
148
|
else:
|
149
149
|
path, kwargs = path_kwargs, {}
|
150
|
-
|
150
|
+
set_managed_storage(path, **kwargs)
|
151
151
|
|
152
152
|
@property
|
153
153
|
def storage_local(self) -> Path:
|
lamindb/core/_sync_git.py
CHANGED
@@ -61,11 +61,15 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
|
|
61
61
|
capture_output=True,
|
62
62
|
cwd=repo_dir,
|
63
63
|
)
|
64
|
-
|
64
|
+
# we just care to find one commit
|
65
|
+
# hence, we split by new line ("\n") and use the first one
|
66
|
+
commit_hash = result.stdout.decode().split("\n")[0]
|
65
67
|
if commit_hash == "" or result.returncode == 1:
|
66
68
|
return None
|
67
69
|
else:
|
68
|
-
assert
|
70
|
+
assert (
|
71
|
+
len(commit_hash) == 40
|
72
|
+
), f"commit hash |{commit_hash}| is not 40 characters long"
|
69
73
|
return commit_hash
|
70
74
|
|
71
75
|
|
lamindb/core/datasets/_core.py
CHANGED
@@ -161,8 +161,8 @@ def anndata_mouse_sc_lymph_node(
|
|
161
161
|
adata.obs.columns = (
|
162
162
|
adata.obs.columns.str.replace("Sample Characteristic", "")
|
163
163
|
.str.replace("Factor Value ", "Factor Value:", regex=True)
|
164
|
-
.str.replace("Factor Value
|
165
|
-
.str.replace(" Ontology Term
|
164
|
+
.str.replace("Factor Value\\[", "Factor Value:", regex=True)
|
165
|
+
.str.replace(" Ontology Term\\[", "ontology_id:", regex=True)
|
166
166
|
.str.strip("[]")
|
167
167
|
.str.replace("organism part", "tissue")
|
168
168
|
.str.replace("organism", "organism")
|
lamindb/core/storage/__init__.py
CHANGED
@@ -10,6 +10,6 @@ from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
|
|
10
10
|
|
11
11
|
from ._anndata_sizes import size_adata
|
12
12
|
from ._backed_access import AnnDataAccessor, BackedAccessor
|
13
|
-
from ._valid_suffixes import VALID_SUFFIXES
|
14
|
-
from .objects import infer_suffix,
|
13
|
+
from ._valid_suffixes import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
|
14
|
+
from .objects import infer_suffix, write_to_disk
|
15
15
|
from .paths import delete_storage, load_to_memory
|
@@ -1,3 +1,5 @@
|
|
1
|
-
from lamindb_setup.core.upath import VALID_SUFFIXES
|
1
|
+
from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
|
2
2
|
|
3
|
-
|
3
|
+
# add new composite suffixes like so
|
4
|
+
VALID_COMPOSITE_SUFFIXES.update({".vitessce.json"})
|
5
|
+
# can do the same for simple valid suffixes
|
lamindb/core/storage/objects.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
from pathlib import PurePosixPath
|
3
4
|
from typing import TYPE_CHECKING
|
4
5
|
|
5
6
|
from anndata import AnnData
|
@@ -21,7 +22,7 @@ def infer_suffix(dmem, adata_format: str | None = None):
|
|
21
22
|
"""Infer LaminDB storage file suffix from a data object."""
|
22
23
|
if isinstance(dmem, AnnData):
|
23
24
|
if adata_format is not None:
|
24
|
-
if adata_format not in
|
25
|
+
if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
|
25
26
|
raise ValueError(
|
26
27
|
"Error when specifying AnnData storage format, it should be"
|
27
28
|
f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
|
@@ -40,9 +41,15 @@ def infer_suffix(dmem, adata_format: str | None = None):
|
|
40
41
|
raise NotImplementedError
|
41
42
|
|
42
43
|
|
43
|
-
def
|
44
|
+
def write_to_disk(dmem, filepath: UPathStr):
|
44
45
|
if isinstance(dmem, AnnData):
|
45
|
-
|
46
|
+
suffix = PurePosixPath(filepath).suffix
|
47
|
+
if suffix == ".h5ad":
|
48
|
+
dmem.write_h5ad(filepath)
|
49
|
+
elif suffix == ".zarr":
|
50
|
+
dmem.write_zarr(filepath)
|
51
|
+
else:
|
52
|
+
raise NotImplementedError
|
46
53
|
elif isinstance(dmem, DataFrame):
|
47
54
|
dmem.to_parquet(filepath)
|
48
55
|
else:
|
lamindb/core/storage/paths.py
CHANGED
@@ -140,7 +140,7 @@ def delete_storage(storagepath: Path):
|
|
140
140
|
if not storagepath.is_relative_to(settings.storage): # type: ignore
|
141
141
|
allow_delete = False
|
142
142
|
if setup_settings.instance.keep_artifacts_local:
|
143
|
-
allow_delete = storagepath.is_relative_to(
|
143
|
+
allow_delete = storagepath.is_relative_to( # type: ignore
|
144
144
|
setup_settings.instance.storage_local.root
|
145
145
|
)
|
146
146
|
if not allow_delete:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.71.
|
3
|
+
Version: 0.71.1
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.66.
|
13
|
-
Requires-Dist: lamindb_setup==0.71.
|
12
|
+
Requires-Dist: lnschema_core==0.66.3
|
13
|
+
Requires-Dist: lamindb_setup==0.71.3
|
14
14
|
Requires-Dist: lamin_utils==0.13.2
|
15
|
-
Requires-Dist: lamin_cli==0.13.
|
15
|
+
Requires-Dist: lamin_cli==0.13.1
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -37,7 +37,7 @@ Requires-Dist: faker-biology ; extra == "dev"
|
|
37
37
|
Requires-Dist: django-schema-graph ; extra == "erdiagram"
|
38
38
|
Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
|
39
39
|
Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
|
40
|
-
Requires-Dist: nbproject==0.10.
|
40
|
+
Requires-Dist: nbproject==0.10.2 ; extra == "jupyter"
|
41
41
|
Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
|
42
42
|
Requires-Dist: nbconvert ; extra == "jupyter"
|
43
43
|
Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
|
@@ -1,55 +1,54 @@
|
|
1
|
-
lamindb/__init__.py,sha256=
|
1
|
+
lamindb/__init__.py,sha256=PbZGEkozIsD8RM3XLafkYGo4iPJy7FJFdzVr0VW7Zy0,2182
|
2
2
|
lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
|
3
|
-
lamindb/_artifact.py,sha256=
|
3
|
+
lamindb/_artifact.py,sha256=8uBW-dhuWyBUQGs728sAPCnuhTic-NKjSbaneF07aMo,40106
|
4
4
|
lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
|
5
5
|
lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
|
6
6
|
lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
|
7
7
|
lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
|
8
8
|
lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
|
9
|
-
lamindb/_finish.py,sha256=
|
9
|
+
lamindb/_finish.py,sha256=6GwhqrC-x-JdFd16i7-uyhCWeQgGKxr25aSsSXPZt4g,8598
|
10
10
|
lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
|
11
11
|
lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
|
12
12
|
lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
|
13
13
|
lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
|
14
|
-
lamindb/_query_set.py,sha256=
|
14
|
+
lamindb/_query_set.py,sha256=n0owd74cTzGz6-mIv8SlDz0wcyRz7Xw3Ke1LhE8UlIg,10784
|
15
15
|
lamindb/_registry.py,sha256=fmX-BUnan3Y0WrEAx3qNwRYCIJwJgjoKnRnpgcXujEI,19358
|
16
16
|
lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
|
17
|
-
lamindb/_save.py,sha256=
|
17
|
+
lamindb/_save.py,sha256=_7r3TUV3B6Hp75r5O_ymu3fKWyBHbGa5vmE_pxrtsVI,10923
|
18
18
|
lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
|
19
19
|
lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
|
20
20
|
lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
|
21
21
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
22
22
|
lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
|
23
|
-
lamindb/core/__init__.py,sha256=
|
24
|
-
lamindb/core/_data.py,sha256=
|
25
|
-
lamindb/core/_feature_manager.py,sha256=
|
23
|
+
lamindb/core/__init__.py,sha256=TI9_1Jtpwae_cUPQ3-U0RRPH5c3GBA-gLhHvlAk_Nlo,1213
|
24
|
+
lamindb/core/_data.py,sha256=Lico6-Vx15bNpGLl1bqFqEsh62pD4YKOOBnmahse1tI,17673
|
25
|
+
lamindb/core/_feature_manager.py,sha256=uTzZZ7-qqEAmdwi48Holy2j5VGTgmoQxhb21r6mLShI,15824
|
26
26
|
lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
|
27
27
|
lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
|
28
|
-
lamindb/core/_run_context.py,sha256=
|
29
|
-
lamindb/core/_settings.py,sha256=
|
30
|
-
lamindb/core/_sync_git.py,sha256=
|
28
|
+
lamindb/core/_run_context.py,sha256=3Pa9DQRR9_OZTMJyezi4p_ZIdL6JsKnQ8gM57whFpMo,16926
|
29
|
+
lamindb/core/_settings.py,sha256=rW1KfEXfT56XErwcnSuQxaCytpOy1kJ-u7tVmkmNmxY,6131
|
30
|
+
lamindb/core/_sync_git.py,sha256=06Te35UZj2QBaHNcc59VSC9vJgcFct7Z2sK78NLkZBs,4119
|
31
31
|
lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
|
32
32
|
lamindb/core/_transform_settings.py,sha256=eV96QKX9jOojjzF-a0oo0wXQsMXN2F6QV7orE06oFC8,161
|
33
|
-
lamindb/core/_view_tree.py,sha256=PTwmKZSQL2UhKuSdV5Wp7o1JDjv1qwgsVCj3ThkbKb8,3447
|
34
33
|
lamindb/core/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
|
35
34
|
lamindb/core/fields.py,sha256=Jgi_XI-iTe6cT7oD8FV_JqEpjN1Q9rZWwL8VLtj4jkA,164
|
36
35
|
lamindb/core/types.py,sha256=xeQF2x40p2pR9eIVQrXT74RrS810z2fbjmTRTSQUqPM,230
|
37
36
|
lamindb/core/versioning.py,sha256=DsEHpCueNwhRiIaRH5-O8H_1fJVNtWslCRx30YiIS5o,3080
|
38
37
|
lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
|
39
|
-
lamindb/core/datasets/_core.py,sha256=
|
38
|
+
lamindb/core/datasets/_core.py,sha256=9bcDfVfMZ1h1WAS88ZBjy-R91xbP2KIm_ofHguXAKpY,20177
|
40
39
|
lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
|
41
|
-
lamindb/core/storage/__init__.py,sha256=
|
40
|
+
lamindb/core/storage/__init__.py,sha256=5LUFQKRr2BX24d-yWBezhTXBV83sShcOvPj5Y5u6qIg,441
|
42
41
|
lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
|
43
42
|
lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA-zLy59uDs,24536
|
44
|
-
lamindb/core/storage/_valid_suffixes.py,sha256=
|
43
|
+
lamindb/core/storage/_valid_suffixes.py,sha256=J08aglC9oo35pzahj0SQXW9IHib8Asp4dc11co-2uys,212
|
45
44
|
lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
|
46
|
-
lamindb/core/storage/objects.py,sha256=
|
47
|
-
lamindb/core/storage/paths.py,sha256=
|
45
|
+
lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
|
46
|
+
lamindb/core/storage/paths.py,sha256=JTtiTlAMICH4gkw7iZNwTRfNTT0WxrBoKiag_7E9g4I,7882
|
48
47
|
lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
|
49
48
|
lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
|
50
49
|
lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
|
51
50
|
lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
|
52
|
-
lamindb-0.71.
|
53
|
-
lamindb-0.71.
|
54
|
-
lamindb-0.71.
|
55
|
-
lamindb-0.71.
|
51
|
+
lamindb-0.71.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
52
|
+
lamindb-0.71.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
53
|
+
lamindb-0.71.1.dist-info/METADATA,sha256=fmFFlU4FrVwO0ON6JVCs8qCh8_HLWyt9WyYs_zyIZgo,2674
|
54
|
+
lamindb-0.71.1.dist-info/RECORD,,
|
lamindb/core/_view_tree.py
DELETED
@@ -1,116 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from collections import defaultdict
|
4
|
-
from pathlib import Path
|
5
|
-
from typing import Iterable
|
6
|
-
|
7
|
-
from lamindb_setup import settings as setup_settings
|
8
|
-
from lnschema_core.models import Artifact, Storage
|
9
|
-
|
10
|
-
|
11
|
-
def view_tree(
    cls,
    level: int = -1,
    limit_to_directories: bool = False,
    length_limit: int = 1000,
    max_files_per_dir_per_type: int = 7,
) -> None:
    """{}."""
    # The docstring above is intentionally a bare `{}` placeholder.
    # NOTE(review): presumably it is filled in elsewhere via str.format -- confirm
    # before replacing it with literal text.
    #
    # NOTE(review): the three print() calls below look like debugging leftovers;
    # they are kept because removing them would change what callers see on stdout.
    called_on_queryset = cls.__class__.__name__ == "QuerySet"
    if called_on_queryset:
        print("queryset")
        artifacts = cls
        storage_ids = artifacts.list("storage_id")
    elif cls == Artifact:
        print("file")
        # Restrict to artifacts whose storage_id matches the configured storage,
        # but resolve roots for every known Storage id.
        current_storage_id = setup_settings.storage.id
        artifacts = cls.filter(storage_id=current_storage_id).all()
        storage_ids = Storage.filter().list("id")
    else:
        # Anything that is neither a QuerySet nor the Artifact class is ignored.
        print("else")
        return None

    # Map each storage id to the root of the matching Storage record.
    all_storages = Storage.filter().all()
    root_by_storage_id = {}
    for storage_id in storage_ids:
        root_by_storage_id[storage_id] = all_storages.get(id=storage_id).root

    # One "<root>/<key>" entry per artifact; an unknown storage id yields an
    # empty root. A set drops duplicate paths.
    keys = {
        f"{root_by_storage_id.get(artifact.storage_id, '')}/{artifact.key}"
        for artifact in artifacts
    }

    _view_tree(
        keys=keys,
        level=level,
        only_dirs=limit_to_directories,
        limit=length_limit,
        max_files_per_dir_per_type=max_files_per_dir_per_type,
    )
|
46
|
-
|
47
|
-
|
48
|
-
def _view_tree(
    keys: Iterable[str],
    *,
    level: int = -1,
    only_dirs: bool = False,
    limit: int = 1000,
    max_files_per_dir_per_type: int = 7,
) -> None:
    """Print a summary line followed by a tree view of "/"-separated keys.

    Args:
        keys: Paths; each "/"-separated component becomes one tree node.
        level: Deepest level to print (top-level entries are level 0);
            -1 prints every level.
        only_dirs: If True, entries without children are not printed.
        limit: Maximum number of entries printed in total.
        max_files_per_dir_per_type: Below the top level, maximum number of
            entries printed per suffix.

    Returns:
        None. Everything is written to stdout with ``print``.
    """
    # Build a nested dict: each path component is a node keyed by its name.
    root: dict = {}
    for key in keys:
        node = root
        for part in key.split("/"):
            node = node.setdefault(part, {})

    # Count on the *finished* tree. Classifying a node while it is being
    # inserted is wrong: a freshly created directory has no children yet and
    # would be counted as a file, and a shared directory would be counted once
    # per key that passes through it.
    n_files = 0
    n_directories = 0
    suffixes = set()
    pending = [root]
    while pending:
        for name, child in pending.pop().items():
            if child:
                n_directories += 1
                pending.append(child)
                continue
            n_files += 1
            suffix = Path(name).suffix
            if suffix:
                suffixes.add(suffix)

    def print_tree(node, prefix="", depth=0, count=None, n_files_per_dir_per_type=None):
        """Recursively print ``node``'s entries using box-drawing connectors."""
        if count is None:
            # One-element list so that recursive calls share a mutable counter.
            count = [0]
        if n_files_per_dir_per_type is None:
            n_files_per_dir_per_type = defaultdict(int)

        if level != -1 and depth > level:
            return

        # Looked up once per directory instead of rebuilding the key list for
        # every entry.
        last_name = next(reversed(node), None)
        for name, child in node.items():
            if count[0] >= limit:
                return
            if only_dirs and not child:
                continue
            suffix = Path(name).suffix
            n_files_per_dir_per_type[suffix] += 1
            if (
                depth > 0
                and n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type
            ):
                continue
            is_last = name == last_name
            print(prefix + ("└── " if is_last else "├── ") + name)
            count[0] += 1
            if child:
                print_tree(
                    child,
                    # Four columns wide, matching the connector width, so that
                    # nested levels line up under their parent.
                    prefix + ("    " if is_last else "│   "),
                    depth + 1,
                    count,
                    # NOTE(review): the per-suffix counter is only reset for
                    # children of top-level entries; deeper directories share
                    # their parent's counter -- confirm this is intended.
                    defaultdict(int) if depth == 0 else n_files_per_dir_per_type,
                )

    # Sorted so the summary is deterministic; omitted when there is no suffix.
    suffix_message = (
        f" with suffixes {', '.join(sorted(suffixes))}" if suffixes else ""
    )
    print(f"{n_directories} directories, {n_files} files{suffix_message}")
    print_tree(root)
|
File without changes
|
File without changes
|