lamindb 0.71.0__py3-none-any.whl → 0.71.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules & settings:
  """
 
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
- __version__ = "0.71.0"
+ __version__ = "0.71.2"
 
  import os as _os
 
lamindb/_artifact.py CHANGED
@@ -21,7 +21,6 @@ from lamindb_setup.core.upath import (
      get_stat_file_cloud,
  )
  from lnschema_core import Artifact, Run, Storage
- from lnschema_core.models import IsTree
  from lnschema_core.types import (
      VisibilityChoice,
  )
@@ -35,8 +34,7 @@ from lamindb.core.storage import (
      delete_storage,
      infer_suffix,
      load_to_memory,
-     size_adata,
-     write_to_file,
+     write_to_disk,
  )
  from lamindb.core.storage.paths import (
      auto_storage_key_from_artifact,
@@ -173,8 +171,7 @@ def process_data(
          # Alex: I don't understand the line below
          if path.suffixes == []:
              path = path.with_suffix(suffix)
-         if suffix != ".zarr":
-             write_to_file(data, path)
+         write_to_disk(data, path)
          use_existing_storage_key = False
      else:
          raise NotImplementedError(
@@ -194,16 +191,13 @@ def get_stat_or_artifact(
      n_objects = None
      if settings.upon_file_create_skip_size_hash:
          return None, None, None, n_objects
-     if suffix == ".zarr" and memory_rep is not None and isinstance(memory_rep, AnnData):
-         size = size_adata(memory_rep)
-         return size, None, None, n_objects
      stat = path.stat()  # one network request
      if not isinstance(path, LocalPathClasses):
          size, hash, hash_type = None, None, None
          if stat is not None:
              if "ETag" in stat:  # is file
                  size, hash, hash_type = get_stat_file_cloud(stat)
-             elif path.is_dir():
+             elif stat["type"] == "directory":
                  size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
          if hash is None:
              logger.warning(f"did not add hash for {path}")
@@ -589,7 +583,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
          init_self_from_db(artifact, kwargs_or_artifact)
          # adding "key" here is dangerous because key might be auto-populated
          update_attributes(artifact, {"description": description})
-         if artifact.key != key:
+         if artifact.key != key and key is not None:
              logger.warning(
                  f"key {artifact.key} on existing artifact differs from passed key {key}"
              )
@@ -914,11 +908,25 @@ def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs)
 
  # docstring handled through attach_func_to_class_method
  def cache(self, is_run_input: bool | None = None) -> Path:
-     _track_run_input(self, is_run_input)
-
      using_key = settings._using_key
      filepath = filepath_from_artifact(self, using_key=using_key)
-     return setup_settings.instance.storage.cloud_to_local(filepath, print_progress=True)
+     try:
+         cache_path = setup_settings.instance.storage.cloud_to_local(
+             filepath, print_progress=True
+         )
+     except Exception as e:
+         if not isinstance(filepath, LocalPathClasses):
+             cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
+                 filepath
+             )
+             if cache_path.is_file():
+                 cache_path.unlink(missing_ok=True)
+             elif cache_path.is_dir():
+                 shutil.rmtree(cache_path)
+         raise e
+     # only call if sync is successful
+     _track_run_input(self, is_run_input)
+     return cache_path
 
 
  # docstring handled through attach_func_to_class_method
@@ -1003,6 +1011,11 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
          local_path = self.path
          self.storage_id = setup_settings.instance.storage.id
          self._local_filepath = local_path
+         # switch to virtual storage key upon upload
+         # the local filepath is already cached at that point
+         self.key_is_virtual = True
+         # ensure that the artifact is uploaded
+         self._to_store = True
 
      self._save_skip_storage(**kwargs)
 
@@ -1045,27 +1058,6 @@ def path(self) -> Path | UPath:
      return filepath_from_artifact(self, using_key)
 
 
- @classmethod  # type: ignore
- @doc_args(IsTree.view_tree.__doc__)
- def view_tree(
-     cls,
-     level: int = -1,
-     limit_to_directories: bool = False,
-     length_limit: int = 1000,
-     max_files_per_dir_per_type: int = 7,
- ) -> None:
-     """{}."""
-     from lamindb.core._view_tree import view_tree as _view_tree
-
-     _view_tree(
-         cls=cls,
-         level=level,
-         limit_to_directories=limit_to_directories,
-         length_limit=length_limit,
-         max_files_per_dir_per_type=max_files_per_dir_per_type,
-     )
-
-
  # docstring handled through attach_func_to_class_method
  def restore(self) -> None:
      self.visibility = VisibilityChoice.default.value
@@ -1085,7 +1077,6 @@ METHOD_NAMES = [
      "replace",
      "from_dir",
      "restore",
-     "view_tree",
  ]
 
  if ln_setup._TESTING:
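
`Artifact.cache()` now registers the run input only after a successful sync, and a failed sync deletes the partially downloaded path before re-raising. A minimal usage sketch (the query is hypothetical):

```python
import lamindb as ln

artifact = ln.Artifact.filter(description="my dataset").first()  # hypothetical query
try:
    local_path = artifact.cache()  # on success, also tracks the artifact as run input
except Exception:
    # cache() already removed the partial download, so a retry starts clean
    local_path = artifact.cache()
```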
lamindb/_finish.py CHANGED
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
 
  import lamindb_setup as ln_setup
  from lamin_utils import logger
+ from lamindb_setup.core.hashing import hash_file
  from lnschema_core.types import TransformType
 
  from .core._run_context import is_run_from_ipython, run_context
@@ -35,7 +36,7 @@ def get_seconds_since_modified(filepath) -> float:
  def finish():
      """Mark a tracked run as finished.
 
-     If run in a notebook, it saves the run report & source code to your default storage location.
+     Saves source code and, for notebooks, a run report to your default storage location.
      """
      if run_context.path is None:
          raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
@@ -47,16 +48,12 @@ def finish():
              raise NotebookNotSaved(
                  "Please save the notebook in your editor right before running `ln.finish()`"
              )
-         save_run_context_core(
-             run=run_context.run,
-             transform=run_context.transform,
-             filepath=run_context.path,
-             finished_at=True,
-         )
-     else:  # scripts
-         # save_run_context_core was already called during ln.track()
-         run_context.run.finished_at = datetime.now(timezone.utc)  # update run time
-         run_context.run.save()
+     save_run_context_core(
+         run=run_context.run,
+         transform=run_context.transform,
+         filepath=run_context.path,
+         finished_at=True,
+     )
 
 
  def save_run_context_core(
@@ -138,15 +135,17 @@ def save_run_context_core(
      if prev_transform.source_code_id is not None:
          prev_source = prev_transform.source_code
      ln.settings.silence_file_run_transform_warning = True
-     # register the source code
-     if transform.source_code is not None:
-         # check if the hash of the notebook source code matches
-         check_source_code = ln.Artifact(source_code_path, key="dummy")
-         if check_source_code._state.adding:
+
+     # track source code
+     if transform.source_code_id is not None:
+         # check if the hash of the transform source code matches
+         # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
+         hash, _ = hash_file(source_code_path)  # ignore hash_type for now
+         if hash != transform.source_code.hash:
              if os.getenv("LAMIN_TESTING") is None:
                  # in test, auto-confirm overwrite
                  response = input(
-                     f"You are about to overwrite existing source code (hash {transform.source_code.hash}) for transform version"
+                     f"You are about to replace (overwrite) existing source code (hash '{transform.source_code.hash}') for transform version"
                      f" '{transform.version}'. Proceed? (y/n)"
                  )
              else:
@@ -154,6 +153,9 @@ def save_run_context_core(
              if response == "y":
                  transform.source_code.replace(source_code_path)
                  transform.source_code.save(upload=True)
+                 logger.success(
+                     f"replaced transform.source_code: {transform.source_code}"
+                 )
              else:
                  logger.warning("Please re-run `ln.track()` to make a new version")
                  return "rerun-the-notebook"
@@ -169,21 +171,32 @@ def save_run_context_core(
          source_code.save(upload=True)
          transform.source_code = source_code
          logger.success(f"saved transform.source_code: {transform.source_code}")
+
      # track environment
      filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
      if filepath_env.exists():
-         artifact = ln.Artifact(
-             filepath_env,
-             description="requirements.txt",
-             visibility=0,
-             run=False,
-         )
-         if artifact._state.adding:
+         hash, _ = hash_file(filepath_env)
+         artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
+         new_env_artifact = artifact is None
+         if new_env_artifact:
+             artifact = ln.Artifact(
+                 filepath_env,
+                 description="requirements.txt",
+                 visibility=0,
+                 run=False,
+             )
              artifact.save(upload=True)
          run.environment = artifact
-         logger.success(f"saved run.environment: {run.environment}")
-     # save report file
+         if new_env_artifact:
+             logger.success(f"saved run.environment: {run.environment}")
+
+     # set finished_at
+     if finished_at:
+         run.finished_at = datetime.now(timezone.utc)
+
+     # track report and set is_consecutive
      if not transform.type == TransformType.notebook:
+         run.is_consecutive = True
          run.save()
      else:
          if run.report_id is not None:
@@ -203,16 +216,15 @@ def save_run_context_core(
          report_file.save(upload=True)
          run.report = report_file
          run.is_consecutive = is_consecutive
-         if finished_at:
-             run.finished_at = datetime.now(timezone.utc)
          run.save()
          transform.latest_report = run.report
-     transform.save()
-     if transform.type == TransformType.notebook:
          logger.success(f"saved transform.latest_report: {transform.latest_report}")
-     if ln_setup.settings.instance.is_remote:
+     transform.save()
+
+     # finalize
+     if ln_setup.settings.instance.is_on_hub:
          identifier = ln_setup.settings.instance.slug
-         logger.success(
+         logger.important(
              f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
          )
      # because run & transform changed, update the global run_context
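
Environment tracking now deduplicates the `requirements.txt` artifact by content hash instead of probing `_state.adding` on a throwaway record. A sketch of the lookup pattern, mirroring the diff (the file name is hypothetical):

```python
import lamindb as ln
from lamindb_setup.core.hashing import hash_file

hash, _ = hash_file("run_env_pip_abc123.txt")  # hypothetical requirements dump
artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
if artifact is None:  # no artifact with this content yet: register and upload it
    artifact = ln.Artifact(
        "run_env_pip_abc123.txt", description="requirements.txt", visibility=0, run=False
    )
    artifact.save(upload=True)
```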
lamindb/_query_set.py CHANGED
@@ -11,7 +11,6 @@ from lnschema_core.models import (
      Artifact,
      CanValidate,
      Collection,
-     IsTree,
      IsVersioned,
      Registry,
      Run,
@@ -83,7 +82,7 @@ class RecordsList(UserList):
          return one_helper(self)
 
 
- class QuerySet(models.QuerySet, CanValidate, IsTree):
+ class QuerySet(models.QuerySet, CanValidate):
      """Sets of records returned by queries.
 
      See Also:
@@ -265,25 +264,6 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
 
          return _standardize(cls=self, values=values, field=field, **kwargs)
 
-     @doc_args(IsTree.view_tree.__doc__)
-     def view_tree(
-         self,
-         level: int = -1,
-         limit_to_directories: bool = False,
-         length_limit: int = 1000,
-         max_files_per_dir_per_type: int = 7,
-     ) -> None:
-         """{}."""
-         from .core._view_tree import view_tree as _view_tree
-
-         _view_tree(
-             cls=self,
-             level=level,
-             limit_to_directories=limit_to_directories,
-             length_limit=length_limit,
-             max_files_per_dir_per_type=max_files_per_dir_per_type,
-         )
-
 
  def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
      # evaluating length can be very costly, hence, the try-except block
lamindb/_run.py CHANGED
@@ -13,6 +13,7 @@ def __init__(run: Run, *args, **kwargs):
      transform: Transform = None
      if "transform" in kwargs or len(args) == 1:
          transform = kwargs.pop("transform") if len(args) == 0 else args[0]
+     params: str | None = kwargs.pop("params") if "params" in kwargs else None
      reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
      reference_type: str | None = (
          kwargs.pop("reference_type") if "reference_type" in kwargs else None
@@ -25,6 +26,7 @@ def __init__(run: Run, *args, **kwargs):
          transform=transform,
          reference=reference,
          reference_type=reference_type,
+         json=params,
      )
 
 
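Because `params` is now popped from `kwargs` and stored on the run's `json` field, parameters can be attached when a run is created. A hedged sketch, assuming the public `ln.track()` forwards its arguments to `run_context._track()` (see the `_run_context.py` changes below):

```python
import lamindb as ln

# assumption: ln.track() exposes the new params argument of run_context._track()
ln.track(params={"learning_rate": 0.01, "subset": "lymph_node"})
```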
lamindb/_save.py CHANGED
@@ -6,13 +6,13 @@ import traceback
  from collections import defaultdict
  from datetime import datetime
  from functools import partial
- from typing import Iterable, overload
+ from typing import TYPE_CHECKING, Iterable, overload
 
  import lamindb_setup
  from django.db import transaction
  from django.utils.functional import partition
  from lamin_utils import logger
- from lamindb_setup.core.upath import UPath, print_hook
+ from lamindb_setup.core.upath import print_hook
  from lnschema_core.models import Artifact, Registry
 
  from lamindb.core._settings import settings
@@ -23,12 +23,8 @@ from lamindb.core.storage.paths import (
      store_file_or_folder,
  )
 
- try:
-     from lamindb.core.storage._zarr import write_adata_zarr
- except ImportError:
-
-     def write_adata_zarr(filepath):  # type: ignore
-         raise ImportError("Please install zarr: pip install zarr")
+ if TYPE_CHECKING:
+     from lamindb_setup.core.upath import UPath
 
 
  def save(
@@ -162,7 +158,7 @@ def check_and_attempt_upload(
  def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
      local_path = artifact._local_filepath
 
-     # some in-memory cases (zarr for now)
+     # in-memory cases
      if local_path is None or not local_path.exists():
          return None
 
@@ -284,18 +280,7 @@ def upload_artifact(
      storage_path = attempt_accessing_path(
          artifact, storage_key, using_key=using_key, access_token=access_token
      )
-     msg = f"storing artifact '{artifact.uid}' at '{storage_path}'"
-     if (
-         artifact.suffix == ".zarr"
-         and hasattr(artifact, "_memory_rep")
-         and artifact._memory_rep is not None
-     ):
-         logger.save(msg)
-         print_progress = partial(
-             print_hook, objectname=storage_path.name, action="uploading"
-         )
-         write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
-     elif hasattr(artifact, "_to_store") and artifact._to_store:
-         logger.save(msg)
+     if hasattr(artifact, "_to_store") and artifact._to_store:
+         logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
          store_file_or_folder(artifact._local_filepath, storage_path)
      return storage_path
lamindb/_transform.py CHANGED
@@ -1,10 +1,10 @@
  from __future__ import annotations
 
- from lnschema_core.models import Artifact, Run, Transform
+ from lnschema_core.models import Run, Transform
  from lnschema_core.types import TransformType
 
  from ._run import delete_run_artifacts
- from .core.versioning import get_uid_from_old_version, init_uid
+ from .core.versioning import process_is_new_version_of
 
 
  def __init__(transform: Transform, *args, **kwargs):
@@ -32,15 +32,9 @@ def __init__(transform: Transform, *args, **kwargs):
              "Only name, key, version, type, is_new_version_of, reference, "
              f"reference_type can be passed, but you passed: {kwargs}"
          )
-     if is_new_version_of is None:
-         new_uid = init_uid(version=version, n_full_id=Transform._len_full_uid)
-     else:
-         if not isinstance(is_new_version_of, Transform):
-             raise TypeError("is_new_version_of has to be of type ln.Transform")
-         new_uid, version = get_uid_from_old_version(is_new_version_of, version)
-         if name is None:
-             name = is_new_version_of.name
-
+     new_uid, version, name = process_is_new_version_of(
+         is_new_version_of, version, name, Transform
+     )
      # this is only because the user-facing constructor allows passing an id
      # most others don't
      if uid is None:
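
The uid/version/name resolution moves into the shared `process_is_new_version_of` helper (added in `lamindb/core/versioning.py` below). A hedged usage sketch of the behavior it preserves:

```python
import lamindb as ln

transform_v1 = ln.Transform(name="Preprocess cells")
transform_v1.save()
# inherits the stem uid and, since no name is given, the name of the old version
transform_v2 = ln.Transform(is_new_version_of=transform_v1, version="2")
```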
lamindb/core/__init__.py CHANGED
@@ -12,7 +12,6 @@ Registries:
     Data
     FeatureManager
     LabelManager
-    IsTree
     IsVersioned
     CanValidate
     HasParents
@@ -55,7 +54,6 @@ from lnschema_core.models import (
      CanValidate,
      Data,
      HasParents,
-     IsTree,
      IsVersioned,
      Registry,
  )
lamindb/core/_data.py CHANGED
@@ -345,7 +345,7 @@ def add_labels(
              f" {old_feature_set}"
          )
          old_feature_set.delete()
-         self.features._add_feature_set(feature_set, slot="external")
+         self.features.add_feature_set(feature_set, slot="external")
          logger.save(
              f"linked new feature '{feature.name}' together with new feature set"
              f" {feature_set}"
lamindb/core/_feature_manager.py CHANGED
@@ -236,7 +236,7 @@ class FeatureManager:
              and self._host.artifact.accessor == "DataFrame"
          ):
              slot = "columns" if slot is None else slot
-         self._add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
+         self.add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
 
      def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
          """Add features from DataFrame."""
@@ -325,7 +325,7 @@ class FeatureManager:
          self._host._feature_sets = feature_sets
          self._host.save()
 
-     def _add_feature_set(self, feature_set: FeatureSet, slot: str):
+     def add_feature_set(self, feature_set: FeatureSet, slot: str):
          """Add new feature set to a slot.
 
          Args:
@@ -405,7 +405,8 @@ class FeatureManager:
                      f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
                  )
                  continue
-             # TODO: make sure the uid matches if featureset is composed of same features
-             # feature_set_self.uid = feature_set.uid
+             # make sure the uid matches if featureset is composed of same features
+             if feature_set_self.hash == feature_set.hash:
+                 feature_set_self.uid = feature_set.uid
              logger.info(f"saving {slot} featureset: {feature_set_self}")
-             self._host.features._add_feature_set(feature_set_self, slot)
+             self._host.features.add_feature_set(feature_set_self, slot)
lamindb/core/_run_context.py CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path, PurePath
  from typing import TYPE_CHECKING
 
  from lamin_utils import logger
+ from lamindb_setup.core.hashing import hash_file
  from lnschema_core import Run, Transform, ids
  from lnschema_core.types import TransformType
  from lnschema_core.users import current_user_id
@@ -175,6 +176,17 @@ def raise_transform_settings_error() -> None:
      )
 
 
+ def pretty_pypackages(dependencies: dict) -> str:
+     deps_list = []
+     for pkg, ver in dependencies.items():
+         if ver != "":
+             deps_list.append(pkg + f"=={ver}")
+         else:
+             deps_list.append(pkg)
+     deps_list.sort()
+     return " ".join(deps_list)
+
+
  class run_context:
      """Global run context."""
 
@@ -189,6 +201,7 @@ class run_context:
      def _track(
          cls,
          *,
+         params: dict | None = None,
          transform: Transform | None = None,
          new_run: bool | None = None,
          path: str | None = None,
@@ -204,6 +217,7 @@ class run_context:
              whether the script exists in the git repository and add a link.
 
          Args:
+             params: A dictionary of parameters to track for the run.
              transform: Can be of type `"pipeline"` or `"notebook"`
                  (:class:`~lamindb.core.types.TransformType`).
              new_run: If `False`, loads latest run of transform
@@ -298,11 +312,13 @@ class run_context:
              )
          if run is not None:  # loaded latest run
              run.started_at = datetime.now(timezone.utc)  # update run time
+             run.json = params  # update run params
              logger.important(f"loaded: {run}")
 
          if run is None:  # create new run
              run = Run(
                  transform=cls.transform,
+                 params=params,
              )
              logger.important(f"saved: {run}")
          # can only determine at ln.finish() if run was consecutive in
@@ -315,16 +331,6 @@ class run_context:
          from ._track_environment import track_environment
 
          track_environment(run)
-
-         if not is_run_from_ipython and cls.path is not None:
-             # upload run source code & environment
-             from lamindb._finish import save_run_context_core
-
-             save_run_context_core(
-                 run=cls.run,
-                 transform=cls.transform,
-                 filepath=cls.path,
-             )
          return None
 
      @classmethod
@@ -386,17 +392,12 @@ class run_context:
          # log imported python packages
          if not path_str.startswith("/fileId="):
              try:
-                 from nbproject.dev._metadata_display import DisplayMeta
                  from nbproject.dev._pypackage import infer_pypackages
 
-                 metadata, _, nb = nbproject.header(
-                     filepath=path_str,
-                     metadata_only=True,
-                 )
-                 dm = DisplayMeta(metadata)
+                 nb = nbproject.dev.read_notebook(path_str)
                  logger.important(
                      "notebook imports:"
-                     f" {' '.join(dm.pypackage(infer_pypackages(nb, pin_versions=True)))}"
+                     f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
                  )
              except Exception:
                  logger.debug("inferring imported packages failed")
@@ -451,19 +452,36 @@ class run_context:
          transform.save()
          logger.important(f"updated: {transform}")
      # check whether the notebook source code was already saved
-     if is_run_from_ipython and transform.source_code_id:
-         if os.getenv("LAMIN_TESTING") is None:
-             response = input(
-                 "You already saved source code for this notebook."
-                 " Bump the version before a new run? (y/n)"
-             )
-         else:
-             response = "y"
-         if response == "y":
-             update_stem_uid_or_version(stem_uid, version, bump_version=True)
+     if transform.source_code_id is not None:
+         response = None
+         if is_run_from_ipython:
+             if os.getenv("LAMIN_TESTING") is None:
+                 response = input(
+                     "You already saved source code for this notebook."
+                     " Bump the version before a new run? (y/n)"
+                 )
+             else:
+                 response = "y"
          else:
-             # we want a new stem_uid in this case, hence raise the error
-             raise_transform_settings_error()
+             hash, _ = hash_file(cls.path)  # ignore hash_type for now
+             if hash != transform.source_code.hash:
+                 # only if hashes don't match, we need user input
+                 if os.getenv("LAMIN_TESTING") is None:
+                     response = input(
+                         "You already saved source code for this script and meanwhile modified it without bumping a version."
+                         " Bump the version before a new run? (y/n)"
+                     )
+                 else:
+                     response = "y"
+             else:
+                 logger.important(f"loaded: {transform}")
+         if response is not None:
+             # if a script is re-run and hashes match, we don't need user input
+             if response == "y":
+                 update_stem_uid_or_version(stem_uid, version, bump_version=True)
+             else:
+                 # we want a new stem_uid in this case, hence raise the error
+                 raise_transform_settings_error()
      else:
          logger.important(f"loaded: {transform}")
      cls.transform = transform
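
`pretty_pypackages` replaces nbproject's `DisplayMeta` for rendering inferred notebook imports: pinned packages become `pkg==version`, unpinned ones stay bare, and everything is sorted into one line:

```python
# what the helper defined above produces
print(pretty_pypackages({"scanpy": "", "anndata": "0.10.7"}))
# -> anndata==0.10.7 scanpy
```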
lamindb/core/_settings.py CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, Mapping
 
  import lamindb_setup as ln_setup
  from lamin_utils import logger
- from lamindb_setup._add_remote_storage import add_managed_storage
+ from lamindb_setup._set_managed_storage import set_managed_storage
  from lamindb_setup.core._settings import settings as setup_settings
  from lamindb_setup.core._settings_instance import sanitize_git_repo_url
 
@@ -147,7 +147,7 @@ class Settings:
              path, kwargs = path_kwargs
          else:
              path, kwargs = path_kwargs, {}
-         add_managed_storage(path, **kwargs)
+         set_managed_storage(path, **kwargs)
 
      @property
      def storage_local(self) -> Path:
lamindb/core/_sync_git.py CHANGED
@@ -61,11 +61,15 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
          capture_output=True,
          cwd=repo_dir,
      )
-     commit_hash = result.stdout.decode()
+     # we just care to find one commit
+     # hence, we split by new line ("\n") and use the first one
+     commit_hash = result.stdout.decode().split("\n")[0]
      if commit_hash == "" or result.returncode == 1:
          return None
      else:
-         assert len(commit_hash) == 40
+         assert (
+             len(commit_hash) == 40
+         ), f"commit hash |{commit_hash}| is not 40 characters long"
          return commit_hash
 
 
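Splitting on the first newline guards against git printing more than one matching commit. A small illustration with hypothetical 40-character hashes:

```python
stdout = "deadbeef" * 5 + "\n" + "cafebabe" * 5 + "\n"  # two hypothetical commit hashes
commit_hash = stdout.split("\n")[0]  # keep the first match only
assert len(commit_hash) == 40, f"commit hash |{commit_hash}| is not 40 characters long"
```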
lamindb/core/datasets/_core.py CHANGED
@@ -161,8 +161,8 @@ def anndata_mouse_sc_lymph_node(
      adata.obs.columns = (
          adata.obs.columns.str.replace("Sample Characteristic", "")
          .str.replace("Factor Value ", "Factor Value:", regex=True)
-         .str.replace("Factor Value\[", "Factor Value:", regex=True)  # noqa
-         .str.replace(" Ontology Term\[", "ontology_id:", regex=True)  # noqa
+         .str.replace("Factor Value\\[", "Factor Value:", regex=True)
+         .str.replace(" Ontology Term\\[", "ontology_id:", regex=True)
          .str.strip("[]")
          .str.replace("organism part", "tissue")
          .str.replace("organism", "organism")
lamindb/core/storage/__init__.py CHANGED
@@ -10,6 +10,6 @@ from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
 
  from ._anndata_sizes import size_adata
  from ._backed_access import AnnDataAccessor, BackedAccessor
- from ._valid_suffixes import VALID_SUFFIXES
- from .objects import infer_suffix, write_to_file
+ from ._valid_suffixes import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
+ from .objects import infer_suffix, write_to_disk
  from .paths import delete_storage, load_to_memory
lamindb/core/storage/_valid_suffixes.py CHANGED
@@ -1,3 +1,5 @@
- from lamindb_setup.core.upath import VALID_SUFFIXES
+ from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
 
- VALID_SUFFIXES.update({".vitessce.json", ".anndata.zarr", ".spatialdata.zarr"})
+ # add new composite suffixes like so
+ VALID_COMPOSITE_SUFFIXES.update({".vitessce.json"})
+ # can do the same for simple valid suffixes
lamindb/core/storage/objects.py CHANGED
@@ -1,5 +1,6 @@
  from __future__ import annotations
 
+ from pathlib import PurePosixPath
  from typing import TYPE_CHECKING
 
  from anndata import AnnData
@@ -21,7 +22,7 @@ def infer_suffix(dmem, adata_format: str | None = None):
      """Infer LaminDB storage file suffix from a data object."""
      if isinstance(dmem, AnnData):
          if adata_format is not None:
-             if adata_format not in ("h5ad", "zarr"):
+             if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
                  raise ValueError(
                      "Error when specifying AnnData storage format, it should be"
                      f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
@@ -40,9 +41,15 @@ def infer_suffix(dmem, adata_format: str | None = None):
      raise NotImplementedError
 
 
- def write_to_file(dmem, filepath: UPathStr):
+ def write_to_disk(dmem, filepath: UPathStr):
      if isinstance(dmem, AnnData):
-         dmem.write(filepath)
+         suffix = PurePosixPath(filepath).suffix
+         if suffix == ".h5ad":
+             dmem.write_h5ad(filepath)
+         elif suffix == ".zarr":
+             dmem.write_zarr(filepath)
+         else:
+             raise NotImplementedError
      elif isinstance(dmem, DataFrame):
          dmem.to_parquet(filepath)
      else:
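
`write_to_disk` now dispatches on the target suffix instead of always calling `AnnData.write`, so one code path serves both h5ad and zarr targets. A hedged usage sketch:

```python
import anndata as ad
import numpy as np
from lamindb.core.storage import write_to_disk  # re-exported per the __init__ diff above

adata = ad.AnnData(np.ones((3, 2)))
write_to_disk(adata, "example.h5ad")  # routed to AnnData.write_h5ad
write_to_disk(adata, "example.zarr")  # routed to AnnData.write_zarr
```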
lamindb/core/storage/paths.py CHANGED
@@ -140,7 +140,7 @@ def delete_storage(storagepath: Path):
      if not storagepath.is_relative_to(settings.storage):  # type: ignore
          allow_delete = False
          if setup_settings.instance.keep_artifacts_local:
-             allow_delete = storagepath.is_relative_to(
+             allow_delete = storagepath.is_relative_to(  # type: ignore
                  setup_settings.instance.storage_local.root
              )
          if not allow_delete:
lamindb/core/versioning.py CHANGED
@@ -42,10 +42,7 @@ def init_uid(
      if is_new_version_of is not None:
          stem_uid = is_new_version_of.stem_uid
      else:
-         if n_full_id == 20:
-             stem_uid = ids.base62_16()
-         elif n_full_id == 16:
-             stem_uid = ids.base62_12()
+         stem_uid = ids.base62(n_full_id - 4)
      if version is not None:
          if not isinstance(version, str):
              raise ValueError(
@@ -90,3 +87,20 @@ def get_new_path_from_uid(old_path: UPath, old_uid: str, new_uid: str):
      # for cloud path, the rename target must be the last part of the path
      new_path = old_path.name.replace(old_uid, new_uid)
      return new_path
+
+
+ def process_is_new_version_of(
+     is_new_version_of: IsVersioned,
+     version: str | None,
+     name: str | None,
+     type: type[IsVersioned],
+ ) -> tuple[str, str, str]:
+     if is_new_version_of is not None and not isinstance(is_new_version_of, type):
+         raise TypeError(f"is_new_version_of has to be of type {type}")
+     if is_new_version_of is None:
+         uid = init_uid(version=version, n_full_id=type._len_stem_uid)
+     else:
+         uid, version = get_uid_from_old_version(is_new_version_of, version)
+         if name is None:
+             name = is_new_version_of.name
+     return uid, version, name
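
The hard-coded id lengths collapse into one rule: a full uid is a stem plus a 4-character version suffix, so the stem has `n_full_id - 4` characters. A sketch of what `ids.base62` does under that assumption:

```python
import secrets
import string

def base62(n_char: int) -> str:
    # sketch of lnschema_core.ids.base62: n_char random base62 characters
    alphabet = string.digits + string.ascii_letters
    return "".join(secrets.choice(alphabet) for _ in range(n_char))

stem_uid = base62(20 - 4)  # 16-character stem for a 20-character full uid
```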
{lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lamindb
- Version: 0.71.0
+ Version: 0.71.2
  Summary: A data framework for biology.
  Author-email: Lamin Labs <open-source@lamin.ai>
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Dist: lnschema_core==0.66.0
- Requires-Dist: lamindb_setup==0.71.0
+ Requires-Dist: lnschema_core==0.66.4
+ Requires-Dist: lamindb_setup==0.71.3
  Requires-Dist: lamin_utils==0.13.2
- Requires-Dist: lamin_cli==0.13.0
+ Requires-Dist: lamin_cli==0.13.1
  Requires-Dist: rapidfuzz
  Requires-Dist: pyarrow
  Requires-Dist: typing_extensions!=4.6.0
@@ -37,7 +37,7 @@ Requires-Dist: faker-biology ; extra == "dev"
  Requires-Dist: django-schema-graph ; extra == "erdiagram"
  Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
  Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
- Requires-Dist: nbproject==0.10.0 ; extra == "jupyter"
+ Requires-Dist: nbproject==0.10.2 ; extra == "jupyter"
  Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
  Requires-Dist: nbconvert ; extra == "jupyter"
  Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
{lamindb-0.71.0.dist-info → lamindb-0.71.2.dist-info}/RECORD CHANGED
@@ -1,55 +1,54 @@
- lamindb/__init__.py,sha256=T_mLeXTbOSi7s2DSoGxF-FrVBCSQLvBj5t02ueRNWSI,2182
+ lamindb/__init__.py,sha256=GKrW6unkqBBwwpxTXjuUv-5k4c4unimsV-vGFSWt68I,2182
  lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
- lamindb/_artifact.py,sha256=875jV8J-GgvhoscWPmg73ogTa9rAVHQdAqc3V8S46Sc,40157
+ lamindb/_artifact.py,sha256=8uBW-dhuWyBUQGs728sAPCnuhTic-NKjSbaneF07aMo,40106
  lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
  lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
  lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
  lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
  lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
- lamindb/_finish.py,sha256=iUo6j89_hTP-OuRfUAj_i1YB1B5FU9QTtwxXKdX_J_4,8279
+ lamindb/_finish.py,sha256=6GwhqrC-x-JdFd16i7-uyhCWeQgGKxr25aSsSXPZt4g,8598
  lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
  lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
  lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
  lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
- lamindb/_query_set.py,sha256=K_0rJ6Keltl3Pvglvd7kkzkJEy2u6Kp0TKiHLzwqH18,11359
+ lamindb/_query_set.py,sha256=n0owd74cTzGz6-mIv8SlDz0wcyRz7Xw3Ke1LhE8UlIg,10784
  lamindb/_registry.py,sha256=fmX-BUnan3Y0WrEAx3qNwRYCIJwJgjoKnRnpgcXujEI,19358
- lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
- lamindb/_save.py,sha256=r-pUKi2xBW25brIMzDbf8iI-4xggX-X2C9cIYHzK1uI,11460
+ lamindb/_run.py,sha256=We50MUeGH778begutDGoNFM-n5_81_BfMCnZS1bdkt0,1937
+ lamindb/_save.py,sha256=_7r3TUV3B6Hp75r5O_ymu3fKWyBHbGa5vmE_pxrtsVI,10923
  lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
- lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
+ lamindb/_transform.py,sha256=E9C7psuOnsNrUQpWRuGgEUM8_pc7YhDn7n4ieHzB4X0,3169
  lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
  lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
- lamindb/core/__init__.py,sha256=MB1gEMKUf0GBQrI3dH8WRZOZQmWR4HIojXK_hXXVdqA,1235
- lamindb/core/_data.py,sha256=xULvge-txEO4r4amNQZRZTH3n3BqOLWauyNfxbB6WOA,17674
- lamindb/core/_feature_manager.py,sha256=LlYgU71AoTnrseWFCq-oZkUAYWITtRR7BNFm0AhHe-c,15773
+ lamindb/core/__init__.py,sha256=TI9_1Jtpwae_cUPQ3-U0RRPH5c3GBA-gLhHvlAk_Nlo,1213
+ lamindb/core/_data.py,sha256=Lico6-Vx15bNpGLl1bqFqEsh62pD4YKOOBnmahse1tI,17673
+ lamindb/core/_feature_manager.py,sha256=uTzZZ7-qqEAmdwi48Holy2j5VGTgmoQxhb21r6mLShI,15824
  lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
  lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
- lamindb/core/_run_context.py,sha256=zwsaq1iW3yb8Y6IjpWzqUL3e0i4l1bnmPF6V2USMqpI,16155
- lamindb/core/_settings.py,sha256=lhfn6gRjZw0atrA5Hr34m1nkPFXd8DAUMEesCGat1tA,6130
- lamindb/core/_sync_git.py,sha256=IlTqw55inPp_RZbN_YScaCeKza7LeF9mClQw55W3_d4,3921
+ lamindb/core/_run_context.py,sha256=7iCCOB2z154puBI7ZKzcaEZ5l6_9S8aSYBOBJI65lyc,17117
+ lamindb/core/_settings.py,sha256=rW1KfEXfT56XErwcnSuQxaCytpOy1kJ-u7tVmkmNmxY,6131
+ lamindb/core/_sync_git.py,sha256=06Te35UZj2QBaHNcc59VSC9vJgcFct7Z2sK78NLkZBs,4119
  lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
  lamindb/core/_transform_settings.py,sha256=eV96QKX9jOojjzF-a0oo0wXQsMXN2F6QV7orE06oFC8,161
- lamindb/core/_view_tree.py,sha256=PTwmKZSQL2UhKuSdV5Wp7o1JDjv1qwgsVCj3ThkbKb8,3447
  lamindb/core/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
  lamindb/core/fields.py,sha256=Jgi_XI-iTe6cT7oD8FV_JqEpjN1Q9rZWwL8VLtj4jkA,164
  lamindb/core/types.py,sha256=xeQF2x40p2pR9eIVQrXT74RrS810z2fbjmTRTSQUqPM,230
- lamindb/core/versioning.py,sha256=DsEHpCueNwhRiIaRH5-O8H_1fJVNtWslCRx30YiIS5o,3080
+ lamindb/core/versioning.py,sha256=T9d28erodCUmFlRA7InralbRoffdniPQxBE7qWqs2u8,3601
  lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
- lamindb/core/datasets/_core.py,sha256=36vUOYFkX_4hBAnM_BujV5BRARMI5b9iI_SM9qS7wGc,20191
+ lamindb/core/datasets/_core.py,sha256=9bcDfVfMZ1h1WAS88ZBjy-R91xbP2KIm_ofHguXAKpY,20177
  lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
- lamindb/core/storage/__init__.py,sha256=6jnbFj-eBV3xZt04qP-kTsMWoP8YwpM50wlnnxDYsZU,415
+ lamindb/core/storage/__init__.py,sha256=5LUFQKRr2BX24d-yWBezhTXBV83sShcOvPj5Y5u6qIg,441
  lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
  lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA-zLy59uDs,24536
- lamindb/core/storage/_valid_suffixes.py,sha256=sewRRU3I6fJ-Jd5ACNcco_o3hic9zmqTs8BuZui-450,133
+ lamindb/core/storage/_valid_suffixes.py,sha256=J08aglC9oo35pzahj0SQXW9IHib8Asp4dc11co-2uys,212
  lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
- lamindb/core/storage/objects.py,sha256=5LbBeZVKuOOB8DceSE-PN8elKY0N9OhFXZPQJE4lK48,1538
- lamindb/core/storage/paths.py,sha256=ib50kmRGhjRTHak20i94ruXVqLL9xQnQuqJSHEW50Q8,7866
+ lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
+ lamindb/core/storage/paths.py,sha256=JTtiTlAMICH4gkw7iZNwTRfNTT0WxrBoKiag_7E9g4I,7882
  lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
  lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
  lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
- lamindb-0.71.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- lamindb-0.71.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
- lamindb-0.71.0.dist-info/METADATA,sha256=UbJOa1wX6oHrzN1WXgN_YiudHPiw8rOzBYDE3ricYCM,2674
- lamindb-0.71.0.dist-info/RECORD,,
+ lamindb-0.71.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ lamindb-0.71.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+ lamindb-0.71.2.dist-info/METADATA,sha256=l49_xPwqfUDB6jUvUQoAVeQu8Tj3JUNCfTPB9cqOq_Y,2674
+ lamindb-0.71.2.dist-info/RECORD,,
lamindb/core/_view_tree.py DELETED
@@ -1,116 +0,0 @@
- from __future__ import annotations
-
- from collections import defaultdict
- from pathlib import Path
- from typing import Iterable
-
- from lamindb_setup import settings as setup_settings
- from lnschema_core.models import Artifact, Storage
-
-
- def view_tree(
-     cls,
-     level: int = -1,
-     limit_to_directories: bool = False,
-     length_limit: int = 1000,
-     max_files_per_dir_per_type: int = 7,
- ) -> None:
-     """{}."""
-     if cls.__class__.__name__ == "QuerySet":
-         print("queryset")
-         qs = cls
-         storage_ids = qs.list("storage_id")
-     elif cls == Artifact:
-         print("file")
-         qs = cls.filter(storage_id=setup_settings.storage.id).all()
-         storage_ids = Storage.filter().list("id")
-     else:
-         print("else")
-         return
-     storages = Storage.filter().all()
-     storage_roots = {
-         storage_id: storages.get(id=storage_id).root for storage_id in storage_ids
-     }
-     keys = set()
-     for artifact in qs:
-         root = storage_roots.get(artifact.storage_id, "")
-         keys.add(f"{root}/{artifact.key}")
-
-     _view_tree(
-         keys=keys,
-         level=level,
-         only_dirs=limit_to_directories,
-         limit=length_limit,
-         max_files_per_dir_per_type=max_files_per_dir_per_type,
-     )
-
-
- def _view_tree(
-     keys: Iterable[str],
-     *,
-     level: int = -1,
-     only_dirs: bool = False,
-     limit: int = 1000,
-     max_files_per_dir_per_type: int = 7,
- ) -> None:
-     # Create a nested dictionary from keys
-     def tree():
-         return defaultdict(tree)
-
-     root = tree()
-
-     n_files = 0
-     n_directories = 0
-     suffixes = set()
-
-     for key in keys:
-         parts = key.split("/")
-         node = root
-         for part in parts:
-             node = node[part]
-         if node == {}:
-             n_files += 1
-             suffix = Path(part).suffix
-             if suffix:
-                 suffixes.add(suffix)
-         else:
-             n_directories += 1
-
-     # Function to print the tree
-     def print_tree(node, prefix="", depth=0, count=None, n_files_per_dir_per_type=None):
-         if count is None:
-             count = [0]
-         if n_files_per_dir_per_type is None:
-             n_files_per_dir_per_type = defaultdict(int)
-
-         if level != -1 and depth > level:
-             return
-         for name, child in node.items():
-             if count[0] >= limit:
-                 return
-             if only_dirs and child == {}:
-                 continue
-             suffix = Path(name).suffix
-             n_files_per_dir_per_type[suffix] += 1
-             if (
-                 depth > 0
-                 and n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type
-             ):
-                 continue
-             new_prefix = prefix + ("├── " if name != list(node.keys())[-1] else "└── ")
-             print(new_prefix + name)
-             count[0] += 1
-             if child:
-                 print_tree(
-                     child,
-                     prefix + ("│   " if name != list(node.keys())[-1] else "    "),
-                     depth + 1,
-                     count,
-                     (
-                         defaultdict(int) if depth == 0 else n_files_per_dir_per_type
-                     ),  # Reset the counter for each directory
-                 )
-
-     suffix_message = f" with suffixes {', '.join(suffixes)}" if n_files > 0 else ""
-     print(f"{n_directories} directories, {n_files} files{suffix_message}")
-     print_tree(root)