lamindb-0.70.4-py3-none-any.whl → lamindb-0.71.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
lamindb/__init__.py CHANGED
@@ -40,7 +40,8 @@ Modules & settings:
 
 """
 
-__version__ = "0.70.4" # denote a release candidate for 0.1.0 with 0.1rc1
+# denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
+__version__ = "0.71.1"
 
 import os as _os
 
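The relocated comment refers to PEP 440 pre-release tags. A quick illustration with the `packaging` library (an assumption; it is not a dependency listed in this diff):

    from packaging.version import Version

    # "0.1a1", "0.1b1", "0.1rc1" are pre-releases ordered before the final release
    assert Version("0.1a1") < Version("0.1b1") < Version("0.1rc1") < Version("0.1")
    assert Version("0.1rc1").is_prerelease
    assert not Version("0.71.1").is_prerelease  # the release this diff introduces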
lamindb/_artifact.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import shutil
 from pathlib import Path, PurePath, PurePosixPath
 from typing import TYPE_CHECKING, Any, Mapping
 
@@ -9,9 +10,9 @@ import pandas as pd
 from anndata import AnnData
 from lamin_utils import colors, logger
 from lamindb_setup import settings as setup_settings
-from lamindb_setup._init_instance import register_storage
-from lamindb_setup.core import StorageSettings
+from lamindb_setup._init_instance import register_storage_in_instance
 from lamindb_setup.core._docs import doc_args
+from lamindb_setup.core._settings_storage import init_storage
 from lamindb_setup.core.hashing import b16_to_b64, hash_file, hash_md5s_from_dir
 from lamindb_setup.core.upath import (
     create_path,
@@ -20,7 +21,6 @@ from lamindb_setup.core.upath import (
     get_stat_file_cloud,
 )
 from lnschema_core import Artifact, Run, Storage
-from lnschema_core.models import IsTree
 from lnschema_core.types import (
     VisibilityChoice,
 )
@@ -34,8 +34,7 @@ from lamindb.core.storage import (
     delete_storage,
     infer_suffix,
     load_to_memory,
-    size_adata,
-    write_to_file,
+    write_to_disk,
 )
 from lamindb.core.storage.paths import (
     auto_storage_key_from_artifact,
@@ -100,12 +99,9 @@ def process_pathlike(
         # for the storage root: the bucket
         if not isinstance(filepath, LocalPathClasses):
             # for a cloud path, new_root is always the bucket name
-            # we should check this assumption
             new_root = list(filepath.parents)[-1]
-            new_root_str = new_root.as_posix().rstrip("/")
-            logger.warning(f"generating a new storage location at {new_root_str}")
-            storage_settings = StorageSettings(new_root_str)
-            storage_record = register_storage(storage_settings)
+            storage_settings = init_storage(new_root)
+            storage_record = register_storage_in_instance(storage_settings)
             use_existing_storage_key = True
             return storage_record, use_existing_storage_key
     # if the filepath is local
@@ -175,8 +171,7 @@ def process_data(
         # Alex: I don't understand the line below
         if path.suffixes == []:
             path = path.with_suffix(suffix)
-        if suffix != ".zarr":
-            write_to_file(data, path)
+        write_to_disk(data, path)
         use_existing_storage_key = False
     else:
         raise NotImplementedError(
@@ -196,16 +191,13 @@ def get_stat_or_artifact(
     n_objects = None
     if settings.upon_file_create_skip_size_hash:
         return None, None, None, n_objects
-    if suffix == ".zarr" and memory_rep is not None and isinstance(memory_rep, AnnData):
-        size = size_adata(memory_rep)
-        return size, None, None, n_objects
     stat = path.stat() # one network request
     if not isinstance(path, LocalPathClasses):
         size, hash, hash_type = None, None, None
         if stat is not None:
             if "ETag" in stat: # is file
                 size, hash, hash_type = get_stat_file_cloud(stat)
-            elif path.is_dir():
+            elif stat["type"] == "directory":
                 size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
         if hash is None:
             logger.warning(f"did not add hash for {path}")
@@ -545,11 +537,13 @@ def __init__(artifact: Artifact, *args, **kwargs):
    skip_check_exists = (
        kwargs.pop("skip_check_exists") if "skip_check_exists" in kwargs else False
    )
-    default_storage = (
-        kwargs.pop("default_storage")
-        if "default_storage" in kwargs
-        else settings._storage_settings.record
-    )
+    if "default_storage" in kwargs:
+        default_storage = kwargs.pop("default_storage")
+    else:
+        if setup_settings.instance.keep_artifacts_local:
+            default_storage = setup_settings.instance.storage_local.record
+        else:
+            default_storage = setup_settings.instance.storage.record
    using_key = (
        kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
    )
@@ -589,7 +583,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
        init_self_from_db(artifact, kwargs_or_artifact)
        # adding "key" here is dangerous because key might be auto-populated
        update_attributes(artifact, {"description": description})
-        if artifact.key != key:
+        if artifact.key != key and key is not None:
            logger.warning(
                f"key {artifact.key} on existing artifact differs from passed key {key}"
            )
@@ -914,11 +908,25 @@ def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs)
 
 # docstring handled through attach_func_to_class_method
 def cache(self, is_run_input: bool | None = None) -> Path:
-    _track_run_input(self, is_run_input)
-
     using_key = settings._using_key
     filepath = filepath_from_artifact(self, using_key=using_key)
-    return setup_settings.instance.storage.cloud_to_local(filepath, print_progress=True)
+    try:
+        cache_path = setup_settings.instance.storage.cloud_to_local(
+            filepath, print_progress=True
+        )
+    except Exception as e:
+        if not isinstance(filepath, LocalPathClasses):
+            cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
+                filepath
+            )
+            if cache_path.is_file():
+                cache_path.unlink(missing_ok=True)
+            elif cache_path.is_dir():
+                shutil.rmtree(cache_path)
+        raise e
+    # only call if sync is successfull
+    _track_run_input(self, is_run_input)
+    return cache_path
 
 
 # docstring handled through attach_func_to_class_method
@@ -995,7 +1003,19 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
 
 # docstring handled through attach_func_to_class_method
 def save(self, upload: bool | None = None, **kwargs) -> None:
+    state_was_adding = self._state.adding
     access_token = kwargs.pop("access_token", None)
+    local_path = None
+    if upload and setup_settings.instance.keep_artifacts_local:
+        # switch local storage location to cloud
+        local_path = self.path
+        self.storage_id = setup_settings.instance.storage.id
+        self._local_filepath = local_path
+        # switch to virtual storage key upon upload
+        # the local filepath is already cached at that point
+        self.key_is_virtual = True
+        # ensure that the artifact is uploaded
+        self._to_store = True
 
     self._save_skip_storage(**kwargs)
 
@@ -1011,6 +1031,17 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
    exception = check_and_attempt_clearing(self, using_key)
    if exception is not None:
        raise RuntimeError(exception)
+    if local_path is not None and not state_was_adding:
+        # only move the local artifact to cache if it was not newly created
+        local_path_cache = ln_setup.settings.storage.cache_dir / local_path.name
+        # don't use Path.rename here because of cross-device link error
+        # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
+        shutil.move(
+            local_path, # type: ignore
+            local_path_cache,
+        )
+        logger.important(f"moved local artifact to cache: {local_path_cache}")
+    return self
 
 
 def _save_skip_storage(file, **kwargs) -> None:
@@ -1027,27 +1058,6 @@ def path(self) -> Path | UPath:
    return filepath_from_artifact(self, using_key)
 
 
-@classmethod # type: ignore
-@doc_args(IsTree.view_tree.__doc__)
-def view_tree(
-    cls,
-    level: int = -1,
-    limit_to_directories: bool = False,
-    length_limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    """{}."""
-    from lamindb.core._view_tree import view_tree as _view_tree
-
-    _view_tree(
-        cls=cls,
-        level=level,
-        limit_to_directories=limit_to_directories,
-        length_limit=length_limit,
-        max_files_per_dir_per_type=max_files_per_dir_per_type,
-    )
-
-
 # docstring handled through attach_func_to_class_method
 def restore(self) -> None:
    self.visibility = VisibilityChoice.default.value
@@ -1067,7 +1077,6 @@ METHOD_NAMES = [
     "replace",
     "from_dir",
     "restore",
-    "view_tree",
 ]
 
 if ln_setup._TESTING:
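The changes above wire `keep_artifacts_local` through `__init__`, `cache`, and `save`. A usage sketch of the resulting behavior (hypothetical file path; assumes an instance configured with keep-artifacts-local enabled):

    import lamindb as ln

    # with keep_artifacts_local enabled, new artifacts default to the local storage location
    artifact = ln.Artifact("./data/measurements.parquet", description="local-first artifact")
    artifact.save()

    # save(upload=True) switches the record to the cloud storage location, uploads it,
    # and - per the diff - moves the previously saved local file into the cache
    artifact.save(upload=True)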
lamindb/_finish.py CHANGED
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
 
 import lamindb_setup as ln_setup
 from lamin_utils import logger
+from lamindb_setup.core.hashing import hash_file
 from lnschema_core.types import TransformType
 
 from .core._run_context import is_run_from_ipython, run_context
@@ -35,7 +36,7 @@ def get_seconds_since_modified(filepath) -> float:
 def finish():
     """Mark a tracked run as finished.
 
-    If run in a notebook, it saves the run report & source code to your default storage location.
+    Saves source code and, for notebooks, a run report to your default storage location.
     """
     if run_context.path is None:
         raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
@@ -47,16 +48,12 @@ def finish():
            raise NotebookNotSaved(
                "Please save the notebook in your editor right before running `ln.finish()`"
            )
-        save_run_context_core(
-            run=run_context.run,
-            transform=run_context.transform,
-            filepath=run_context.path,
-            finished_at=True,
-        )
-    else: # scripts
-        # save_run_context_core was already called during ln.track()
-        run_context.run.finished_at = datetime.now(timezone.utc) # update run time
-        run_context.run.save()
+    save_run_context_core(
+        run=run_context.run,
+        transform=run_context.transform,
+        filepath=run_context.path,
+        finished_at=True,
+    )
 
 
 def save_run_context_core(
@@ -121,7 +118,11 @@ def save_run_context_core(
        # first, copy the notebook file to a temporary file in the cache
        source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
        shutil.copy2(filepath, source_code_path) # copy
-        subprocess.run(f"nbstripout '{source_code_path}'", shell=True, check=True)
+        subprocess.run(
+            f"nbstripout '{source_code_path}' --extra-keys='metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension'",
+            shell=True,
+            check=True,
+        )
    # find initial versions of source codes and html reports
    prev_report = None
    prev_source = None
@@ -134,22 +135,27 @@ def save_run_context_core(
        if prev_transform.source_code_id is not None:
            prev_source = prev_transform.source_code
    ln.settings.silence_file_run_transform_warning = True
-    # register the source code
-    if transform.source_code is not None:
-        # check if the hash of the notebook source code matches
-        check_source_code = ln.Artifact(source_code_path, key="dummy")
-        if check_source_code._state.adding:
+
+    # track source code
+    if transform.source_code_id is not None:
+        # check if the hash of the transform source code matches
+        # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
+        hash, _ = hash_file(source_code_path) # ignore hash_type for now
+        if hash != transform.source_code.hash:
            if os.getenv("LAMIN_TESTING") is None:
                # in test, auto-confirm overwrite
                response = input(
-                    f"You are about to overwrite existing source code (hash {transform.source_code.hash}) for transform version"
+                    f"You are about to replace (overwrite) existing source code (hash '{transform.source_code.hash}') for transform version"
                    f" '{transform.version}'. Proceed? (y/n)"
                )
            else:
                response = "y"
            if response == "y":
                transform.source_code.replace(source_code_path)
-                transform.source_code.save()
+                transform.source_code.save(upload=True)
+                logger.success(
+                    f"replaced transform.source_code: {transform.source_code}"
+                )
            else:
                logger.warning("Please re-run `ln.track()` to make a new version")
                return "rerun-the-notebook"
@@ -162,24 +168,35 @@ def save_run_context_core(
            visibility=0, # hidden file
            run=False,
        )
-        source_code.save()
+        source_code.save(upload=True)
        transform.source_code = source_code
        logger.success(f"saved transform.source_code: {transform.source_code}")
+
    # track environment
    filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
    if filepath_env.exists():
-        artifact = ln.Artifact(
-            filepath_env,
-            description="requirements.txt",
-            visibility=0,
-            run=False,
-        )
-        if artifact._state.adding:
-            artifact.save()
+        hash, _ = hash_file(filepath_env)
+        artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
+        new_env_artifact = artifact is None
+        if new_env_artifact:
+            artifact = ln.Artifact(
+                filepath_env,
+                description="requirements.txt",
+                visibility=0,
+                run=False,
+            )
+            artifact.save(upload=True)
        run.environment = artifact
-        logger.success(f"saved run.environment: {run.environment}")
-    # save report file
+        if new_env_artifact:
+            logger.success(f"saved run.environment: {run.environment}")
+
+    # set finished_at
+    if finished_at:
+        run.finished_at = datetime.now(timezone.utc)
+
+    # track report and set is_consecutive
    if not transform.type == TransformType.notebook:
+        run.is_consecutive = True
        run.save()
    else:
        if run.report_id is not None:
@@ -187,7 +204,7 @@ def save_run_context_core(
                "there is already an existing report for this run, replacing it"
            )
            run.report.replace(filepath_html)
-            run.report.save()
+            run.report.save(upload=True)
        else:
            report_file = ln.Artifact(
                filepath_html,
@@ -196,19 +213,18 @@ def save_run_context_core(
                visibility=0, # hidden file
                run=False,
            )
-            report_file.save()
+            report_file.save(upload=True)
            run.report = report_file
        run.is_consecutive = is_consecutive
-        if finished_at:
-            run.finished_at = datetime.now(timezone.utc)
        run.save()
        transform.latest_report = run.report
-        transform.save()
-    if transform.type == TransformType.notebook:
        logger.success(f"saved transform.latest_report: {transform.latest_report}")
-        if ln_setup.settings.instance.is_remote:
+    transform.save()
+
+    # finalize
+    if ln_setup.settings.instance.is_on_hub:
        identifier = ln_setup.settings.instance.slug
-        logger.success(
+        logger.important(
            f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
        )
    # because run & transform changed, update the global run_context
lamindb/_query_set.py CHANGED
@@ -11,7 +11,6 @@ from lnschema_core.models import (
     Artifact,
     CanValidate,
     Collection,
-    IsTree,
     IsVersioned,
     Registry,
     Run,
@@ -83,7 +82,7 @@ class RecordsList(UserList):
         return one_helper(self)
 
 
-class QuerySet(models.QuerySet, CanValidate, IsTree):
+class QuerySet(models.QuerySet, CanValidate):
     """Sets of records returned by queries.
 
     See Also:
@@ -265,25 +264,6 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
 
         return _standardize(cls=self, values=values, field=field, **kwargs)
 
-    @doc_args(IsTree.view_tree.__doc__)
-    def view_tree(
-        self,
-        level: int = -1,
-        limit_to_directories: bool = False,
-        length_limit: int = 1000,
-        max_files_per_dir_per_type: int = 7,
-    ) -> None:
-        """{}."""
-        from .core._view_tree import view_tree as _view_tree
-
-        _view_tree(
-            cls=self,
-            level=level,
-            limit_to_directories=limit_to_directories,
-            length_limit=length_limit,
-            max_files_per_dir_per_type=max_files_per_dir_per_type,
-        )
-
 
 def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
     # evaluating length can be very costly, hence, the try-except block
lamindb/_registry.py CHANGED
@@ -113,7 +113,7 @@ def __init__(orm: Registry, *args, **kwargs):
        logger.warning(
            f"loaded {orm.__class__.__name__} record with same"
            f" name{version_comment}: '{kwargs['name']}' "
-            "(disable via ln.settings.upon_create_search_names)"
+            "(disable via `ln.settings.upon_create_search_names`)"
        )
        init_self_from_db(orm, existing_record)
        return None
@@ -498,7 +498,7 @@ def transfer_to_default_db(
 
 
 # docstring handled through attach_func_to_class_method
-def save(self, *args, **kwargs) -> None:
+def save(self, *args, **kwargs) -> Registry:
    using_key = None
    if "using" in kwargs:
        using_key = kwargs["using"]
@@ -540,6 +540,7 @@ def save(self, *args, **kwargs) -> None:
            self.features._add_from(self_on_db, **add_from_kwargs)
            logger.info("transfer labels")
            self.labels.add_from(self_on_db, **add_from_kwargs)
+    return self
 
 
 METHOD_NAMES = [
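Returning `self` from `save()` enables create-and-save in a single expression; a minimal sketch (hypothetical label name):

    import lamindb as ln

    # previously save() returned None, so this chaining was not possible
    label = ln.ULabel(name="experiment-1").save()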
lamindb/_save.py CHANGED
@@ -6,13 +6,13 @@ import traceback
 from collections import defaultdict
 from datetime import datetime
 from functools import partial
-from typing import Iterable, overload
+from typing import TYPE_CHECKING, Iterable, overload
 
 import lamindb_setup
 from django.db import transaction
 from django.utils.functional import partition
 from lamin_utils import logger
-from lamindb_setup.core.upath import UPath, print_hook
+from lamindb_setup.core.upath import print_hook
 from lnschema_core.models import Artifact, Registry
 
 from lamindb.core._settings import settings
@@ -23,12 +23,8 @@ from lamindb.core.storage.paths import (
     store_file_or_folder,
 )
 
-try:
-    from lamindb.core.storage._zarr import write_adata_zarr
-except ImportError:
-
-    def write_adata_zarr(filepath): # type: ignore
-        raise ImportError("Please install zarr: pip install zarr")
+if TYPE_CHECKING:
+    from lamindb_setup.core.upath import UPath
 
 
 def save(
@@ -162,7 +158,7 @@ def check_and_attempt_upload(
 def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
     local_path = artifact._local_filepath
 
-    # some in-memory cases (zarr for now)
+    # in-memory cases
     if local_path is None or not local_path.exists():
         return None
 
@@ -284,16 +280,7 @@ def upload_artifact(
    storage_path = attempt_accessing_path(
        artifact, storage_key, using_key=using_key, access_token=access_token
    )
-    msg = f"storing artifact '{artifact.uid}' at '{storage_path}'"
-    if (
-        artifact.suffix == ".zarr"
-        and hasattr(artifact, "_memory_rep")
-        and artifact._memory_rep is not None
-    ):
-        logger.save(msg)
-        print_progress = partial(print_hook, filepath=storage_path, action="uploading")
-        write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
-    elif hasattr(artifact, "_to_store") and artifact._to_store:
-        logger.save(msg)
+    if hasattr(artifact, "_to_store") and artifact._to_store:
+        logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
        store_file_or_folder(artifact._local_filepath, storage_path)
    return storage_path
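Moving `UPath` under `TYPE_CHECKING` keeps the annotation-only import out of the runtime path; the general idiom, as a minimal sketch (the `describe` helper is hypothetical):

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # seen by type checkers only, never imported at runtime
        from lamindb_setup.core.upath import UPath

    def describe(path: UPath) -> str:
        # the __future__ import turns annotations into strings, so no runtime import is needed
        return f"artifact path: {path}"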
lamindb/core/__init__.py CHANGED
@@ -12,7 +12,6 @@ Registries:
     Data
     FeatureManager
     LabelManager
-    IsTree
     IsVersioned
     CanValidate
     HasParents
@@ -55,7 +54,6 @@ from lnschema_core.models import (
     CanValidate,
     Data,
     HasParents,
-    IsTree,
     IsVersioned,
     Registry,
 )
lamindb/core/_data.py CHANGED
@@ -345,7 +345,7 @@ def add_labels(
                f" {old_feature_set}"
            )
            old_feature_set.delete()
-        self.features._add_feature_set(feature_set, slot="external")
+        self.features.add_feature_set(feature_set, slot="external")
        logger.save(
            f"linked new feature '{feature.name}' together with new feature set"
            f" {feature_set}"
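`_add_feature_set` is promoted to the public `add_feature_set` here and in the feature manager below. A usage sketch (hypothetical feature names; assumes `artifact` is a saved `ln.Artifact` and the features are already registered):

    import lamindb as ln

    features = ln.Feature.filter(name__in=["cell_type", "donor_id"]).list()
    feature_set = ln.FeatureSet(features=features)
    feature_set.save()
    artifact.features.add_feature_set(feature_set, slot="external")  # now public API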
lamindb/core/_feature_manager.py CHANGED
@@ -236,7 +236,7 @@ class FeatureManager:
            and self._host.artifact.accessor == "DataFrame"
        ):
            slot = "columns" if slot is None else slot
-            self._add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
+            self.add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
 
    def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
        """Add features from DataFrame."""
@@ -325,7 +325,7 @@ class FeatureManager:
        self._host._feature_sets = feature_sets
        self._host.save()
 
-    def _add_feature_set(self, feature_set: FeatureSet, slot: str):
+    def add_feature_set(self, feature_set: FeatureSet, slot: str):
        """Add new feature set to a slot.
 
        Args:
@@ -405,7 +405,8 @@ class FeatureManager:
                    f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
                )
                continue
-            # TODO: make sure the uid matches if featureset is composed of same features
-            # feature_set_self.uid = feature_set.uid
+            # make sure the uid matches if featureset is composed of same features
+            if feature_set_self.hash == feature_set.hash:
+                feature_set_self.uid = feature_set.uid
            logger.info(f"saving {slot} featureset: {feature_set_self}")
-            self._host.features._add_feature_set(feature_set_self, slot)
+            self._host.features.add_feature_set(feature_set_self, slot)
lamindb/core/_run_context.py CHANGED
@@ -3,16 +3,12 @@ from __future__ import annotations
 import builtins
 import hashlib
 import os
-import re
-import subprocess
-import sys
 from datetime import datetime, timezone
 from pathlib import Path, PurePath
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 from lamin_utils import logger
-from lamindb_setup import settings as setup_settings
-from lamindb_setup.core import InstanceSettings
+from lamindb_setup.core.hashing import hash_file
 from lnschema_core import Run, Transform, ids
 from lnschema_core.types import TransformType
 from lnschema_core.users import current_user_id
@@ -59,42 +55,6 @@ def get_uid_ext(version: str) -> str:
     return encodebytes(hashlib.md5(version.encode()).digest())[:4]
 
 
-def get_stem_uid_and_version_from_file(file_path: Path) -> tuple[str, str]:
-    # line-by-line matching might be faster, but let's go with this for now
-    with open(file_path) as file:
-        content = file.read()
-
-    if file_path.suffix == ".py":
-        stem_uid_pattern = re.compile(
-            r'\.transform\.stem_uid\s*=\s*["\']([^"\']+)["\']'
-        )
-        version_pattern = re.compile(r'\.transform\.version\s*=\s*["\']([^"\']+)["\']')
-    elif file_path.suffix == ".ipynb":
-        stem_uid_pattern = re.compile(
-            r'\.transform\.stem_uid\s*=\s*\\["\']([^"\']+)\\["\']'
-        )
-        version_pattern = re.compile(
-            r'\.transform\.version\s*=\s*\\["\']([^"\']+)\\["\']'
-        )
-    else:
-        raise ValueError("Only .py and .ipynb files are supported.")
-
-    # Search for matches in the entire file content
-    stem_uid_match = stem_uid_pattern.search(content)
-    version_match = version_pattern.search(content)
-
-    # Extract values if matches are found
-    stem_uid = stem_uid_match.group(1) if stem_uid_match else None
-    version = version_match.group(1) if version_match else None
-
-    if stem_uid is None or version is None:
-        raise SystemExit(
-            f"ln.settings.transform.stem_uid and ln.settings.transform.version aren't set in {file_path}\n"
-            "Call ln.track() and copy/paste the output into the notebook"
-        )
-    return stem_uid, version
-
-
 def update_stem_uid_or_version(
     stem_uid: str,
     version: str,
@@ -216,6 +176,17 @@ def raise_transform_settings_error() -> None:
     )
 
 
+def pretty_pypackages(dependencies: dict) -> str:
+    deps_list = []
+    for pkg, ver in dependencies.items():
+        if ver != "":
+            deps_list.append(pkg + f"=={ver}")
+        else:
+            deps_list.append(pkg)
+    deps_list.sort()
+    return " ".join(deps_list)
+
+
 class run_context:
     """Global run context."""
 
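Tracing `pretty_pypackages` on a small input (hypothetical mapping; unpinned packages carry an empty version string; assumes the function above is in scope):

    deps = {"scanpy": "1.9.8", "mypkg": "", "anndata": "0.10.6"}
    # sorted alphabetically, '==version' appended only where a version is known
    print(pretty_pypackages(deps))  # anndata==0.10.6 mypkg scanpy==1.9.8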
@@ -356,16 +327,6 @@ class run_context:
        from ._track_environment import track_environment
 
        track_environment(run)
-
-        if not is_run_from_ipython and cls.path is not None:
-            # upload run source code & environment
-            from lamindb._finish import save_run_context_core
-
-            save_run_context_core(
-                run=cls.run,
-                transform=cls.transform,
-                filepath=cls.path,
-            )
        return None
 
    @classmethod
@@ -427,17 +388,12 @@ class run_context:
        # log imported python packages
        if not path_str.startswith("/fileId="):
            try:
-                from nbproject.dev._metadata_display import DisplayMeta
                from nbproject.dev._pypackage import infer_pypackages
 
-                metadata, _, nb = nbproject.header(
-                    filepath=path_str,
-                    metadata_only=True,
-                )
-                dm = DisplayMeta(metadata)
+                nb = nbproject.dev.read_notebook(path_str)
                logger.important(
                    "notebook imports:"
-                    f" {' '.join(dm.pypackage(infer_pypackages(nb, pin_versions=True)))}"
+                    f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
                )
            except Exception:
                logger.debug("inferring imported packages failed")
@@ -492,19 +448,36 @@ class run_context:
            transform.save()
            logger.important(f"updated: {transform}")
        # check whether the notebook source code was already saved
-        if is_run_from_ipython and transform.source_code_id:
-            if os.getenv("LAMIN_TESTING") is None:
-                response = input(
-                    "You already saved source code for this notebook."
-                    " Bump the version before a new run? (y/n)"
-                )
-            else:
-                response = "y"
-            if response == "y":
-                update_stem_uid_or_version(stem_uid, version, bump_version=True)
+        if transform.source_code_id is not None:
+            response = None
+            if is_run_from_ipython:
+                if os.getenv("LAMIN_TESTING") is None:
+                    response = input(
+                        "You already saved source code for this notebook."
+                        " Bump the version before a new run? (y/n)"
+                    )
+                else:
+                    response = "y"
            else:
-                # we want a new stem_uid in this case, hence raise the error
-                raise_transform_settings_error()
+                hash, _ = hash_file(cls.path) # ignore hash_type for now
+                if hash != transform.source_code.hash:
+                    # only if hashes don't match, we need user input
+                    if os.getenv("LAMIN_TESTING") is None:
+                        response = input(
+                            "You already saved source code for this script and meanwhile modified it without bumping a version."
+                            " Bump the version before a new run? (y/n)"
+                        )
+                    else:
+                        response = "y"
+                else:
+                    logger.important(f"loaded: {transform}")
+            if response is not None:
+                # if a script is re-run and hashes match, we don't need user input
+                if response == "y":
+                    update_stem_uid_or_version(stem_uid, version, bump_version=True)
+                else:
+                    # we want a new stem_uid in this case, hence raise the error
+                    raise_transform_settings_error()
        else:
            logger.important(f"loaded: {transform}")
        cls.transform = transform
lamindb/core/_settings.py CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, Mapping
 
 import lamindb_setup as ln_setup
 from lamin_utils import logger
-from lamindb_setup._add_remote_storage import switch_default_storage
+from lamindb_setup._set_managed_storage import set_managed_storage
 from lamindb_setup.core._settings import settings as setup_settings
 from lamindb_setup.core._settings_instance import sanitize_git_repo_url
 
@@ -92,11 +92,11 @@ class Settings:
        self.__using_key = value
 
    @property
-    def _storage_settings(self) -> ln_setup.dev.StorageSettings:
+    def _storage_settings(self) -> ln_setup.core.StorageSettings:
        if self._using_storage is None:
            storage_settings = ln_setup.settings.storage
        else:
-            storage_settings = ln_setup.dev.StorageSettings(root=self._using_storage)
+            storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
        return storage_settings
 
    @property
@@ -127,7 +127,7 @@ class Settings:
 
        Examples:
 
-        You can set the root via:
+        You can switch to another managed storage location via:
 
        >>> ln.settings.storage = "s3://some-bucket"
 
@@ -143,14 +143,27 @@ class Settings:
 
    @storage.setter
    def storage(self, path_kwargs: str | Path | UPath | tuple[str | UPath, Mapping]):
-        logger.warning(
-            "you'll no longer be able to set arbitrary storage locations soon"
-        )
        if isinstance(path_kwargs, tuple):
            path, kwargs = path_kwargs
        else:
            path, kwargs = path_kwargs, {}
-        switch_default_storage(path, **kwargs)
+        set_managed_storage(path, **kwargs)
+
+    @property
+    def storage_local(self) -> Path:
+        """An additional local default storage (a path to its root).
+
+        Is only available if :attr:`~lamindb.setup.core.InstanceSettings.keep_artifacts_local` is enabled.
+
+        Guide: :doc:`faq/keep-artifacts-local`
+
+        Shortcut for: `ln.setup.settings.instance.storage_local.root`
+        """
+        return ln_setup.settings.instance.storage_local.root
+
+    @storage_local.setter
+    def storage_local(self, local_root: Path):
+        ln_setup.settings.instance.storage_local = local_root
 
    @property
    def verbosity(self) -> str:
@@ -162,8 +175,6 @@ class Settings:
        - 'info': 💡 also show info messages
        - 'hint': 💡 also show hint messages
        - 'debug': 🐛 also show detailed debug messages
-
-        This is based on Scanpy's and Django's verbosity setting.
        """
        return VERBOSITY_TO_STR[self._verbosity_int]
lamindb/core/_sync_git.py CHANGED
@@ -61,11 +61,15 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
        capture_output=True,
        cwd=repo_dir,
    )
-    commit_hash = result.stdout.decode()
+    # we just care to find one commit
+    # hence, we split by new line ("\n") and use the first one
+    commit_hash = result.stdout.decode().split("\n")[0]
    if commit_hash == "" or result.returncode == 1:
        return None
    else:
-        assert len(commit_hash) == 40
+        assert (
+            len(commit_hash) == 40
+        ), f"commit hash |{commit_hash}| is not 40 characters long"
        return commit_hash
 
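Taking only the first line guards against `git log` matching several commits; the same pattern in isolation (a sketch, assuming it runs inside any git repository):

    import subprocess

    result = subprocess.run(
        "git log --format=%H -1", shell=True, capture_output=True
    )
    # keep only the first returned hash, even if more lines follow
    commit_hash = result.stdout.decode().split("\n")[0]
    assert commit_hash == "" or len(commit_hash) == 40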
lamindb/core/datasets/_core.py CHANGED
@@ -161,8 +161,8 @@ def anndata_mouse_sc_lymph_node(
    adata.obs.columns = (
        adata.obs.columns.str.replace("Sample Characteristic", "")
        .str.replace("Factor Value ", "Factor Value:", regex=True)
-        .str.replace("Factor Value\[", "Factor Value:", regex=True) # noqa
-        .str.replace(" Ontology Term\[", "ontology_id:", regex=True) # noqa
+        .str.replace("Factor Value\\[", "Factor Value:", regex=True)
+        .str.replace(" Ontology Term\\[", "ontology_id:", regex=True)
        .str.strip("[]")
        .str.replace("organism part", "tissue")
        .str.replace("organism", "organism")
lamindb/core/storage/__init__.py CHANGED
@@ -10,6 +10,6 @@ from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
 
 from ._anndata_sizes import size_adata
 from ._backed_access import AnnDataAccessor, BackedAccessor
-from ._valid_suffixes import VALID_SUFFIXES
-from .objects import infer_suffix, write_to_file
+from ._valid_suffixes import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
+from .objects import infer_suffix, write_to_disk
 from .paths import delete_storage, load_to_memory
lamindb/core/storage/_valid_suffixes.py CHANGED
@@ -1,3 +1,5 @@
-from lamindb_setup.core.upath import VALID_SUFFIXES
+from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
 
-VALID_SUFFIXES.update({".vitessce.json", ".anndata.zarr", ".spatialdata.zarr"})
+# add new composite suffixes like so
+VALID_COMPOSITE_SUFFIXES.update({".vitessce.json"})
+# can do the same for simple valid suffixes
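Composite suffixes like `.vitessce.json` span two dots, which is why they are now tracked separately from simple suffixes; plain `pathlib` only reports the last component:

    from pathlib import PurePosixPath

    p = PurePosixPath("viewer.vitessce.json")
    print(p.suffix)    # '.json' - the last component only
    print(p.suffixes)  # ['.vitessce', '.json'] - the composite suffix, in parts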
lamindb/core/storage/objects.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from pathlib import PurePosixPath
 from typing import TYPE_CHECKING
 
 from anndata import AnnData
@@ -21,7 +22,7 @@ def infer_suffix(dmem, adata_format: str | None = None):
     """Infer LaminDB storage file suffix from a data object."""
     if isinstance(dmem, AnnData):
         if adata_format is not None:
-            if adata_format not in ("h5ad", "zarr"):
+            if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
                 raise ValueError(
                     "Error when specifying AnnData storage format, it should be"
                     f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
@@ -40,9 +41,15 @@ def infer_suffix(dmem, adata_format: str | None = None):
     raise NotImplementedError
 
 
-def write_to_file(dmem, filepath: UPathStr):
+def write_to_disk(dmem, filepath: UPathStr):
     if isinstance(dmem, AnnData):
-        dmem.write(filepath)
+        suffix = PurePosixPath(filepath).suffix
+        if suffix == ".h5ad":
+            dmem.write_h5ad(filepath)
+        elif suffix == ".zarr":
+            dmem.write_zarr(filepath)
+        else:
+            raise NotImplementedError
     elif isinstance(dmem, DataFrame):
         dmem.to_parquet(filepath)
     else:
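The renamed `write_to_disk` dispatches AnnData serialization on the target suffix; a usage sketch (assumes `anndata` and `zarr` are installed):

    import anndata as ad
    import numpy as np
    from lamindb.core.storage import write_to_disk

    adata = ad.AnnData(np.zeros((3, 2), dtype=np.float32))
    write_to_disk(adata, "example.h5ad")  # routes to AnnData.write_h5ad
    write_to_disk(adata, "example.zarr")  # routes to AnnData.write_zarr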
lamindb/core/storage/paths.py CHANGED
@@ -75,9 +75,6 @@ def attempt_accessing_path(
            settings.storage, access_token=access_token
        )
    else:
-        logger.debug(
-            "artifact.path is slightly slower for files outside default storage"
-        )
        if artifact._state.db not in ("default", None) and using_key is None:
            storage = (
                Storage.using(artifact._state.db).filter(id=artifact.storage_id).one()
@@ -141,8 +138,14 @@ def delete_storage(storagepath: Path):
    # replace with check_path_is_child_of_root but this needs to first be debugged
    # if not check_path_is_child_of_root(storagepath, settings.storage):
    if not storagepath.is_relative_to(settings.storage): # type: ignore
-        logger.warning("couldn't delete files outside of default storage")
-        return "did-not-delete"
+        allow_delete = False
+        if setup_settings.instance.keep_artifacts_local:
+            allow_delete = storagepath.is_relative_to( # type: ignore
+                setup_settings.instance.storage_local.root
+            )
+        if not allow_delete:
+            logger.warning("couldn't delete files outside of default storage")
+            return "did-not-delete"
    # only delete files in the default storage
    if storagepath.is_file():
        storagepath.unlink()
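The widened delete guard boils down to a second `pathlib` containment check; sketched with hypothetical roots:

    from pathlib import Path

    default_root = Path("/data/default-storage")   # hypothetical default storage root
    local_root = Path("/data/local-artifacts")     # hypothetical keep-artifacts-local root
    target = Path("/data/local-artifacts/a.h5ad")

    # Path.is_relative_to (Python >= 3.9) answers "is this path inside that root?"
    allow_delete = target.is_relative_to(default_root) or target.is_relative_to(local_root)
    print(allow_delete)  # True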
{lamindb-0.70.4.dist-info → lamindb-0.71.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.70.4
+Version: 0.71.1
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: lnschema_core==0.65.2
-Requires-Dist: lamindb_setup==0.70.0
+Requires-Dist: lnschema_core==0.66.3
+Requires-Dist: lamindb_setup==0.71.3
 Requires-Dist: lamin_utils==0.13.2
-Requires-Dist: lamin_cli==0.12.3
+Requires-Dist: lamin_cli==0.13.1
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
@@ -23,10 +23,7 @@ Requires-Dist: fsspec
 Requires-Dist: pandas
 Requires-Dist: graphviz
 Requires-Dist: psycopg2-binary
-Requires-Dist: urllib3<2 ; extra == "aws"
-Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
-Requires-Dist: s3fs==2023.12.2 ; extra == "aws"
-Requires-Dist: fsspec[s3]==2023.12.2 ; extra == "aws"
+Requires-Dist: lamindb_setup[aws] ; extra == "aws"
 Requires-Dist: bionty==0.42.9 ; extra == "bionty"
 Requires-Dist: pandas<2 ; extra == "dev"
 Requires-Dist: pre-commit ; extra == "dev"
@@ -39,8 +36,8 @@ Requires-Dist: nbproject_test>=0.5.1 ; extra == "dev"
 Requires-Dist: faker-biology ; extra == "dev"
 Requires-Dist: django-schema-graph ; extra == "erdiagram"
 Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
-Requires-Dist: fsspec[gs]==2023.12.2 ; extra == "gcp"
-Requires-Dist: nbproject==0.10.0 ; extra == "jupyter"
+Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
+Requires-Dist: nbproject==0.10.2 ; extra == "jupyter"
 Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
 Requires-Dist: nbconvert ; extra == "jupyter"
 Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
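The aws and gcp extras are now delegated to lamindb_setup's own extras rather than pinning s3fs/fsspec directly. You can inspect the resolved requirement from an installed distribution; a sketch (assumes lamindb 0.71.1 is installed in the current environment):

    from importlib.metadata import requires

    aws_reqs = [r for r in requires("lamindb") if 'extra == "aws"' in r]
    print(aws_reqs)  # expected per this METADATA: ['lamindb_setup[aws] ; extra == "aws"']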
{lamindb-0.70.4.dist-info → lamindb-0.71.1.dist-info}/RECORD RENAMED
@@ -1,55 +1,54 @@
-lamindb/__init__.py,sha256=n_WJSqcrctVzdr83pL8gZ--FI9vu1ZoqTL5AXEdq8LA,2163
+lamindb/__init__.py,sha256=PbZGEkozIsD8RM3XLafkYGo4iPJy7FJFdzVr0VW7Zy0,2182
 lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
-lamindb/_artifact.py,sha256=E104JM5_Brw7BxJLBTE0acl7Oz7j5R7pPgVgrbHz79I,39279
+lamindb/_artifact.py,sha256=8uBW-dhuWyBUQGs728sAPCnuhTic-NKjSbaneF07aMo,40106
 lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
 lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
 lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
 lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
 lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
-lamindb/_finish.py,sha256=oR7oe6By3vEhF0twDBqSdT1EF28MPhyiS_cfZP0CcCw,8040
+lamindb/_finish.py,sha256=6GwhqrC-x-JdFd16i7-uyhCWeQgGKxr25aSsSXPZt4g,8598
 lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
 lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
 lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
 lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
-lamindb/_query_set.py,sha256=K_0rJ6Keltl3Pvglvd7kkzkJEy2u6Kp0TKiHLzwqH18,11359
-lamindb/_registry.py,sha256=-Bv10zSr6IY7QM5pu_35NiVjQDJnBcXRECVe9h7GEuY,19336
+lamindb/_query_set.py,sha256=n0owd74cTzGz6-mIv8SlDz0wcyRz7Xw3Ke1LhE8UlIg,10784
+lamindb/_registry.py,sha256=fmX-BUnan3Y0WrEAx3qNwRYCIJwJgjoKnRnpgcXujEI,19358
 lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
-lamindb/_save.py,sha256=C4sPr0slgMmxDdiOcaLhIiHOqW9c3DnIz1uj9NlsnXQ,11431
+lamindb/_save.py,sha256=_7r3TUV3B6Hp75r5O_ymu3fKWyBHbGa5vmE_pxrtsVI,10923
 lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
 lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
 lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
 lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
 lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
-lamindb/core/__init__.py,sha256=MB1gEMKUf0GBQrI3dH8WRZOZQmWR4HIojXK_hXXVdqA,1235
-lamindb/core/_data.py,sha256=xULvge-txEO4r4amNQZRZTH3n3BqOLWauyNfxbB6WOA,17674
-lamindb/core/_feature_manager.py,sha256=LlYgU71AoTnrseWFCq-oZkUAYWITtRR7BNFm0AhHe-c,15773
+lamindb/core/__init__.py,sha256=TI9_1Jtpwae_cUPQ3-U0RRPH5c3GBA-gLhHvlAk_Nlo,1213
+lamindb/core/_data.py,sha256=Lico6-Vx15bNpGLl1bqFqEsh62pD4YKOOBnmahse1tI,17673
+lamindb/core/_feature_manager.py,sha256=uTzZZ7-qqEAmdwi48Holy2j5VGTgmoQxhb21r6mLShI,15824
 lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
 lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
-lamindb/core/_run_context.py,sha256=tqKPNkryy4yc7vtYSIfGjUu_pJSBQt1Kx8Cbq9vwXK8,17726
-lamindb/core/_settings.py,sha256=r9si7wJb31tI4vfz9dUN4iXe6QQU7FjnqAEsHy2UDzM,5727
-lamindb/core/_sync_git.py,sha256=IlTqw55inPp_RZbN_YScaCeKza7LeF9mClQw55W3_d4,3921
+lamindb/core/_run_context.py,sha256=3Pa9DQRR9_OZTMJyezi4p_ZIdL6JsKnQ8gM57whFpMo,16926
+lamindb/core/_settings.py,sha256=rW1KfEXfT56XErwcnSuQxaCytpOy1kJ-u7tVmkmNmxY,6131
+lamindb/core/_sync_git.py,sha256=06Te35UZj2QBaHNcc59VSC9vJgcFct7Z2sK78NLkZBs,4119
 lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
 lamindb/core/_transform_settings.py,sha256=eV96QKX9jOojjzF-a0oo0wXQsMXN2F6QV7orE06oFC8,161
-lamindb/core/_view_tree.py,sha256=PTwmKZSQL2UhKuSdV5Wp7o1JDjv1qwgsVCj3ThkbKb8,3447
 lamindb/core/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
 lamindb/core/fields.py,sha256=Jgi_XI-iTe6cT7oD8FV_JqEpjN1Q9rZWwL8VLtj4jkA,164
 lamindb/core/types.py,sha256=xeQF2x40p2pR9eIVQrXT74RrS810z2fbjmTRTSQUqPM,230
 lamindb/core/versioning.py,sha256=DsEHpCueNwhRiIaRH5-O8H_1fJVNtWslCRx30YiIS5o,3080
 lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
-lamindb/core/datasets/_core.py,sha256=36vUOYFkX_4hBAnM_BujV5BRARMI5b9iI_SM9qS7wGc,20191
+lamindb/core/datasets/_core.py,sha256=9bcDfVfMZ1h1WAS88ZBjy-R91xbP2KIm_ofHguXAKpY,20177
 lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
-lamindb/core/storage/__init__.py,sha256=6jnbFj-eBV3xZt04qP-kTsMWoP8YwpM50wlnnxDYsZU,415
+lamindb/core/storage/__init__.py,sha256=5LUFQKRr2BX24d-yWBezhTXBV83sShcOvPj5Y5u6qIg,441
 lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
 lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA-zLy59uDs,24536
-lamindb/core/storage/_valid_suffixes.py,sha256=sewRRU3I6fJ-Jd5ACNcco_o3hic9zmqTs8BuZui-450,133
+lamindb/core/storage/_valid_suffixes.py,sha256=J08aglC9oo35pzahj0SQXW9IHib8Asp4dc11co-2uys,212
 lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
-lamindb/core/storage/objects.py,sha256=5LbBeZVKuOOB8DceSE-PN8elKY0N9OhFXZPQJE4lK48,1538
-lamindb/core/storage/paths.py,sha256=kvu4Xi4dvreXpg4iuskN_nd2yyGmEdCmoIfi3nCrTyo,7728
+lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
+lamindb/core/storage/paths.py,sha256=JTtiTlAMICH4gkw7iZNwTRfNTT0WxrBoKiag_7E9g4I,7882
 lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
 lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
 lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
 lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
-lamindb-0.70.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lamindb-0.70.4.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-lamindb-0.70.4.dist-info/METADATA,sha256=tpsQ0FARcje5BTONwg1mer7gucqwICMw1RmApXGME0I,2835
-lamindb-0.70.4.dist-info/RECORD,,
+lamindb-0.71.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.71.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lamindb-0.71.1.dist-info/METADATA,sha256=fmFFlU4FrVwO0ON6JVCs8qCh8_HLWyt9WyYs_zyIZgo,2674
+lamindb-0.71.1.dist-info/RECORD,,
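RECORD rows have the form `path,sha256=<digest>,size`, with the digest urlsafe-base64-encoded and unpadded (PEP 376/427); how such a value is derived, sketched:

    import base64
    import hashlib

    data = b"print('hello')\n"  # hypothetical file contents
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    print(f"example.py,sha256={digest.decode()},{len(data)}")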
lamindb/core/_view_tree.py DELETED
@@ -1,116 +0,0 @@
-from __future__ import annotations
-
-from collections import defaultdict
-from pathlib import Path
-from typing import Iterable
-
-from lamindb_setup import settings as setup_settings
-from lnschema_core.models import Artifact, Storage
-
-
-def view_tree(
-    cls,
-    level: int = -1,
-    limit_to_directories: bool = False,
-    length_limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    """{}."""
-    if cls.__class__.__name__ == "QuerySet":
-        print("queryset")
-        qs = cls
-        storage_ids = qs.list("storage_id")
-    elif cls == Artifact:
-        print("file")
-        qs = cls.filter(storage_id=setup_settings.storage.id).all()
-        storage_ids = Storage.filter().list("id")
-    else:
-        print("else")
-        return
-    storages = Storage.filter().all()
-    storage_roots = {
-        storage_id: storages.get(id=storage_id).root for storage_id in storage_ids
-    }
-    keys = set()
-    for artifact in qs:
-        root = storage_roots.get(artifact.storage_id, "")
-        keys.add(f"{root}/{artifact.key}")
-
-    _view_tree(
-        keys=keys,
-        level=level,
-        only_dirs=limit_to_directories,
-        limit=length_limit,
-        max_files_per_dir_per_type=max_files_per_dir_per_type,
-    )
-
-
-def _view_tree(
-    keys: Iterable[str],
-    *,
-    level: int = -1,
-    only_dirs: bool = False,
-    limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    # Create a nested dictionary from keys
-    def tree():
-        return defaultdict(tree)
-
-    root = tree()
-
-    n_files = 0
-    n_directories = 0
-    suffixes = set()
-
-    for key in keys:
-        parts = key.split("/")
-        node = root
-        for part in parts:
-            node = node[part]
-            if node == {}:
-                n_files += 1
-                suffix = Path(part).suffix
-                if suffix:
-                    suffixes.add(suffix)
-            else:
-                n_directories += 1
-
-    # Function to print the tree
-    def print_tree(node, prefix="", depth=0, count=None, n_files_per_dir_per_type=None):
-        if count is None:
-            count = [0]
-        if n_files_per_dir_per_type is None:
-            n_files_per_dir_per_type = defaultdict(int)
-
-        if level != -1 and depth > level:
-            return
-        for name, child in node.items():
-            if count[0] >= limit:
-                return
-            if only_dirs and child == {}:
-                continue
-            suffix = Path(name).suffix
-            n_files_per_dir_per_type[suffix] += 1
-            if (
-                depth > 0
-                and n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type
-            ):
-                continue
-            new_prefix = prefix + ("├── " if name != list(node.keys())[-1] else "└── ")
-            print(new_prefix + name)
-            count[0] += 1
-            if child:
-                print_tree(
-                    child,
-                    prefix + ("│ " if name != list(node.keys())[-1] else " "),
-                    depth + 1,
-                    count,
-                    (
-                        defaultdict(int) if depth == 0 else n_files_per_dir_per_type
-                    ), # Reset the counter for each directory
-                )
-
-    suffix_message = f" with suffixes {', '.join(suffixes)}" if n_files > 0 else ""
-    print(f"{n_directories} directories, {n_files} files{suffix_message}")
-    print_tree(root)