lamindb 0.71.0__py3-none-any.whl → 0.71.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules & settings:
 """
 
 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "0.71.0"
+__version__ = "0.71.1"
 
 import os as _os
 
lamindb/_artifact.py CHANGED
@@ -21,7 +21,6 @@ from lamindb_setup.core.upath import (
     get_stat_file_cloud,
 )
 from lnschema_core import Artifact, Run, Storage
-from lnschema_core.models import IsTree
 from lnschema_core.types import (
     VisibilityChoice,
 )
@@ -35,8 +34,7 @@ from lamindb.core.storage import (
     delete_storage,
     infer_suffix,
     load_to_memory,
-    size_adata,
-    write_to_file,
+    write_to_disk,
 )
 from lamindb.core.storage.paths import (
     auto_storage_key_from_artifact,
@@ -173,8 +171,7 @@ def process_data(
         # Alex: I don't understand the line below
         if path.suffixes == []:
             path = path.with_suffix(suffix)
-        if suffix != ".zarr":
-            write_to_file(data, path)
+        write_to_disk(data, path)
         use_existing_storage_key = False
     else:
         raise NotImplementedError(
@@ -194,16 +191,13 @@ def get_stat_or_artifact(
     n_objects = None
     if settings.upon_file_create_skip_size_hash:
         return None, None, None, n_objects
-    if suffix == ".zarr" and memory_rep is not None and isinstance(memory_rep, AnnData):
-        size = size_adata(memory_rep)
-        return size, None, None, n_objects
     stat = path.stat()  # one network request
     if not isinstance(path, LocalPathClasses):
         size, hash, hash_type = None, None, None
         if stat is not None:
             if "ETag" in stat:  # is file
                 size, hash, hash_type = get_stat_file_cloud(stat)
-            elif path.is_dir():
+            elif stat["type"] == "directory":
                 size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
         if hash is None:
             logger.warning(f"did not add hash for {path}")
@@ -589,7 +583,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
         init_self_from_db(artifact, kwargs_or_artifact)
         # adding "key" here is dangerous because key might be auto-populated
         update_attributes(artifact, {"description": description})
-        if artifact.key != key:
+        if artifact.key != key and key is not None:
             logger.warning(
                 f"key {artifact.key} on existing artifact differs from passed key {key}"
             )
@@ -914,11 +908,25 @@ def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs)
 
 # docstring handled through attach_func_to_class_method
 def cache(self, is_run_input: bool | None = None) -> Path:
-    _track_run_input(self, is_run_input)
-
     using_key = settings._using_key
     filepath = filepath_from_artifact(self, using_key=using_key)
-    return setup_settings.instance.storage.cloud_to_local(filepath, print_progress=True)
+    try:
+        cache_path = setup_settings.instance.storage.cloud_to_local(
+            filepath, print_progress=True
+        )
+    except Exception as e:
+        if not isinstance(filepath, LocalPathClasses):
+            cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
+                filepath
+            )
+            if cache_path.is_file():
+                cache_path.unlink(missing_ok=True)
+            elif cache_path.is_dir():
+                shutil.rmtree(cache_path)
+        raise e
+    # only call if sync is successful
+    _track_run_input(self, is_run_input)
+    return cache_path
 
 
 # docstring handled through attach_func_to_class_method
@@ -1003,6 +1011,11 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
         local_path = self.path
         self.storage_id = setup_settings.instance.storage.id
         self._local_filepath = local_path
+        # switch to virtual storage key upon upload
+        # the local filepath is already cached at that point
+        self.key_is_virtual = True
+        # ensure that the artifact is uploaded
+        self._to_store = True
 
     self._save_skip_storage(**kwargs)
 
@@ -1045,27 +1058,6 @@ def path(self) -> Path | UPath:
     return filepath_from_artifact(self, using_key)
 
 
-@classmethod  # type: ignore
-@doc_args(IsTree.view_tree.__doc__)
-def view_tree(
-    cls,
-    level: int = -1,
-    limit_to_directories: bool = False,
-    length_limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    """{}."""
-    from lamindb.core._view_tree import view_tree as _view_tree
-
-    _view_tree(
-        cls=cls,
-        level=level,
-        limit_to_directories=limit_to_directories,
-        length_limit=length_limit,
-        max_files_per_dir_per_type=max_files_per_dir_per_type,
-    )
-
-
 # docstring handled through attach_func_to_class_method
 def restore(self) -> None:
     self.visibility = VisibilityChoice.default.value
@@ -1085,7 +1077,6 @@ METHOD_NAMES = [
     "replace",
     "from_dir",
    "restore",
-    "view_tree",
 ]
 
 if ln_setup._TESTING:
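Note: the reworked `cache()` above registers the run input only after a successful sync and removes any partially synced copy on failure. A minimal sketch of that cleanup pattern, assuming a `sync` callable that downloads to a known cache path (names here are illustrative, not lamindb API):

    import shutil
    from pathlib import Path

    def cache_with_cleanup(sync, remote_key: str, cache_dir: Path) -> Path:
        cache_path = cache_dir / remote_key
        try:
            sync(remote_key, cache_path)  # may raise mid-transfer
        except Exception:
            # drop the partial file or directory so the cache never holds corrupt data
            if cache_path.is_file():
                cache_path.unlink(missing_ok=True)
            elif cache_path.is_dir():
                shutil.rmtree(cache_path)
            raise
        return cache_path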
lamindb/_finish.py CHANGED
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING
 
 import lamindb_setup as ln_setup
 from lamin_utils import logger
+from lamindb_setup.core.hashing import hash_file
 from lnschema_core.types import TransformType
 
 from .core._run_context import is_run_from_ipython, run_context
@@ -35,7 +36,7 @@ def get_seconds_since_modified(filepath) -> float:
 def finish():
     """Mark a tracked run as finished.
 
-    If run in a notebook, it saves the run report & source code to your default storage location.
+    Saves source code and, for notebooks, a run report to your default storage location.
     """
     if run_context.path is None:
         raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
@@ -47,16 +48,12 @@ def finish():
             raise NotebookNotSaved(
                 "Please save the notebook in your editor right before running `ln.finish()`"
             )
-        save_run_context_core(
-            run=run_context.run,
-            transform=run_context.transform,
-            filepath=run_context.path,
-            finished_at=True,
-        )
-    else:  # scripts
-        # save_run_context_core was already called during ln.track()
-        run_context.run.finished_at = datetime.now(timezone.utc)  # update run time
-        run_context.run.save()
+    save_run_context_core(
+        run=run_context.run,
+        transform=run_context.transform,
+        filepath=run_context.path,
+        finished_at=True,
+    )
 
 
 def save_run_context_core(
@@ -138,15 +135,17 @@ def save_run_context_core(
     if prev_transform.source_code_id is not None:
         prev_source = prev_transform.source_code
     ln.settings.silence_file_run_transform_warning = True
-    # register the source code
-    if transform.source_code is not None:
-        # check if the hash of the notebook source code matches
-        check_source_code = ln.Artifact(source_code_path, key="dummy")
-        if check_source_code._state.adding:
+
+    # track source code
+    if transform.source_code_id is not None:
+        # check if the hash of the transform source code matches
+        # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
+        hash, _ = hash_file(source_code_path)  # ignore hash_type for now
+        if hash != transform.source_code.hash:
             if os.getenv("LAMIN_TESTING") is None:
                 # in test, auto-confirm overwrite
                 response = input(
-                    f"You are about to overwrite existing source code (hash {transform.source_code.hash}) for transform version"
+                    f"You are about to replace (overwrite) existing source code (hash '{transform.source_code.hash}') for transform version"
                     f" '{transform.version}'. Proceed? (y/n)"
                 )
             else:
@@ -154,6 +153,9 @@ def save_run_context_core(
             if response == "y":
                 transform.source_code.replace(source_code_path)
                 transform.source_code.save(upload=True)
+                logger.success(
+                    f"replaced transform.source_code: {transform.source_code}"
+                )
             else:
                 logger.warning("Please re-run `ln.track()` to make a new version")
                 return "rerun-the-notebook"
@@ -169,21 +171,32 @@ def save_run_context_core(
         source_code.save(upload=True)
         transform.source_code = source_code
         logger.success(f"saved transform.source_code: {transform.source_code}")
+
     # track environment
     filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
     if filepath_env.exists():
-        artifact = ln.Artifact(
-            filepath_env,
-            description="requirements.txt",
-            visibility=0,
-            run=False,
-        )
-        if artifact._state.adding:
+        hash, _ = hash_file(filepath_env)
+        artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
+        new_env_artifact = artifact is None
+        if new_env_artifact:
+            artifact = ln.Artifact(
+                filepath_env,
+                description="requirements.txt",
+                visibility=0,
+                run=False,
+            )
             artifact.save(upload=True)
         run.environment = artifact
-        logger.success(f"saved run.environment: {run.environment}")
-    # save report file
+        if new_env_artifact:
+            logger.success(f"saved run.environment: {run.environment}")
+
+    # set finished_at
+    if finished_at:
+        run.finished_at = datetime.now(timezone.utc)
+
+    # track report and set is_consecutive
     if not transform.type == TransformType.notebook:
+        run.is_consecutive = True
         run.save()
     else:
         if run.report_id is not None:
@@ -203,16 +216,15 @@ def save_run_context_core(
             report_file.save(upload=True)
             run.report = report_file
         run.is_consecutive = is_consecutive
-        if finished_at:
-            run.finished_at = datetime.now(timezone.utc)
        run.save()
         transform.latest_report = run.report
-    transform.save()
-    if transform.type == TransformType.notebook:
         logger.success(f"saved transform.latest_report: {transform.latest_report}")
-    if ln_setup.settings.instance.is_remote:
+    transform.save()
+
+    # finalize
+    if ln_setup.settings.instance.is_on_hub:
         identifier = ln_setup.settings.instance.slug
-        logger.success(
+        logger.important(
             f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
         )
     # because run & transform changed, update the global run_context
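Note: environment tracking above now looks up an existing artifact by content hash before creating a new one. The lookup-or-create logic, condensed (mirrors the diff; the `find_or_create_env` wrapper itself is hypothetical):

    import lamindb as ln
    from lamindb_setup.core.hashing import hash_file

    def find_or_create_env(filepath):
        hash, _ = hash_file(filepath)  # returns (hash, hash_type)
        artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
        if artifact is None:  # no prior run used this exact environment
            artifact = ln.Artifact(
                filepath, description="requirements.txt", visibility=0, run=False
            )
            artifact.save(upload=True)
        return artifact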
lamindb/_query_set.py CHANGED
@@ -11,7 +11,6 @@ from lnschema_core.models import (
     Artifact,
     CanValidate,
     Collection,
-    IsTree,
     IsVersioned,
     Registry,
     Run,
@@ -83,7 +82,7 @@ class RecordsList(UserList):
         return one_helper(self)
 
 
-class QuerySet(models.QuerySet, CanValidate, IsTree):
+class QuerySet(models.QuerySet, CanValidate):
     """Sets of records returned by queries.
 
     See Also:
@@ -265,25 +264,6 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
 
         return _standardize(cls=self, values=values, field=field, **kwargs)
 
-    @doc_args(IsTree.view_tree.__doc__)
-    def view_tree(
-        self,
-        level: int = -1,
-        limit_to_directories: bool = False,
-        length_limit: int = 1000,
-        max_files_per_dir_per_type: int = 7,
-    ) -> None:
-        """{}."""
-        from .core._view_tree import view_tree as _view_tree
-
-        _view_tree(
-            cls=self,
-            level=level,
-            limit_to_directories=limit_to_directories,
-            length_limit=length_limit,
-            max_files_per_dir_per_type=max_files_per_dir_per_type,
-        )
-
 
 def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
     # evaluating length can be very costly, hence, the try-except block
lamindb/_save.py CHANGED
@@ -6,13 +6,13 @@ import traceback
 from collections import defaultdict
 from datetime import datetime
 from functools import partial
-from typing import Iterable, overload
+from typing import TYPE_CHECKING, Iterable, overload
 
 import lamindb_setup
 from django.db import transaction
 from django.utils.functional import partition
 from lamin_utils import logger
-from lamindb_setup.core.upath import UPath, print_hook
+from lamindb_setup.core.upath import print_hook
 from lnschema_core.models import Artifact, Registry
 
 from lamindb.core._settings import settings
@@ -23,12 +23,8 @@ from lamindb.core.storage.paths import (
     store_file_or_folder,
 )
 
-try:
-    from lamindb.core.storage._zarr import write_adata_zarr
-except ImportError:
-
-    def write_adata_zarr(filepath):  # type: ignore
-        raise ImportError("Please install zarr: pip install zarr")
+if TYPE_CHECKING:
+    from lamindb_setup.core.upath import UPath
 
 
 def save(
@@ -162,7 +158,7 @@ def check_and_attempt_upload(
 def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
     local_path = artifact._local_filepath
 
-    # some in-memory cases (zarr for now)
+    # in-memory cases
     if local_path is None or not local_path.exists():
         return None
 
@@ -284,18 +280,7 @@ def upload_artifact(
     storage_path = attempt_accessing_path(
         artifact, storage_key, using_key=using_key, access_token=access_token
     )
-    msg = f"storing artifact '{artifact.uid}' at '{storage_path}'"
-    if (
-        artifact.suffix == ".zarr"
-        and hasattr(artifact, "_memory_rep")
-        and artifact._memory_rep is not None
-    ):
-        logger.save(msg)
-        print_progress = partial(
-            print_hook, objectname=storage_path.name, action="uploading"
-        )
-        write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
-    elif hasattr(artifact, "_to_store") and artifact._to_store:
-        logger.save(msg)
+    if hasattr(artifact, "_to_store") and artifact._to_store:
+        logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
         store_file_or_folder(artifact._local_filepath, storage_path)
     return storage_path
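Note: `UPath` moves behind an `if TYPE_CHECKING:` guard above, so it is imported only for static analysis, never at runtime. The pattern in isolation (the `describe` function is illustrative):

    from __future__ import annotations  # keeps annotations as strings at runtime

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:  # evaluated by type checkers, skipped at runtime
        from lamindb_setup.core.upath import UPath

    def describe(storage_path: UPath) -> str:  # annotation resolves without the import
        return f"stored at '{storage_path}'"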
lamindb/core/__init__.py CHANGED
@@ -12,7 +12,6 @@ Registries:
    Data
    FeatureManager
    LabelManager
-   IsTree
    IsVersioned
    CanValidate
    HasParents
@@ -55,7 +54,6 @@ from lnschema_core.models import (
     CanValidate,
     Data,
     HasParents,
-    IsTree,
     IsVersioned,
     Registry,
 )
lamindb/core/_data.py CHANGED
@@ -345,7 +345,7 @@ def add_labels(
                 f" {old_feature_set}"
             )
             old_feature_set.delete()
-        self.features._add_feature_set(feature_set, slot="external")
+        self.features.add_feature_set(feature_set, slot="external")
         logger.save(
             f"linked new feature '{feature.name}' together with new feature set"
             f" {feature_set}"
lamindb/core/_feature_manager.py CHANGED
@@ -236,7 +236,7 @@ class FeatureManager:
             and self._host.artifact.accessor == "DataFrame"
         ):
             slot = "columns" if slot is None else slot
-            self._add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
+            self.add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
 
     def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
         """Add features from DataFrame."""
@@ -325,7 +325,7 @@ class FeatureManager:
         self._host._feature_sets = feature_sets
         self._host.save()
 
-    def _add_feature_set(self, feature_set: FeatureSet, slot: str):
+    def add_feature_set(self, feature_set: FeatureSet, slot: str):
         """Add new feature set to a slot.
 
         Args:
@@ -405,7 +405,8 @@ class FeatureManager:
                     f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
                 )
                 continue
-            # TODO: make sure the uid matches if featureset is composed of same features
-            # feature_set_self.uid = feature_set.uid
+            # make sure the uid matches if featureset is composed of same features
+            if feature_set_self.hash == feature_set.hash:
+                feature_set_self.uid = feature_set.uid
             logger.info(f"saving {slot} featureset: {feature_set_self}")
-            self._host.features._add_feature_set(feature_set_self, slot)
+            self._host.features.add_feature_set(feature_set_self, slot)
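Note: the transfer logic above resolves the old TODO by reusing the source uid whenever two feature sets hash identically. The idea in isolation, with a stand-in dataclass rather than the lamindb model:

    from dataclasses import dataclass

    @dataclass
    class FeatureSetStub:  # stand-in for lnschema_core's FeatureSet
        uid: str
        hash: str

    def align_uid(target: FeatureSetStub, source: FeatureSetStub) -> None:
        # an identical hash means both sets are composed of the same features,
        # so reuse the source uid instead of minting a duplicate record
        if target.hash == source.hash:
            target.uid = source.uid

    source = FeatureSetStub(uid="abc12345", hash="h1")
    target = FeatureSetStub(uid="xyz67890", hash="h1")
    align_uid(target, source)
    assert target.uid == "abc12345"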
lamindb/core/_run_context.py CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path, PurePath
 from typing import TYPE_CHECKING
 
 from lamin_utils import logger
+from lamindb_setup.core.hashing import hash_file
 from lnschema_core import Run, Transform, ids
 from lnschema_core.types import TransformType
 from lnschema_core.users import current_user_id
@@ -175,6 +176,17 @@ def raise_transform_settings_error() -> None:
     )
 
 
+def pretty_pypackages(dependencies: dict) -> str:
+    deps_list = []
+    for pkg, ver in dependencies.items():
+        if ver != "":
+            deps_list.append(pkg + f"=={ver}")
+        else:
+            deps_list.append(pkg)
+    deps_list.sort()
+    return " ".join(deps_list)
+
+
 class run_context:
     """Global run context."""
 
@@ -315,16 +327,6 @@ class run_context:
         from ._track_environment import track_environment
 
         track_environment(run)
-
-        if not is_run_from_ipython and cls.path is not None:
-            # upload run source code & environment
-            from lamindb._finish import save_run_context_core
-
-            save_run_context_core(
-                run=cls.run,
-                transform=cls.transform,
-                filepath=cls.path,
-            )
         return None
 
     @classmethod
@@ -386,17 +388,12 @@ class run_context:
         # log imported python packages
         if not path_str.startswith("/fileId="):
             try:
-                from nbproject.dev._metadata_display import DisplayMeta
                 from nbproject.dev._pypackage import infer_pypackages
 
-                metadata, _, nb = nbproject.header(
-                    filepath=path_str,
-                    metadata_only=True,
-                )
-                dm = DisplayMeta(metadata)
+                nb = nbproject.dev.read_notebook(path_str)
                 logger.important(
                     "notebook imports:"
-                    f" {' '.join(dm.pypackage(infer_pypackages(nb, pin_versions=True)))}"
+                    f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
                 )
             except Exception:
                 logger.debug("inferring imported packages failed")
@@ -451,19 +448,36 @@ class run_context:
            transform.save()
            logger.important(f"updated: {transform}")
        # check whether the notebook source code was already saved
-        if is_run_from_ipython and transform.source_code_id:
-            if os.getenv("LAMIN_TESTING") is None:
-                response = input(
-                    "You already saved source code for this notebook."
-                    " Bump the version before a new run? (y/n)"
-                )
-            else:
-                response = "y"
-            if response == "y":
-                update_stem_uid_or_version(stem_uid, version, bump_version=True)
+        if transform.source_code_id is not None:
+            response = None
+            if is_run_from_ipython:
+                if os.getenv("LAMIN_TESTING") is None:
+                    response = input(
+                        "You already saved source code for this notebook."
+                        " Bump the version before a new run? (y/n)"
+                    )
+                else:
+                    response = "y"
             else:
-                # we want a new stem_uid in this case, hence raise the error
-                raise_transform_settings_error()
+                hash, _ = hash_file(cls.path)  # ignore hash_type for now
+                if hash != transform.source_code.hash:
+                    # only if hashes don't match, we need user input
+                    if os.getenv("LAMIN_TESTING") is None:
+                        response = input(
+                            "You already saved source code for this script and meanwhile modified it without bumping a version."
+                            " Bump the version before a new run? (y/n)"
+                        )
+                    else:
+                        response = "y"
+                else:
+                    logger.important(f"loaded: {transform}")
+            if response is not None:
+                # if a script is re-run and hashes match, we don't need user input
+                if response == "y":
+                    update_stem_uid_or_version(stem_uid, version, bump_version=True)
+                else:
+                    # we want a new stem_uid in this case, hence raise the error
+                    raise_transform_settings_error()
        else:
            logger.important(f"loaded: {transform}")
        cls.transform = transform
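Note: the new `pretty_pypackages` helper above renders nbproject's dependency mapping as a sorted, space-separated list; packages without a pinned version appear bare:

    >>> pretty_pypackages({"scanpy": "1.9.8", "anndata": "0.10.6", "mymodule": ""})
    'anndata==0.10.6 mymodule scanpy==1.9.8'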
lamindb/core/_settings.py CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, Mapping
 
 import lamindb_setup as ln_setup
 from lamin_utils import logger
-from lamindb_setup._add_remote_storage import add_managed_storage
+from lamindb_setup._set_managed_storage import set_managed_storage
 from lamindb_setup.core._settings import settings as setup_settings
 from lamindb_setup.core._settings_instance import sanitize_git_repo_url
 
@@ -147,7 +147,7 @@ class Settings:
             path, kwargs = path_kwargs
         else:
             path, kwargs = path_kwargs, {}
-        add_managed_storage(path, **kwargs)
+        set_managed_storage(path, **kwargs)
 
     @property
     def storage_local(self) -> Path:
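Note: the storage setter accepts either a bare path or a `(path, kwargs)` tuple before delegating to the renamed `set_managed_storage`. A sketch of that unpacking (the helper name is hypothetical):

    def _unpack_storage_arg(path_kwargs):
        # accept "s3://bucket" or ("s3://bucket", {"region": "us-east-1"})
        if isinstance(path_kwargs, tuple):
            path, kwargs = path_kwargs
        else:
            path, kwargs = path_kwargs, {}
        return path, kwargs

    assert _unpack_storage_arg("s3://bucket") == ("s3://bucket", {})
    assert _unpack_storage_arg(("s3://bucket", {"region": "us-east-1"}))[1] == {
        "region": "us-east-1"
    }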
lamindb/core/_sync_git.py CHANGED
@@ -61,11 +61,15 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
         capture_output=True,
         cwd=repo_dir,
     )
-    commit_hash = result.stdout.decode()
+    # we just care to find one commit
+    # hence, we split by new line ("\n") and use the first one
+    commit_hash = result.stdout.decode().split("\n")[0]
     if commit_hash == "" or result.returncode == 1:
         return None
     else:
-        assert len(commit_hash) == 40
+        assert (
+            len(commit_hash) == 40
+        ), f"commit hash |{commit_hash}| is not 40 characters long"
         return commit_hash
 
 
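Note: the fix above guards against git returning several matches by keeping only the first line of output. Condensed (run inside any git repository; `rev-parse HEAD` stands in for the blob search in the diff):

    import subprocess

    result = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True)
    # output may span several lines; only the first candidate hash matters
    commit_hash = result.stdout.decode().split("\n")[0]
    if commit_hash == "" or result.returncode == 1:
        commit_hash = None
    else:
        assert len(commit_hash) == 40, f"commit hash |{commit_hash}| is not 40 characters long"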
lamindb/core/datasets/_core.py CHANGED
@@ -161,8 +161,8 @@ def anndata_mouse_sc_lymph_node(
     adata.obs.columns = (
         adata.obs.columns.str.replace("Sample Characteristic", "")
         .str.replace("Factor Value ", "Factor Value:", regex=True)
-        .str.replace("Factor Value\[", "Factor Value:", regex=True)  # noqa
-        .str.replace(" Ontology Term\[", "ontology_id:", regex=True)  # noqa
+        .str.replace("Factor Value\\[", "Factor Value:", regex=True)
+        .str.replace(" Ontology Term\\[", "ontology_id:", regex=True)
         .str.strip("[]")
         .str.replace("organism part", "tissue")
         .str.replace("organism", "organism")
lamindb/core/storage/__init__.py CHANGED
@@ -10,6 +10,6 @@ from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem
 
 from ._anndata_sizes import size_adata
 from ._backed_access import AnnDataAccessor, BackedAccessor
-from ._valid_suffixes import VALID_SUFFIXES
-from .objects import infer_suffix, write_to_file
+from ._valid_suffixes import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
+from .objects import infer_suffix, write_to_disk
 from .paths import delete_storage, load_to_memory
lamindb/core/storage/_valid_suffixes.py CHANGED
@@ -1,3 +1,5 @@
-from lamindb_setup.core.upath import VALID_SUFFIXES
+from lamindb_setup.core.upath import VALID_COMPOSITE_SUFFIXES, VALID_SUFFIXES
 
-VALID_SUFFIXES.update({".vitessce.json", ".anndata.zarr", ".spatialdata.zarr"})
+# add new composite suffixes like so
+VALID_COMPOSITE_SUFFIXES.update({".vitessce.json"})
+# can do the same for simple valid suffixes
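Note: a composite suffix spans more than one dot, so `Path(...).suffix` alone cannot recover it. A sketch of matching a path against a composite-suffix set (the local set and helper are illustrative, not the upstream registry):

    from __future__ import annotations

    from pathlib import PurePosixPath

    COMPOSITE = {".vitessce.json"}  # illustrative copy of one registered entry

    def composite_suffix(path: str) -> str | None:
        # "".join(suffixes) yields ".vitessce.json" for "plot.vitessce.json"
        joined = "".join(PurePosixPath(path).suffixes)
        return joined if joined in COMPOSITE else None

    assert composite_suffix("plot.vitessce.json") == ".vitessce.json"
    assert composite_suffix("table.parquet") is None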
lamindb/core/storage/objects.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from pathlib import PurePosixPath
 from typing import TYPE_CHECKING
 
 from anndata import AnnData
@@ -21,7 +22,7 @@ def infer_suffix(dmem, adata_format: str | None = None):
     """Infer LaminDB storage file suffix from a data object."""
     if isinstance(dmem, AnnData):
         if adata_format is not None:
-            if adata_format not in ("h5ad", "zarr"):
+            if adata_format not in {"h5ad", "zarr", "anndata.zarr"}:
                 raise ValueError(
                     "Error when specifying AnnData storage format, it should be"
                     f" 'h5ad', 'zarr', not '{adata_format}'. Check 'format'"
@@ -40,9 +41,15 @@ def infer_suffix(dmem, adata_format: str | None = None):
     raise NotImplementedError
 
 
-def write_to_file(dmem, filepath: UPathStr):
+def write_to_disk(dmem, filepath: UPathStr):
     if isinstance(dmem, AnnData):
-        dmem.write(filepath)
+        suffix = PurePosixPath(filepath).suffix
+        if suffix == ".h5ad":
+            dmem.write_h5ad(filepath)
+        elif suffix == ".zarr":
+            dmem.write_zarr(filepath)
+        else:
+            raise NotImplementedError
     elif isinstance(dmem, DataFrame):
         dmem.to_parquet(filepath)
     else:
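Note: `write_to_disk` now dispatches on the target suffix instead of always calling `AnnData.write`. A usage sketch (requires anndata; the `.zarr` branch additionally needs zarr installed):

    import anndata as ad
    import numpy as np
    from lamindb.core.storage import write_to_disk

    adata = ad.AnnData(X=np.eye(3, dtype="float32"))
    write_to_disk(adata, "example.h5ad")  # routed to adata.write_h5ad
    write_to_disk(adata, "example.zarr")  # routed to adata.write_zarr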
lamindb/core/storage/paths.py CHANGED
@@ -140,7 +140,7 @@ def delete_storage(storagepath: Path):
     if not storagepath.is_relative_to(settings.storage):  # type: ignore
         allow_delete = False
         if setup_settings.instance.keep_artifacts_local:
-            allow_delete = storagepath.is_relative_to(
+            allow_delete = storagepath.is_relative_to(  # type: ignore
                 setup_settings.instance.storage_local.root
             )
         if not allow_delete:
lamindb-0.71.0.dist-info/METADATA → lamindb-0.71.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.71.0
+Version: 0.71.1
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: lnschema_core==0.66.0
-Requires-Dist: lamindb_setup==0.71.0
+Requires-Dist: lnschema_core==0.66.3
+Requires-Dist: lamindb_setup==0.71.3
 Requires-Dist: lamin_utils==0.13.2
-Requires-Dist: lamin_cli==0.13.0
+Requires-Dist: lamin_cli==0.13.1
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
@@ -37,7 +37,7 @@ Requires-Dist: faker-biology ; extra == "dev"
 Requires-Dist: django-schema-graph ; extra == "erdiagram"
 Requires-Dist: readfcs>=1.1.8 ; extra == "fcs"
 Requires-Dist: lamindb_setup[gcp] ; extra == "gcp"
-Requires-Dist: nbproject==0.10.0 ; extra == "jupyter"
+Requires-Dist: nbproject==0.10.2 ; extra == "jupyter"
 Requires-Dist: nbstripout==0.6.1 ; extra == "jupyter"
 Requires-Dist: nbconvert ; extra == "jupyter"
 Requires-Dist: zarr>=2.16.0 ; extra == "zarr"
lamindb-0.71.0.dist-info/RECORD → lamindb-0.71.1.dist-info/RECORD CHANGED
@@ -1,55 +1,54 @@
-lamindb/__init__.py,sha256=T_mLeXTbOSi7s2DSoGxF-FrVBCSQLvBj5t02ueRNWSI,2182
+lamindb/__init__.py,sha256=PbZGEkozIsD8RM3XLafkYGo4iPJy7FJFdzVr0VW7Zy0,2182
 lamindb/_annotate.py,sha256=kgbilILfgzoS-GEpjxzVwRMs7CoSa9BNEcIWXFBW69I,43915
-lamindb/_artifact.py,sha256=875jV8J-GgvhoscWPmg73ogTa9rAVHQdAqc3V8S46Sc,40157
+lamindb/_artifact.py,sha256=8uBW-dhuWyBUQGs728sAPCnuhTic-NKjSbaneF07aMo,40106
 lamindb/_can_validate.py,sha256=nvoZG-35n3HofkY4Xc6hBv9AV54_RDan7Hzp5TuqY9I,14709
 lamindb/_collection.py,sha256=SDM35R_5WHrgLKjVb14Q8-Rz_gn5hdZLJobPcanm4PM,14627
 lamindb/_feature.py,sha256=srAKchY7gqD-h-cWlEiAWuHlpFKFwv0PWIA-JX0Go8c,6758
 lamindb/_feature_set.py,sha256=AzjOcHzQajpeikPOAic-aj0z_C5b7VpHVegg3ThRSLw,9045
 lamindb/_filter.py,sha256=xnjJzjF3Zj4dK_Kfymvhgczk27MhhXz5ZYc7XINbgHY,1331
-lamindb/_finish.py,sha256=iUo6j89_hTP-OuRfUAj_i1YB1B5FU9QTtwxXKdX_J_4,8279
+lamindb/_finish.py,sha256=6GwhqrC-x-JdFd16i7-uyhCWeQgGKxr25aSsSXPZt4g,8598
 lamindb/_from_values.py,sha256=DVXjnQ2wwNw-2bFzy0uXLdVlqoprrn95hTnrXwn-KqM,12638
 lamindb/_is_versioned.py,sha256=0PgRCmxEmYDcAjllLSOYZm132B1lW6QgmBBERhRyFt0,1341
 lamindb/_parents.py,sha256=N9T8jbd3eaoHDLE9TD1y1QgGcO81E6Brapy8LILzRCQ,14790
 lamindb/_query_manager.py,sha256=3zokXqxgj9vTJBnN2sbYKS-q69fyDDPF_aGq_rFHzXU,4066
-lamindb/_query_set.py,sha256=K_0rJ6Keltl3Pvglvd7kkzkJEy2u6Kp0TKiHLzwqH18,11359
+lamindb/_query_set.py,sha256=n0owd74cTzGz6-mIv8SlDz0wcyRz7Xw3Ke1LhE8UlIg,10784
 lamindb/_registry.py,sha256=fmX-BUnan3Y0WrEAx3qNwRYCIJwJgjoKnRnpgcXujEI,19358
 lamindb/_run.py,sha256=b7A52M1On3QzFgIYyfQoz5Kk7V3wcu9p_Prq5bzd8v8,1838
-lamindb/_save.py,sha256=r-pUKi2xBW25brIMzDbf8iI-4xggX-X2C9cIYHzK1uI,11460
+lamindb/_save.py,sha256=_7r3TUV3B6Hp75r5O_ymu3fKWyBHbGa5vmE_pxrtsVI,10923
 lamindb/_storage.py,sha256=VW8xq3VRv58-ciholvOdlcgvp_OIlLxx5GxLt-e2Irs,614
 lamindb/_transform.py,sha256=rxojJ91qQSkeYDHYbwqjFAYxBMgJd3cq_K7Z0n5g8Aw,3482
 lamindb/_ulabel.py,sha256=e5dw9h1tR0_u-DMn7Gzx0WhUhV5w7j4v3QbnLWQV7eI,1941
 lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
 lamindb/_view.py,sha256=GV1FrqIMmdooEkA-5zvcTWgV1nqx1sehi6WdWEaFpxM,2171
-lamindb/core/__init__.py,sha256=MB1gEMKUf0GBQrI3dH8WRZOZQmWR4HIojXK_hXXVdqA,1235
-lamindb/core/_data.py,sha256=xULvge-txEO4r4amNQZRZTH3n3BqOLWauyNfxbB6WOA,17674
-lamindb/core/_feature_manager.py,sha256=LlYgU71AoTnrseWFCq-oZkUAYWITtRR7BNFm0AhHe-c,15773
+lamindb/core/__init__.py,sha256=TI9_1Jtpwae_cUPQ3-U0RRPH5c3GBA-gLhHvlAk_Nlo,1213
+lamindb/core/_data.py,sha256=Lico6-Vx15bNpGLl1bqFqEsh62pD4YKOOBnmahse1tI,17673
+lamindb/core/_feature_manager.py,sha256=uTzZZ7-qqEAmdwi48Holy2j5VGTgmoQxhb21r6mLShI,15824
 lamindb/core/_label_manager.py,sha256=0RtegYnK3zIisOnd970EobOrHMpp7OCH-mEoPrPXw2c,9075
 lamindb/core/_mapped_collection.py,sha256=_OwFZh5SePDUD70XIK5kngv3we_Z5-YdGHNfpUSatSQ,19469
-lamindb/core/_run_context.py,sha256=zwsaq1iW3yb8Y6IjpWzqUL3e0i4l1bnmPF6V2USMqpI,16155
-lamindb/core/_settings.py,sha256=lhfn6gRjZw0atrA5Hr34m1nkPFXd8DAUMEesCGat1tA,6130
-lamindb/core/_sync_git.py,sha256=IlTqw55inPp_RZbN_YScaCeKza7LeF9mClQw55W3_d4,3921
+lamindb/core/_run_context.py,sha256=3Pa9DQRR9_OZTMJyezi4p_ZIdL6JsKnQ8gM57whFpMo,16926
+lamindb/core/_settings.py,sha256=rW1KfEXfT56XErwcnSuQxaCytpOy1kJ-u7tVmkmNmxY,6131
+lamindb/core/_sync_git.py,sha256=06Te35UZj2QBaHNcc59VSC9vJgcFct7Z2sK78NLkZBs,4119
 lamindb/core/_track_environment.py,sha256=xLZ6kgzxWS6MWZ5LQ_wkbJX99vmYOT8iQ-Fz4OHCgWw,754
 lamindb/core/_transform_settings.py,sha256=eV96QKX9jOojjzF-a0oo0wXQsMXN2F6QV7orE06oFC8,161
-lamindb/core/_view_tree.py,sha256=PTwmKZSQL2UhKuSdV5Wp7o1JDjv1qwgsVCj3ThkbKb8,3447
 lamindb/core/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
 lamindb/core/fields.py,sha256=Jgi_XI-iTe6cT7oD8FV_JqEpjN1Q9rZWwL8VLtj4jkA,164
 lamindb/core/types.py,sha256=xeQF2x40p2pR9eIVQrXT74RrS810z2fbjmTRTSQUqPM,230
 lamindb/core/versioning.py,sha256=DsEHpCueNwhRiIaRH5-O8H_1fJVNtWslCRx30YiIS5o,3080
 lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
-lamindb/core/datasets/_core.py,sha256=36vUOYFkX_4hBAnM_BujV5BRARMI5b9iI_SM9qS7wGc,20191
+lamindb/core/datasets/_core.py,sha256=9bcDfVfMZ1h1WAS88ZBjy-R91xbP2KIm_ofHguXAKpY,20177
 lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
-lamindb/core/storage/__init__.py,sha256=6jnbFj-eBV3xZt04qP-kTsMWoP8YwpM50wlnnxDYsZU,415
+lamindb/core/storage/__init__.py,sha256=5LUFQKRr2BX24d-yWBezhTXBV83sShcOvPj5Y5u6qIg,441
 lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
 lamindb/core/storage/_backed_access.py,sha256=eManrLsu3pSSQAyAKy47FDBm-iHgjaNfHA-zLy59uDs,24536
-lamindb/core/storage/_valid_suffixes.py,sha256=sewRRU3I6fJ-Jd5ACNcco_o3hic9zmqTs8BuZui-450,133
+lamindb/core/storage/_valid_suffixes.py,sha256=J08aglC9oo35pzahj0SQXW9IHib8Asp4dc11co-2uys,212
 lamindb/core/storage/_zarr.py,sha256=5ceEz6YIvgvUnVVNWhK5Z4W0WfrvyvY82Yna5jSX1_E,3661
-lamindb/core/storage/objects.py,sha256=5LbBeZVKuOOB8DceSE-PN8elKY0N9OhFXZPQJE4lK48,1538
-lamindb/core/storage/paths.py,sha256=ib50kmRGhjRTHak20i94ruXVqLL9xQnQuqJSHEW50Q8,7866
+lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
+lamindb/core/storage/paths.py,sha256=JTtiTlAMICH4gkw7iZNwTRfNTT0WxrBoKiag_7E9g4I,7882
 lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
 lamindb/integrations/_vitessce.py,sha256=b0FqTBsP-M6Q7xCYXVwFwM8DOIeeOBZEhYbryhtq4gk,2535
 lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
 lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
-lamindb-0.71.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lamindb-0.71.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-lamindb-0.71.0.dist-info/METADATA,sha256=UbJOa1wX6oHrzN1WXgN_YiudHPiw8rOzBYDE3ricYCM,2674
-lamindb-0.71.0.dist-info/RECORD,,
+lamindb-0.71.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.71.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lamindb-0.71.1.dist-info/METADATA,sha256=fmFFlU4FrVwO0ON6JVCs8qCh8_HLWyt9WyYs_zyIZgo,2674
+lamindb-0.71.1.dist-info/RECORD,,
lamindb/core/_view_tree.py DELETED
@@ -1,116 +0,0 @@
-from __future__ import annotations
-
-from collections import defaultdict
-from pathlib import Path
-from typing import Iterable
-
-from lamindb_setup import settings as setup_settings
-from lnschema_core.models import Artifact, Storage
-
-
-def view_tree(
-    cls,
-    level: int = -1,
-    limit_to_directories: bool = False,
-    length_limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    """{}."""
-    if cls.__class__.__name__ == "QuerySet":
-        print("queryset")
-        qs = cls
-        storage_ids = qs.list("storage_id")
-    elif cls == Artifact:
-        print("file")
-        qs = cls.filter(storage_id=setup_settings.storage.id).all()
-        storage_ids = Storage.filter().list("id")
-    else:
-        print("else")
-        return
-    storages = Storage.filter().all()
-    storage_roots = {
-        storage_id: storages.get(id=storage_id).root for storage_id in storage_ids
-    }
-    keys = set()
-    for artifact in qs:
-        root = storage_roots.get(artifact.storage_id, "")
-        keys.add(f"{root}/{artifact.key}")
-
-    _view_tree(
-        keys=keys,
-        level=level,
-        only_dirs=limit_to_directories,
-        limit=length_limit,
-        max_files_per_dir_per_type=max_files_per_dir_per_type,
-    )
-
-
-def _view_tree(
-    keys: Iterable[str],
-    *,
-    level: int = -1,
-    only_dirs: bool = False,
-    limit: int = 1000,
-    max_files_per_dir_per_type: int = 7,
-) -> None:
-    # Create a nested dictionary from keys
-    def tree():
-        return defaultdict(tree)
-
-    root = tree()
-
-    n_files = 0
-    n_directories = 0
-    suffixes = set()
-
-    for key in keys:
-        parts = key.split("/")
-        node = root
-        for part in parts:
-            node = node[part]
-            if node == {}:
-                n_files += 1
-                suffix = Path(part).suffix
-                if suffix:
-                    suffixes.add(suffix)
-            else:
-                n_directories += 1
-
-    # Function to print the tree
-    def print_tree(node, prefix="", depth=0, count=None, n_files_per_dir_per_type=None):
-        if count is None:
-            count = [0]
-        if n_files_per_dir_per_type is None:
-            n_files_per_dir_per_type = defaultdict(int)
-
-        if level != -1 and depth > level:
-            return
-        for name, child in node.items():
-            if count[0] >= limit:
-                return
-            if only_dirs and child == {}:
-                continue
-            suffix = Path(name).suffix
-            n_files_per_dir_per_type[suffix] += 1
-            if (
-                depth > 0
-                and n_files_per_dir_per_type[suffix] > max_files_per_dir_per_type
-            ):
-                continue
-            new_prefix = prefix + ("├── " if name != list(node.keys())[-1] else "└── ")
-            print(new_prefix + name)
-            count[0] += 1
-            if child:
-                print_tree(
-                    child,
-                    prefix + ("│   " if name != list(node.keys())[-1] else "    "),
-                    depth + 1,
-                    count,
-                    (
-                        defaultdict(int) if depth == 0 else n_files_per_dir_per_type
-                    ),  # Reset the counter for each directory
-                )
-
-    suffix_message = f" with suffixes {', '.join(suffixes)}" if n_files > 0 else ""
-    print(f"{n_directories} directories, {n_files} files{suffix_message}")
-    print_tree(root)