lamindb 0.76.7__py3-none-any.whl → 0.76.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules and settings.
41
41
  """
42
42
 
43
43
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
44
- __version__ = "0.76.7"
44
+ __version__ = "0.76.9"
45
45
 
46
46
  import os as _os
47
47
 
lamindb/_artifact.py CHANGED
@@ -30,7 +30,7 @@ from lnschema_core.types import (
30
30
  from lamindb._utils import attach_func_to_class_method
31
31
  from lamindb.core._data import _track_run_input, describe, view_lineage
32
32
  from lamindb.core._settings import settings
33
- from lamindb.core.exceptions import IntegrityError
33
+ from lamindb.core.exceptions import IntegrityError, InvalidArgument
34
34
  from lamindb.core.loaders import load_to_memory
35
35
  from lamindb.core.storage import (
36
36
  LocalPathClasses,
@@ -43,6 +43,7 @@ from lamindb.core.storage.paths import (
43
43
  auto_storage_key_from_artifact,
44
44
  auto_storage_key_from_artifact_uid,
45
45
  check_path_is_child_of_root,
46
+ filepath_cache_key_from_artifact,
46
47
  filepath_from_artifact,
47
48
  )
48
49
  from lamindb.core.versioning import (
@@ -88,8 +89,6 @@ def process_pathlike(
88
89
  raise FileNotFoundError(filepath)
89
90
  except PermissionError:
90
91
  pass
91
- if isinstance(filepath, LocalPathClasses):
92
- filepath = filepath.resolve()
93
92
  if check_path_is_child_of_root(filepath, default_storage.root):
94
93
  use_existing_storage_key = True
95
94
  return default_storage, use_existing_storage_key
@@ -154,7 +153,7 @@ def process_data(
154
153
  if hasattr(default_storage, "_access_token")
155
154
  else None
156
155
  )
157
- path = create_path(data, access_token=access_token)
156
+ path = create_path(data, access_token=access_token).resolve()
158
157
  storage, use_existing_storage_key = process_pathlike(
159
158
  path,
160
159
  default_storage=default_storage,
@@ -175,12 +174,12 @@ def process_data(
175
174
  key_suffix = None
176
175
  suffix = infer_suffix(data, format)
177
176
  if key_suffix is not None and key_suffix != suffix:
178
- raise ValueError(
177
+ raise InvalidArgument(
179
178
  f"The suffix '{key_suffix}' of the provided key is incorrect, it should"
180
179
  f" be '{suffix}'."
181
180
  )
182
181
  cache_name = f"{provisional_uid}{suffix}"
183
- path = settings._storage_settings.cache_dir / cache_name
182
+ path = settings.storage.cache_dir / cache_name
184
183
  # Alex: I don't understand the line below
185
184
  if path.suffixes == []:
186
185
  path = path.with_suffix(suffix)
@@ -344,8 +343,8 @@ def get_artifact_kwargs_from_data(
344
343
 
345
344
  if revises is not None: # update provisional_uid
346
345
  provisional_uid, revises = create_uid(revises=revises, version=version)
347
- if path.as_posix().startswith(settings._storage_settings.cache_dir.as_posix()):
348
- path = path.rename(f"{provisional_uid}{suffix}")
346
+ if settings.storage.cache_dir in path.parents:
347
+ path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))
349
348
 
350
349
  check_path_in_storage = False
351
350
  if use_existing_storage_key:
@@ -356,7 +355,7 @@ def get_artifact_kwargs_from_data(
356
355
  key = inferred_key
357
356
  else:
358
357
  if not key == inferred_key:
359
- raise ValueError(
358
+ raise InvalidArgument(
360
359
  f"The path '{data}' is already in registered storage"
361
360
  f" '{storage.root}' with key '{inferred_key}'\nYou passed"
362
361
  f" conflicting key '{key}': please move the file before"
@@ -399,6 +398,7 @@ def get_artifact_kwargs_from_data(
399
398
  "run_id": run.id if run is not None else None,
400
399
  "run": run,
401
400
  "_key_is_virtual": key_is_virtual,
401
+ "revises": revises,
402
402
  }
403
403
  if not isinstance(path, LocalPathClasses):
404
404
  local_filepath = None
@@ -614,6 +614,9 @@ def __init__(artifact: Artifact, *args, **kwargs):
614
614
  else:
615
615
  kwargs = kwargs_or_artifact
616
616
 
617
+ if revises is None:
618
+ revises = kwargs_or_artifact.pop("revises")
619
+
617
620
  if data is not None:
618
621
  artifact._local_filepath = privates["local_filepath"]
619
622
  artifact._cloud_filepath = privates["cloud_filepath"]
@@ -750,7 +753,7 @@ def from_dir(
750
753
  " ln.Artifact(dir_path) to get one artifact for the entire directory"
751
754
  )
752
755
  folderpath: UPath = create_path(path) # returns Path for local
753
- default_storage = settings._storage_settings.record
756
+ default_storage = settings.storage.record
754
757
  using_key = settings._using_key
755
758
  storage, use_existing_storage = process_pathlike(
756
759
  folderpath, default_storage, using_key
@@ -844,7 +847,7 @@ def replace(
844
847
  run: Run | None = None,
845
848
  format: str | None = None,
846
849
  ) -> None:
847
- default_storage = settings._storage_settings.record
850
+ default_storage = settings.storage.record
848
851
  kwargs, privates = get_artifact_kwargs_from_data(
849
852
  provisional_uid=self.uid,
850
853
  data=data,
@@ -919,12 +922,14 @@ def open(
919
922
  from lamindb.core.storage._backed_access import _track_writes_factory, backed_access
920
923
 
921
924
  using_key = settings._using_key
922
- filepath = filepath_from_artifact(self, using_key=using_key)
925
+ filepath, cache_key = filepath_cache_key_from_artifact(self, using_key=using_key)
923
926
  is_tiledbsoma_w = (
924
927
  filepath.name == "soma" or filepath.suffix == ".tiledbsoma"
925
928
  ) and mode == "w"
926
929
  # consider the case where an object is already locally cached
927
- localpath = setup_settings.instance.storage.cloud_to_local_no_update(filepath)
930
+ localpath = setup_settings.instance.storage.cloud_to_local_no_update(
931
+ filepath, cache_key=cache_key
932
+ )
928
933
  if not is_tiledbsoma_w and localpath.exists():
929
934
  access = backed_access(localpath, mode, using_key)
930
935
  else:
@@ -956,15 +961,17 @@ def open(
956
961
 
957
962
 
958
963
  # can't really just call .cache in .load because of double tracking
959
- def _synchronize_cleanup_on_error(filepath: UPath) -> UPath:
964
+ def _synchronize_cleanup_on_error(
965
+ filepath: UPath, cache_key: str | None = None
966
+ ) -> UPath:
960
967
  try:
961
968
  cache_path = setup_settings.instance.storage.cloud_to_local(
962
- filepath, print_progress=True
969
+ filepath, cache_key=cache_key, print_progress=True
963
970
  )
964
971
  except Exception as e:
965
972
  if not isinstance(filepath, LocalPathClasses):
966
973
  cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
967
- filepath
974
+ filepath, cache_key=cache_key
968
975
  )
969
976
  if cache_path.is_file():
970
977
  cache_path.unlink(missing_ok=True)
@@ -979,8 +986,11 @@ def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
979
986
  if hasattr(self, "_memory_rep") and self._memory_rep is not None:
980
987
  access_memory = self._memory_rep
981
988
  else:
982
- filepath = filepath_from_artifact(self, using_key=settings._using_key)
983
- cache_path = _synchronize_cleanup_on_error(filepath)
989
+ filepath, cache_key = filepath_cache_key_from_artifact(
990
+ self, using_key=settings._using_key
991
+ )
992
+ cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
993
+ # cache_path is local so doesn't trigger any sync in load_to_memory
984
994
  access_memory = load_to_memory(cache_path, **kwargs)
985
995
  # only call if load is successful
986
996
  _track_run_input(self, is_run_input)
@@ -989,8 +999,10 @@ def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
989
999
 
990
1000
  # docstring handled through attach_func_to_class_method
991
1001
  def cache(self, is_run_input: bool | None = None) -> Path:
992
- filepath = filepath_from_artifact(self, using_key=settings._using_key)
993
- cache_path = _synchronize_cleanup_on_error(filepath)
1002
+ filepath, cache_key = filepath_cache_key_from_artifact(
1003
+ self, using_key=settings._using_key
1004
+ )
1005
+ cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
994
1006
  # only call if sync is successful
995
1007
  _track_run_input(self, is_run_input)
996
1008
  return cache_path
@@ -1041,7 +1053,7 @@ def delete(
1041
1053
  if delete_record:
1042
1054
  # need to grab file path before deletion
1043
1055
  try:
1044
- path = filepath_from_artifact(self, using_key)
1056
+ path, _ = filepath_from_artifact(self, using_key)
1045
1057
  except OSError:
1046
1058
  # we can still delete the record
1047
1059
  logger.warning("Could not get path")
@@ -1133,8 +1145,22 @@ def _save_skip_storage(file, **kwargs) -> None:
1133
1145
  @doc_args(Artifact.path.__doc__)
1134
1146
  def path(self) -> Path | UPath:
1135
1147
  """{}""" # noqa: D415
1136
- using_key = settings._using_key
1137
- return filepath_from_artifact(self, using_key)
1148
+ # return only the path, without StorageSettings
1149
+ filepath, _ = filepath_from_artifact(self, using_key=settings._using_key)
1150
+ return filepath
1151
+
1152
+
1153
+ # get cache path without triggering sync
1154
+ @property # type: ignore
1155
+ def _cache_path(self) -> UPath:
1156
+ filepath, cache_key = filepath_cache_key_from_artifact(
1157
+ self, using_key=settings._using_key
1158
+ )
1159
+ if isinstance(filepath, LocalPathClasses):
1160
+ return filepath
1161
+ return setup_settings.instance.storage.cloud_to_local_no_update(
1162
+ filepath, cache_key=cache_key
1163
+ )
1138
1164
 
1139
1165
 
1140
1166
  # docstring handled through attach_func_to_class_method
@@ -1173,6 +1199,7 @@ for name in METHOD_NAMES:
1173
1199
  # privates currently dealt with separately
1174
1200
  Artifact._delete_skip_storage = _delete_skip_storage
1175
1201
  Artifact._save_skip_storage = _save_skip_storage
1202
+ Artifact._cache_path = _cache_path
1176
1203
  Artifact.path = path
1177
1204
  Artifact.describe = describe
1178
1205
  Artifact.view_lineage = view_lineage
lamindb/_collection.py CHANGED
@@ -211,6 +211,7 @@ def mapped(
211
211
  layers_keys: str | list[str] | None = None,
212
212
  obs_keys: str | list[str] | None = None,
213
213
  obsm_keys: str | list[str] | None = None,
214
+ obs_filter: tuple[str, str | tuple[str, ...]] | None = None,
214
215
  join: Literal["inner", "outer"] | None = "inner",
215
216
  encode_labels: bool | list[str] = True,
216
217
  unknown_label: str | dict[str, str] | None = None,
@@ -239,6 +240,7 @@ def mapped(
239
240
  layers_keys,
240
241
  obs_keys,
241
242
  obsm_keys,
243
+ obs_filter,
242
244
  join,
243
245
  encode_labels,
244
246
  unknown_label,
lamindb/_save.py CHANGED
@@ -17,6 +17,7 @@ from lnschema_core.models import Artifact, Record
17
17
 
18
18
  from lamindb.core._settings import settings
19
19
  from lamindb.core.storage.paths import (
20
+ _cache_key_from_artifact_storage,
20
21
  attempt_accessing_path,
21
22
  auto_storage_key_from_artifact,
22
23
  delete_storage_using_key,
@@ -135,7 +136,7 @@ def check_and_attempt_upload(
135
136
  # a local env it will have a _local_filepath and needs to be uploaded
136
137
  if hasattr(artifact, "_local_filepath"):
137
138
  try:
138
- storage_path = upload_artifact(
139
+ storage_path, cache_path = upload_artifact(
139
140
  artifact,
140
141
  using_key,
141
142
  access_token=access_token,
@@ -146,7 +147,7 @@ def check_and_attempt_upload(
146
147
  return exception
147
148
  # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
148
149
  if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
149
- copy_or_move_to_cache(artifact, storage_path)
150
+ copy_or_move_to_cache(artifact, storage_path, cache_path)
150
151
  # after successful upload, we should remove the attribute so that another call
151
152
  # call to save won't upload again, the user should call replace() then
152
153
  del artifact._local_filepath
@@ -154,7 +155,9 @@ def check_and_attempt_upload(
154
155
  return None
155
156
 
156
157
 
157
- def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
158
+ def copy_or_move_to_cache(
159
+ artifact: Artifact, storage_path: UPath, cache_path: UPath | None
160
+ ):
158
161
  local_path = artifact._local_filepath
159
162
 
160
163
  # in-memory cases
@@ -166,7 +169,7 @@ def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
166
169
  cache_dir = settings._storage_settings.cache_dir
167
170
 
168
171
  # just delete from the cache dir if storage_path is local
169
- if isinstance(storage_path, LocalPathClasses):
172
+ if cache_path is None:
170
173
  if (
171
174
  local_path.as_posix() != storage_path.as_posix()
172
175
  and cache_dir in local_path.parents
@@ -176,11 +179,12 @@ def copy_or_move_to_cache(artifact: Artifact, storage_path: UPath):
176
179
  else:
177
180
  local_path.unlink()
178
181
  return None
179
-
180
- cache_path = settings._storage_settings.cloud_to_local_no_update(storage_path)
182
+ # non-local storage_path further
181
183
  if local_path != cache_path:
182
184
  cache_path.parent.mkdir(parents=True, exist_ok=True)
183
185
  if cache_dir in local_path.parents:
186
+ if cache_path.is_dir():
187
+ shutil.rmtree(cache_path)
184
188
  local_path.replace(cache_path)
185
189
  else:
186
190
  if is_dir:
@@ -280,11 +284,11 @@ def upload_artifact(
280
284
  using_key: str | None = None,
281
285
  access_token: str | None = None,
282
286
  print_progress: bool = True,
283
- ) -> UPath:
287
+ ) -> tuple[UPath, UPath | None]:
284
288
  """Store and add file and its linked entries."""
285
289
  # can't currently use filepath_from_artifact here because it resolves to ._local_filepath
286
290
  storage_key = auto_storage_key_from_artifact(artifact)
287
- storage_path = attempt_accessing_path(
291
+ storage_path, storage_settings = attempt_accessing_path(
288
292
  artifact, storage_key, using_key=using_key, access_token=access_token
289
293
  )
290
294
  if hasattr(artifact, "_to_store") and artifact._to_store:
@@ -292,4 +296,13 @@ def upload_artifact(
292
296
  store_file_or_folder(
293
297
  artifact._local_filepath, storage_path, print_progress=print_progress
294
298
  )
295
- return storage_path
299
+
300
+ if isinstance(storage_path, LocalPathClasses):
301
+ cache_path = None
302
+ else:
303
+ cache_key = _cache_key_from_artifact_storage(artifact, storage_settings)
304
+ cache_path = storage_settings.cloud_to_local_no_update(
305
+ storage_path, cache_key=cache_key
306
+ )
307
+
308
+ return storage_path, cache_path
lamindb/core/_context.py CHANGED
@@ -450,11 +450,11 @@ class Context:
450
450
  self._logging_message += f"created Transform(uid='{transform.uid}')"
451
451
  else:
452
452
  uid = transform.uid
453
- # check whether the transform file has been renamed
453
+ # check whether the transform.key is consistent
454
454
  if transform.key != key:
455
455
  suid = transform.stem_uid
456
456
  new_suid = ids.base62_12()
457
- transform_type = "Notebook" if is_run_from_ipython else "Script"
457
+ transform_type = "notebook" if is_run_from_ipython else "script"
458
458
  note = message_update_key_in_version_family(
459
459
  suid=suid,
460
460
  existing_key=transform.key,
@@ -462,7 +462,7 @@ class Context:
462
462
  registry="Transform",
463
463
  )
464
464
  raise UpdateContext(
465
- f"{transform_type} filename changed.\n\nEither init a new transform family by setting:\n\n"
465
+ f'\n✗ Filename "{key}" clashes with the existing key "{transform.key}" for uid "{transform.uid[:-4]}...."\n\nEither init a new transform with a new uid:\n\n'
466
466
  f'ln.context.uid = "{new_suid}0000"\n\n{note}'
467
467
  )
468
468
  elif transform.name != name:
lamindb/core/_data.py CHANGED
@@ -41,7 +41,7 @@ if TYPE_CHECKING:
41
41
 
42
42
 
43
43
  WARNING_RUN_TRANSFORM = (
44
- "no run & transform got linked, call `ln.context.track()` & re-run`"
44
+ "no run & transform got linked, call `ln.context.track()` & re-run"
45
45
  )
46
46
 
47
47
  WARNING_NO_INPUT = "run input wasn't tracked, call `ln.context.track()` and re-run"
@@ -49,6 +49,9 @@ class _Connect:
49
49
  self.conn.close()
50
50
 
51
51
 
52
+ _decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
53
+
54
+
52
55
  class MappedCollection:
53
56
  """Map-style collection for use in data loaders.
54
57
 
@@ -83,6 +86,9 @@ class MappedCollection:
83
86
  retrieves ``.X``.
84
87
  obsm_keys: Keys from the ``.obsm`` slots.
85
88
  obs_keys: Keys from the ``.obs`` slots.
89
+ obs_filter: Select only observations with these values for the given obs column.
90
+ Should be a tuple with an obs column name as the first element
91
+ and filtering values (a string or a tuple of strings) as the second element.
86
92
  join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
87
93
  does not join.
88
94
  encode_labels: Encode labels into integers.
@@ -101,6 +107,7 @@ class MappedCollection:
101
107
  layers_keys: str | list[str] | None = None,
102
108
  obs_keys: str | list[str] | None = None,
103
109
  obsm_keys: str | list[str] | None = None,
110
+ obs_filter: tuple[str, str | tuple[str, ...]] | None = None,
104
111
  join: Literal["inner", "outer"] | None = "inner",
105
112
  encode_labels: bool | list[str] = True,
106
113
  unknown_label: str | dict[str, str] | None = None,
@@ -113,6 +120,13 @@ class MappedCollection:
113
120
  f"join must be one of None, 'inner, or 'outer' but was {type(join)}"
114
121
  )
115
122
 
123
+ self.filtered = obs_filter is not None
124
+ if self.filtered and len(obs_filter) != 2:
125
+ raise ValueError(
126
+ "obs_filter should be a tuple with obs column name "
127
+ "as the first element and filtering values as the second element"
128
+ )
129
+
116
130
  if layers_keys is None:
117
131
  self.layers_keys = ["X"]
118
132
  else:
@@ -153,16 +167,37 @@ class MappedCollection:
153
167
  self.path_list = path_list
154
168
  self._make_connections(path_list, parallel)
155
169
 
170
+ self._cache_cats: dict = {}
171
+ if self.obs_keys is not None:
172
+ if cache_categories:
173
+ self._cache_categories(self.obs_keys)
174
+ self.encoders: dict = {}
175
+ if self.encode_labels:
176
+ self._make_encoders(self.encode_labels) # type: ignore
177
+
156
178
  self.n_obs_list = []
179
+ self.indices_list = []
157
180
  for i, storage in enumerate(self.storages):
158
181
  with _Connect(storage) as store:
159
182
  X = store["X"]
160
183
  store_path = self.path_list[i]
161
184
  self._check_csc_raise_error(X, "X", store_path)
162
- if isinstance(X, ArrayTypes): # type: ignore
163
- self.n_obs_list.append(X.shape[0])
185
+ if self.filtered:
186
+ obs_filter_key, obs_filter_values = obs_filter
187
+ indices_storage = np.where(
188
+ np.isin(
189
+ self._get_labels(store, obs_filter_key), obs_filter_values
190
+ )
191
+ )[0]
192
+ n_obs_storage = len(indices_storage)
164
193
  else:
165
- self.n_obs_list.append(X.attrs["shape"][0])
194
+ if isinstance(X, ArrayTypes): # type: ignore
195
+ n_obs_storage = X.shape[0]
196
+ else:
197
+ n_obs_storage = X.attrs["shape"][0]
198
+ indices_storage = np.arange(n_obs_storage)
199
+ self.n_obs_list.append(n_obs_storage)
200
+ self.indices_list.append(indices_storage)
166
201
  for layer_key in self.layers_keys:
167
202
  if layer_key == "X":
168
203
  continue
@@ -180,7 +215,7 @@ class MappedCollection:
180
215
  )
181
216
  self.n_obs = sum(self.n_obs_list)
182
217
 
183
- self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
218
+ self.indices = np.hstack(self.indices_list)
184
219
  self.storage_idx = np.repeat(np.arange(len(self.storages)), self.n_obs_list)
185
220
 
186
221
  self.join_vars: Literal["inner", "outer"] | None = join
@@ -193,15 +228,6 @@ class MappedCollection:
193
228
  self._make_join_vars()
194
229
  self.n_vars = len(self.var_joint)
195
230
 
196
- if self.obs_keys is not None:
197
- if cache_categories:
198
- self._cache_categories(self.obs_keys)
199
- else:
200
- self._cache_cats: dict = {}
201
- self.encoders: dict = {}
202
- if self.encode_labels:
203
- self._make_encoders(self.encode_labels) # type: ignore
204
-
205
231
  self._dtype = dtype
206
232
  self._closed = False
207
233
 
@@ -220,14 +246,15 @@ class MappedCollection:
220
246
 
221
247
  def _cache_categories(self, obs_keys: list):
222
248
  self._cache_cats = {}
223
- decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
224
249
  for label in obs_keys:
225
250
  self._cache_cats[label] = []
226
251
  for storage in self.storages:
227
252
  with _Connect(storage) as store:
228
253
  cats = self._get_categories(store, label)
229
254
  if cats is not None:
230
- cats = decode(cats) if isinstance(cats[0], bytes) else cats[...]
255
+ cats = (
256
+ _decode(cats) if isinstance(cats[0], bytes) else cats[...]
257
+ )
231
258
  self._cache_cats[label].append(cats)
232
259
 
233
260
  def _make_encoders(self, encode_labels: list):
@@ -366,7 +393,7 @@ class MappedCollection:
366
393
 
367
394
  def _get_data_idx(
368
395
  self,
369
- lazy_data: ArrayType | GroupType, # type: ignore
396
+ lazy_data: ArrayType | GroupType,
370
397
  idx: int,
371
398
  join_vars: Literal["inner", "outer"] | None = None,
372
399
  var_idxs_join: list | None = None,
@@ -480,25 +507,17 @@ class MappedCollection:
480
507
  def get_merged_labels(self, label_key: str):
481
508
  """Get merged labels for `label_key` from all `.obs`."""
482
509
  labels_merge = []
483
- decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
484
510
  for i, storage in enumerate(self.storages):
485
511
  with _Connect(storage) as store:
486
- codes = self._get_codes(store, label_key)
487
- labels = decode(codes) if isinstance(codes[0], bytes) else codes
488
- if label_key in self._cache_cats:
489
- cats = self._cache_cats[label_key][i]
490
- else:
491
- cats = self._get_categories(store, label_key)
492
- if cats is not None:
493
- cats = decode(cats) if isinstance(cats[0], bytes) else cats
494
- labels = cats[labels]
512
+ labels = self._get_labels(store, label_key, storage_idx=i)
513
+ if self.filtered:
514
+ labels = labels[self.indices_list[i]]
495
515
  labels_merge.append(labels)
496
516
  return np.hstack(labels_merge)
497
517
 
498
518
  def get_merged_categories(self, label_key: str):
499
519
  """Get merged categories for `label_key` from all `.obs`."""
500
520
  cats_merge = set()
501
- decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
502
521
  for i, storage in enumerate(self.storages):
503
522
  with _Connect(storage) as store:
504
523
  if label_key in self._cache_cats:
@@ -506,15 +525,15 @@ class MappedCollection:
506
525
  else:
507
526
  cats = self._get_categories(store, label_key)
508
527
  if cats is not None:
509
- cats = decode(cats) if isinstance(cats[0], bytes) else cats
528
+ cats = _decode(cats) if isinstance(cats[0], bytes) else cats
510
529
  cats_merge.update(cats)
511
530
  else:
512
531
  codes = self._get_codes(store, label_key)
513
- codes = decode(codes) if isinstance(codes[0], bytes) else codes
532
+ codes = _decode(codes) if isinstance(codes[0], bytes) else codes
514
533
  cats_merge.update(codes)
515
534
  return sorted(cats_merge)
516
535
 
517
- def _get_categories(self, storage: StorageType, label_key: str): # type: ignore
536
+ def _get_categories(self, storage: StorageType, label_key: str):
518
537
  """Get categories."""
519
538
  obs = storage["obs"] # type: ignore
520
539
  if isinstance(obs, ArrayTypes): # type: ignore
@@ -543,7 +562,7 @@ class MappedCollection:
543
562
  return None
544
563
  return None
545
564
 
546
- def _get_codes(self, storage: StorageType, label_key: str): # type: ignore
565
+ def _get_codes(self, storage: StorageType, label_key: str):
547
566
  """Get codes."""
548
567
  obs = storage["obs"] # type: ignore
549
568
  if isinstance(obs, ArrayTypes): # type: ignore
@@ -555,6 +574,21 @@ class MappedCollection:
555
574
  else:
556
575
  return label["codes"][...]
557
576
 
577
+ def _get_labels(
578
+ self, storage: StorageType, label_key: str, storage_idx: int | None = None
579
+ ):
580
+ """Get labels."""
581
+ codes = self._get_codes(storage, label_key)
582
+ labels = _decode(codes) if isinstance(codes[0], bytes) else codes
583
+ if storage_idx is not None and label_key in self._cache_cats:
584
+ cats = self._cache_cats[label_key][storage_idx]
585
+ else:
586
+ cats = self._get_categories(storage, label_key)
587
+ if cats is not None:
588
+ cats = _decode(cats) if isinstance(cats[0], bytes) else cats
589
+ labels = cats[labels]
590
+ return labels
591
+
558
592
  def close(self):
559
593
  """Close connections to array streaming backend.
560
594
 
@@ -137,7 +137,17 @@ def file_tiff_suo22() -> Path: # pragma: no cover
137
137
  def dir_iris_images() -> UPath: # pragma: no cover
138
138
  """Directory with 3 studies of the Iris flower: 405 images & metadata.
139
139
 
140
- Based on: https://github.com/laminlabs/lamindb-dev-datasets/pull/2
140
+ Provenance: https://lamin.ai/laminlabs/lamindata/transform/3q4MpQxRL2qZ5zKv
141
+
142
+ The problem is that the same artifact was also ingested by the downstream
143
+ demo notebook:
144
+ https://lamin.ai/laminlabs/lamindata/transform/NJvdsWWbJlZS5zKv
145
+
146
+ This is why on the UI, the artifact shows up as output of the downstream
147
+ demo notebook rather than the upstream curation notebook. The lineage
148
+ information should still be captured by
149
+ https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/lnschema_core/models.py#L2050-L2052
150
+ but we don't use this in the UI yet.
141
151
  """
142
152
  return UPath("s3://lamindata/iris_studies")
143
153
 
@@ -1,10 +1,9 @@
1
1
  """Exceptions.
2
2
 
3
- The registry base class:
4
-
5
3
  .. autosummary::
6
4
  :toctree: .
7
5
 
6
+ InvalidArgument
8
7
  DoesNotExist
9
8
  ValidationError
10
9
  NotebookNotSavedError
@@ -15,12 +14,26 @@ The registry base class:
15
14
 
16
15
  """
17
16
 
17
+ # inheriting from SystemExit has the sole purpose of suppressing
18
+ # the traceback - this isn't optimal but the current best solution
19
+ # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1726856875597489
20
+
21
+
22
+ class InvalidArgument(SystemExit):
23
+ """Invalid method or function argument."""
24
+
25
+ pass
26
+
18
27
 
19
28
  class TrackNotCalled(SystemExit):
29
+ """ln.context.track() wasn't called."""
30
+
20
31
  pass
21
32
 
22
33
 
23
34
  class NotebookNotSaved(SystemExit):
35
+ """Notebook wasn't saved."""
36
+
24
37
  pass
25
38
 
26
39
 
@@ -69,7 +69,7 @@ def backed_access(
69
69
  using_key: str | None = None,
70
70
  ) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
71
71
  if isinstance(artifact_or_filepath, Artifact):
72
- filepath = filepath_from_artifact(artifact_or_filepath, using_key=using_key)
72
+ filepath, _ = filepath_from_artifact(artifact_or_filepath, using_key=using_key)
73
73
  else:
74
74
  filepath = artifact_or_filepath
75
75
  name = filepath.name
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import shutil
4
- from pathlib import Path
5
4
  from typing import TYPE_CHECKING
6
5
 
7
6
  import anndata as ad
@@ -19,6 +18,8 @@ from lnschema_core.models import Artifact, Storage
19
18
  from lamindb.core._settings import settings
20
19
 
21
20
  if TYPE_CHECKING:
21
+ from pathlib import Path
22
+
22
23
  from lamindb_setup.core.types import UPathStr
23
24
 
24
25
 
@@ -52,12 +53,13 @@ def check_path_is_child_of_root(path: Path | UPath, root: Path | UPath | None) -
52
53
  return root.resolve() in path.resolve().parents
53
54
 
54
55
 
56
+ # returns filepath and root of the storage
55
57
  def attempt_accessing_path(
56
58
  artifact: Artifact,
57
59
  storage_key: str,
58
60
  using_key: str | None = None,
59
61
  access_token: str | None = None,
60
- ):
62
+ ) -> tuple[UPath, StorageSettings]:
61
63
  # check whether the file is in the default db and whether storage
62
64
  # matches default storage
63
65
  if (
@@ -78,23 +80,53 @@ def attempt_accessing_path(
78
80
  # find a better way than passing None to instance_settings in the future!
79
81
  storage_settings = StorageSettings(storage.root, access_token=access_token)
80
82
  path = storage_settings.key_to_filepath(storage_key)
81
- return path
83
+ return path, storage_settings
82
84
 
83
85
 
84
- # add type annotations back asap when re-organizing the module
85
- def filepath_from_artifact(artifact: Artifact, using_key: str | None = None):
86
+ def filepath_from_artifact(
87
+ artifact: Artifact, using_key: str | None = None
88
+ ) -> tuple[UPath, StorageSettings | None]:
86
89
  if hasattr(artifact, "_local_filepath") and artifact._local_filepath is not None:
87
- return artifact._local_filepath.resolve()
90
+ return artifact._local_filepath.resolve(), None
88
91
  storage_key = auto_storage_key_from_artifact(artifact)
89
- path = attempt_accessing_path(artifact, storage_key, using_key=using_key)
90
- return path
92
+ path, storage_settings = attempt_accessing_path(
93
+ artifact, storage_key, using_key=using_key
94
+ )
95
+ return path, storage_settings
96
+
97
+
98
+ # virtual key is taken into consideration
99
+ # only if the version is latest
100
+ def _cache_key_from_artifact_storage(
101
+ artifact: Artifact, storage_settings: StorageSettings | None
102
+ ):
103
+ cache_key = None
104
+ if (
105
+ artifact._key_is_virtual
106
+ and artifact.key is not None
107
+ and storage_settings is not None
108
+ and artifact.is_latest
109
+ ):
110
+ cache_key = (storage_settings.root / artifact.key).path
111
+ return cache_key
112
+
113
+
114
+ # return filepath and cache_key if needed
115
+ def filepath_cache_key_from_artifact(
116
+ artifact: Artifact, using_key: str | None = None
117
+ ) -> tuple[UPath, str | None]:
118
+ filepath, storage_settings = filepath_from_artifact(artifact, using_key)
119
+ if isinstance(filepath, LocalPathClasses):
120
+ return filepath, None
121
+ cache_key = _cache_key_from_artifact_storage(artifact, storage_settings)
122
+ return filepath, cache_key
91
123
 
92
124
 
93
125
  def store_file_or_folder(
94
126
  local_path: UPathStr, storage_path: UPath, print_progress: bool = True
95
127
  ) -> None:
96
128
  """Store file or folder (localpath) at storagepath."""
97
- local_path = Path(local_path)
129
+ local_path = UPath(local_path)
98
130
  if not isinstance(storage_path, LocalPathClasses):
99
131
  # this uploads files and directories
100
132
  create_folder = False if local_path.is_dir() else None
@@ -102,12 +134,11 @@ def store_file_or_folder(
102
134
  local_path, create_folder=create_folder, print_progress=print_progress
103
135
  )
104
136
  else: # storage path is local
137
+ if local_path.resolve().as_posix() == storage_path.resolve().as_posix():
138
+ return None
105
139
  storage_path.parent.mkdir(parents=True, exist_ok=True)
106
140
  if local_path.is_file():
107
- try:
108
- shutil.copyfile(local_path, storage_path)
109
- except shutil.SameFileError:
110
- pass
141
+ shutil.copyfile(local_path, storage_path)
111
142
  else:
112
143
  if storage_path.exists():
113
144
  shutil.rmtree(storage_path)
@@ -117,7 +148,7 @@ def store_file_or_folder(
117
148
  def delete_storage_using_key(
118
149
  artifact: Artifact, storage_key: str, using_key: str | None
119
150
  ):
120
- filepath = attempt_accessing_path(artifact, storage_key, using_key=using_key)
151
+ filepath, _ = attempt_accessing_path(artifact, storage_key, using_key=using_key)
121
152
  delete_storage(filepath)
122
153
 
123
154
 
@@ -18,7 +18,7 @@ def message_update_key_in_version_family(
18
18
  registry: str,
19
19
  new_key: str,
20
20
  ) -> str:
21
- return f'Or update key "{existing_key}" in your existing family:\n\nln.{registry}.filter(uid__startswith="{suid}").update(key="{new_key}")'
21
+ return f'Or update key "{existing_key}" to "{new_key}":\n\nln.{registry}.filter(uid__startswith="{suid}").update(key="{new_key}")\n'
22
22
 
23
23
 
24
24
  def increment_base62(s: str) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.76.7
3
+ Version: 0.76.9
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: lnschema_core==0.74.3
13
- Requires-Dist: lamindb_setup==0.77.2
12
+ Requires-Dist: lnschema_core==0.74.5
13
+ Requires-Dist: lamindb_setup==0.77.5
14
14
  Requires-Dist: lamin_utils==0.13.4
15
- Requires-Dist: lamin_cli==0.17.3
15
+ Requires-Dist: lamin_cli==0.17.6
16
16
  Requires-Dist: rapidfuzz
17
17
  Requires-Dist: pyarrow
18
18
  Requires-Dist: typing_extensions!=4.6.0
@@ -1,7 +1,7 @@
1
- lamindb/__init__.py,sha256=5ywGXz6u0OjQ-W57dmRORaBl-njA98gRYHBZgW9HBh8,2344
2
- lamindb/_artifact.py,sha256=PKu_CuTypCSSByu0bjRELVWsZ6mQU-AFIygGAAXVsIs,44110
1
+ lamindb/__init__.py,sha256=2tV0Xvd0v3bD22wAZj__CvVleGCC1UG7zEqm1Nkb1q8,2344
2
+ lamindb/_artifact.py,sha256=ZQ5tFPhwnC9Aqku-wPBflXU_u8jh6EroCnZPsZE_42M,44984
3
3
  lamindb/_can_validate.py,sha256=9di9FLmC2m3dpT42sceF34UEFzQITi2e_hjVMa8DIc4,18261
4
- lamindb/_collection.py,sha256=hT9VFNIVd041f45bDE-BYXcCvM2Cd1vGByTeP9_F3Yc,14016
4
+ lamindb/_collection.py,sha256=Lnmc42wQgN0-qrPBpGaPPt_rDtYgOBjYVhAkuZNS4uQ,14101
5
5
  lamindb/_curate.py,sha256=b9CsOChho-V9YUOY2D4ZO-agM0wjH5DsBGxcrRCJuTw,58807
6
6
  lamindb/_feature.py,sha256=nZhtrH0ssoNls-hV-dkwfK9sKypg2El59R9qfarxfUE,5340
7
7
  lamindb/_feature_set.py,sha256=DmAy96V_RyV0yiyvWOCHgustXPsCaMwn4TrWwh2qDd8,8104
@@ -14,39 +14,39 @@ lamindb/_query_manager.py,sha256=Ipe85HL31DDwMbC8CN_1Svbwk48a_DUh_INGQdZL08I,422
14
14
  lamindb/_query_set.py,sha256=BiGvEiaBSd9aV28EAy83Q8h6RLsYMDjfxLOljAcyMaM,12692
15
15
  lamindb/_record.py,sha256=d-tBYj_EgNBD2Nl9pBC5H_OYGdRmBKAVvRnE91_23e8,22035
16
16
  lamindb/_run.py,sha256=5M_r1zGDv9HlqbqRKTWCYCOtENovJ-8mQ4kY7XqcLaU,1888
17
- lamindb/_save.py,sha256=Fu7Z84btKOXfTfpunKLni21s5ER2zIllqg5e3nPq-0A,10910
17
+ lamindb/_save.py,sha256=YfyTTOil-05OBfs5bO1vFxuyUYUqR5J1wqxaKYZxg4s,11352
18
18
  lamindb/_storage.py,sha256=GBVChv-DHVMNEBJL5l_JT6B4RDhZ6NnwgzmUICphYKk,413
19
19
  lamindb/_transform.py,sha256=tRO7Uq-8fkq6Tm4U5qQ1lBOaNUehH8IkiDDPnYPgQH8,4623
20
20
  lamindb/_ulabel.py,sha256=XDSdZBXX_ki5s1vOths3MjF2x5DPggBR_PV_KF4SGyg,1611
21
21
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
22
22
  lamindb/_view.py,sha256=4Ln2ItTb3857PAI-70O8eJYqoTJ_NNFc7E_wds6OGns,2412
23
23
  lamindb/core/__init__.py,sha256=57AXQ286eOX2_o5HUeqIFJrfqN-OZ_E7FVHd3Xm5oOk,1483
24
- lamindb/core/_context.py,sha256=RCp5NKNr8JWxHZXFmNZT0GYgt-nKs4inZ0OQYkBMub0,21549
25
- lamindb/core/_data.py,sha256=EJNVqsyBR5N-IK-YtF6l92QlB2V9FFRkkd4uF_Phfmo,16890
24
+ lamindb/core/_context.py,sha256=O66uBJGDHBJFCKFrQyFuBPheCxXMSMmDjelIjLpfiSQ,21613
25
+ lamindb/core/_data.py,sha256=TRgB_VFXjToVeFkqlZLROvdAcl2-2jg9QJLGQRPf8i0,16889
26
26
  lamindb/core/_feature_manager.py,sha256=n8z51HiOGom642EXmSrHMDqlcpbJFyYutyc-Mwu1NM0,32673
27
27
  lamindb/core/_label_manager.py,sha256=VskOVnQoafKCjUKDs64b3vN4w7NTcM7fDXNdUtMVGFo,9605
28
- lamindb/core/_mapped_collection.py,sha256=1XzratL2IvRleqioNhWo26Lsuqkev8-HEImmHQxw9Kw,23266
28
+ lamindb/core/_mapped_collection.py,sha256=h8H1j_TC_Tzk2CHcMjTRk34Et95SVQY5di4GnQdmqxc,24604
29
29
  lamindb/core/_settings.py,sha256=73SV-vTDzSKX9E5rSvj9kdPV4jHSovRM3x7bgT1OBh8,5948
30
30
  lamindb/core/_sync_git.py,sha256=qc0yfPyKeG4uuNT_3qsv-mkIMqhLFqfXNeNVO49vV00,4547
31
31
  lamindb/core/_track_environment.py,sha256=STzEVUzOeUEWdX7WDJUkKH4u08k7eupRX6AXQwoVt14,828
32
- lamindb/core/exceptions.py,sha256=9KM3j2PvHzW-Gx6waoxzlYiz822ZMJ_7PXPqv1AHup0,1284
32
+ lamindb/core/exceptions.py,sha256=rJ7v4KZWBrYP62ylIy5jCRvCW0CteQ-4r4yMrUz2UP8,1652
33
33
  lamindb/core/fields.py,sha256=47Jmh3efUr5ZscgimR_yckY-I3cNf8ScLutbwKCK3j4,162
34
34
  lamindb/core/loaders.py,sha256=KMTkDa73jkRVvI9uc5Fgr0t6mq22cAxBwhSlUZKUaBg,4016
35
35
  lamindb/core/schema.py,sha256=KiYQn_8fokSMztTNDe6qUocZzKXWxU32H-YChNJv51A,1877
36
36
  lamindb/core/types.py,sha256=uVBqSVLoQaTkqP9nqsJhwU6yYnx8H5e6-ZxrB6vpOOw,265
37
- lamindb/core/versioning.py,sha256=GYhgSA6IOlWMMNfctZu7U_jIvmQP2gdvsZxn4bTanOc,5277
37
+ lamindb/core/versioning.py,sha256=i9J6YaibM4dXqojqfC_tULfEAJXHu6hU3XbsIeKgp_4,5270
38
38
  lamindb/core/datasets/__init__.py,sha256=zRP98oqUAaXhqWyKMiH0s_ImVIuNeziQQ2kQ_t0f-DI,1353
39
- lamindb/core/datasets/_core.py,sha256=CgVF_pXuBXLElzubDMsl1DbpYOnXCY0HleITVvBKih4,19873
39
+ lamindb/core/datasets/_core.py,sha256=JGP_q-OQibDCEaI54jZ2F6fSbSW9Yg6oYOqgOCXM0v4,20414
40
40
  lamindb/core/datasets/_fake.py,sha256=BZF9R_1iF0HDnvtZNqL2FtsjSMuqDIfuFxnw_LJYIh4,953
41
41
  lamindb/core/storage/__init__.py,sha256=JOIMu_7unbyhndtH1j0Q-9AvY8knSuc1IJO9sQnyBAQ,498
42
42
  lamindb/core/storage/_anndata_accessor.py,sha256=F3ze8ICG7K4BKueg-766olnoEA8Eh8gVrvDSSE2FX-M,24160
43
43
  lamindb/core/storage/_anndata_sizes.py,sha256=aXO3OB--tF5MChenSsigW6Q-RuE8YJJOUTVukkLrv9A,1029
44
- lamindb/core/storage/_backed_access.py,sha256=YcWCeT2eligJGsBdjJS_-4el_eC9J088jxUWG9lsleM,3231
44
+ lamindb/core/storage/_backed_access.py,sha256=O0zazsDlW0PKa52WHV5HooHGGI81FxLT6VEvNONdiEc,3234
45
45
  lamindb/core/storage/_tiledbsoma.py,sha256=0NPLS5m1icEhzWPfXAv4U2SNiLGqGQd7FM6xCm5wYEc,7269
46
46
  lamindb/core/storage/_valid_suffixes.py,sha256=vUSeQ4s01rdhD_vSd6wKmFBsgMJAKkBMnL_T9Y1znMg,501
47
47
  lamindb/core/storage/_zarr.py,sha256=TODQD3p1eykoPwP-c-YRP_UDmsbMeBGMGvkBxxOMeYc,3663
48
48
  lamindb/core/storage/objects.py,sha256=OzvBCS-Urz5mr-O95qYt6RGBDDX5HmjfRRKWPPDn1ZE,1797
49
- lamindb/core/storage/paths.py,sha256=L5ImdOURHdA9dB2XLzuFe90zj2oC1EQaZdi0pyYZcW0,4854
49
+ lamindb/core/storage/paths.py,sha256=bQwRbGTOCx3DSFOQ404uT-3YBXrm8yfKuttNCdWwJpA,5892
50
50
  lamindb/core/subsettings/__init__.py,sha256=KFHPzIE7f7Bj4RgMjGQF4CjTdHVG_VNFBrCndo49ixo,198
51
51
  lamindb/core/subsettings/_creation_settings.py,sha256=54mfMH_osC753hpxcl7Dq1rwBD2LHnWveXtQpkLBITE,1194
52
52
  lamindb/core/subsettings/_transform_settings.py,sha256=4YbCuZtJo6zdytl6UQR4GvdDkTtT6SRBqVzofGzNOt8,583
@@ -54,7 +54,7 @@ lamindb/integrations/__init__.py,sha256=RWGMYYIzr8zvmNPyVB4m-p4gMDhxdRbjES2Ed23O
54
54
  lamindb/integrations/_vitessce.py,sha256=S51wl7iF2QvQmrNcZ9yDdqTtcn_AAzuh0i5axKwQ2sM,4560
55
55
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
56
56
  lamindb/setup/core/__init__.py,sha256=SevlVrc2AZWL3uALbE5sopxBnIZPWZ1IB0NBDudiAL8,167
57
- lamindb-0.76.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
58
- lamindb-0.76.7.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
59
- lamindb-0.76.7.dist-info/METADATA,sha256=8F_urb2kwsRFh0LjMICrKOS6AwoBy7GTD837GPsm9pA,2372
60
- lamindb-0.76.7.dist-info/RECORD,,
57
+ lamindb-0.76.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
58
+ lamindb-0.76.9.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
59
+ lamindb-0.76.9.dist-info/METADATA,sha256=lHl2Ifd3QCisYZ-Qz6JacQMWvXtF6O7HQfK65JiKBb4,2372
60
+ lamindb-0.76.9.dist-info/RECORD,,