lamindb 0.57.2__py3-none-any.whl → 0.58.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -53,7 +53,7 @@ Static classes & modules:
 
 """
 
-__version__ = "0.57.2"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.58.1"  # denote a release candidate for 0.1.0 with 0.1rc1
 
 import os as _os
 
lamindb/_dataset.py CHANGED
@@ -63,12 +63,15 @@ def __init__(
         kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
     )
     version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
+    visibility: Optional[int] = (
+        kwargs.pop("visibility") if "visibility" in kwargs else 0
+    )
     feature_sets: Dict[str, FeatureSet] = (
         kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
     )
     if not len(kwargs) == 0:
         raise ValueError(
-            f"Only data, name, run, description, reference, reference_type can be passed, you passed: {kwargs}"  # noqa
+            f"Only data, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"  # noqa
         )
 
     if is_new_version_of is None:
@@ -145,6 +148,7 @@ def __init__(
         hash, feature_sets = from_files(files)  # type: ignore
     else:
         raise ValueError("Only DataFrame, AnnData and iterable of File is allowed")
+    # we ignore datasets in trash containing the same hash
    existing_dataset = Dataset.filter(hash=hash).one_or_none()
    if existing_dataset is not None:
        logger.warning(f"returning existing dataset with same hash: {existing_dataset}")
@@ -169,6 +173,7 @@ def __init__(
         run=run,
         version=version,
         initial_version_id=initial_version_id,
+        visibility=visibility,
         **kwargs,
     )
     dataset._files = files
@@ -179,10 +184,7 @@ def __init__(
     if file is not None and file.run != run:
         _track_run_input(file, run=run)
     elif files is not None:
-        for file in files:
-            if file.run != run:
-                _track_run_input(file, run=run)
-        # there is not other possibility
+        _track_run_input(files, run=run)
 
 
 @classmethod  # type: ignore
@@ -197,6 +199,8 @@ def from_df(
     modality: Optional[Modality] = None,
     reference: Optional[str] = None,
     reference_type: Optional[str] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "Dataset":
     """{}"""
     feature_set = FeatureSet.from_df(df, field=field, modality=modality)
@@ -205,7 +209,15 @@ def from_df(
     else:
         feature_sets = {}
     dataset = Dataset(
-        data=df, name=name, run=run, description=description, feature_sets=feature_sets
+        data=df,
+        name=name,
+        run=run,
+        description=description,
+        feature_sets=feature_sets,
+        reference=reference,
+        reference_type=reference_type,
+        version=version,
+        is_new_version_of=is_new_version_of,
     )
     return dataset
 
@@ -222,6 +234,8 @@ def from_anndata(
     modality: Optional[Modality] = None,
     reference: Optional[str] = None,
     reference_type: Optional[str] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "Dataset":
     """{}"""
     if isinstance(adata, File):
@@ -237,6 +251,10 @@ def from_anndata(
         name=name,
         description=description,
         feature_sets=feature_sets,
+        reference=reference,
+        reference_type=reference_type,
+        version=version,
+        is_new_version_of=is_new_version_of,
     )
     return dataset
 
@@ -244,30 +262,48 @@ def from_anndata(
 # internal function, not exposed to user
 def from_files(files: Iterable[File]) -> Tuple[str, Dict[str, str]]:
     # assert all files are already saved
+    logger.debug("check not saved")
     saved = not any([file._state.adding for file in files])
     if not saved:
         raise ValueError("Not all files are yet saved, please save them")
     # query all feature sets of files
+    logger.debug("file ids")
     file_ids = [file.id for file in files]
     # query all feature sets at the same time rather than making a single query per file
+    logger.debug("feature_set_file_links")
     feature_set_file_links = File.feature_sets.through.objects.filter(
         file_id__in=file_ids
     )
-    feature_set_ids = [link.feature_set_id for link in feature_set_file_links]
-    feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
     feature_sets_by_slots = defaultdict(list)
+    logger.debug("slots")
     for link in feature_set_file_links:
-        feature_sets_by_slots[link.slot].append(
-            feature_sets.filter(id=link.feature_set_id).one()
-        )
+        feature_sets_by_slots[link.slot].append(link.feature_set_id)
     feature_sets_union = {}
-    for slot, feature_sets_slot in feature_sets_by_slots.items():
-        members = feature_sets_slot[0].members
-        for feature_set in feature_sets_slot[1:]:
-            members = members | feature_set.members
-        feature_sets_union[slot] = FeatureSet(members)
+    logger.debug("union")
+    for slot, feature_set_ids_slot in feature_sets_by_slots.items():
+        feature_set_1 = FeatureSet.filter(id=feature_set_ids_slot[0]).one()
+        related_name = feature_set_1._get_related_name()
+        features_registry = getattr(FeatureSet, related_name).field.model
+        start_time = logger.debug("run filter")
+        # this way of writing the __in statement turned out to be the fastest
+        # evaluated on a link table with 16M entries connecting 500 feature sets with
+        # 60k genes
+        feature_ids = (
+            features_registry.feature_sets.through.objects.filter(
+                featureset_id__in=feature_set_ids_slot
+            )
+            .values(f"{features_registry.__name__.lower()}_id")
+            .distinct()
+        )
+        start_time = logger.debug("done, start evaluate", time=start_time)
+        features = features_registry.filter(id__in=feature_ids)
+        feature_sets_union[slot] = FeatureSet(
+            features, type=feature_set_1.type, modality=feature_set_1.modality
+        )
+        start_time = logger.debug("done", time=start_time)
     # validate consistency of hashes
     # we do not allow duplicate hashes
+    logger.debug("hashes")
     hashes = [file.hash for file in files]
     if len(hashes) != len(set(hashes)):
         seen = set()
@@ -276,7 +312,9 @@ def from_files(files: Iterable[File]) -> Tuple[str, Dict[str, str]]:
         "Please pass files with distinct hashes: these ones are non-unique"
         f" {non_unique}"
     )
+    time = logger.debug("hash")
     hash = hash_set(set(hashes))
+    logger.debug("done", time=time)
     return hash, feature_sets_union
 
 
@@ -311,12 +349,12 @@ def load(
     # because we're tracking data flow on the dataset-level, here, we don't
     # want to track it on the file-level
     objects = [file.load(is_run_input=False) for file in all_files]
-    file_ids = [file.id for file in all_files]
+    file_uids = [file.uid for file in all_files]
     if isinstance(objects[0], pd.DataFrame):
         concat_object = pd.concat(objects, join=join)
     elif isinstance(objects[0], ad.AnnData):
         concat_object = ad.concat(
-            objects, join=join, label="file_id", keys=file_ids
+            objects, join=join, label="file_uid", keys=file_uids
         )
     # only call it here because there might be errors during concat
     _track_run_input(self, is_run_input)
@@ -324,10 +362,32 @@ def load(
 
 
 # docstring handled through attach_func_to_class_method
-def delete(self, storage: Optional[bool] = None) -> None:
-    super(Dataset, self).delete()
+def delete(
+    self, permanent: Optional[bool] = None, storage: Optional[bool] = None
+) -> None:
+    # change visibility to 2 (trash)
+    if self.visibility < 2 and permanent is not True:
+        self.visibility = 2
+        self.save()
+        if self.file is not None:
+            self.file.visibility = 2
+            self.file.save()
+        return
+
+    # permanent delete
+    if permanent is None:
+        response = input(
+            "File record is already in trash! Are you sure to delete it from your"
+            " database? (y/n) You can't undo this action."
+        )
+        delete_record = response == "y"
+    else:
+        delete_record = permanent
+
+    if delete_record:
+        super(Dataset, self).delete()
     if self.file is not None:
-        self.file.delete(storage=storage)
+        self.file.delete(permanent=permanent, storage=storage)
 
 
 # docstring handled through attach_func_to_class_method
@@ -351,6 +411,15 @@ def path(self) -> Union[Path, UPath]:
     return self.storage.path
 
 
+# docstring handled through attach_func_to_class_method
+def restore(self) -> None:
+    self.visibility = 0
+    self.save()
+    if self.file is not None:
+        self.file.visibility = 0
+        self.file.save()
+
+
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -359,6 +428,7 @@ METHOD_NAMES = [
     "load",
     "delete",
     "save",
+    "restore",
 ]
 
 if _TESTING:
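
Net effect of the `_dataset.py` changes: `delete()` is now a two-stage operation. The first call moves the record (and its wrapped file, if any) to the trash by setting `visibility` to 2; only a second call, or an explicit `permanent=True`, removes it from the database. A minimal usage sketch, assuming a configured lamindb instance (the DataFrame and name are placeholders):

    import lamindb as ln
    import pandas as pd

    dataset = ln.Dataset(pd.DataFrame({"a": [1, 2]}), name="example")
    dataset.save()
    dataset.delete()    # soft delete: visibility -> 2 (trash)
    dataset.restore()   # visibility -> 0, visible in default queries again
    dataset.delete(permanent=True, storage=True)  # hard delete, no prompt
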
lamindb/_feature_set.py CHANGED
@@ -239,11 +239,16 @@ def members(self) -> "QuerySet":
         # this should return a queryset and not a list...
         # need to fix this
         return self._features[1]
+    related_name = self._get_related_name()
+    return self.__getattribute__(related_name).all()
+
+
+def _get_related_name(self: FeatureSet) -> str:
     key_split = self.registry.split(".")
     orm_name_with_schema = f"{key_split[0]}.{key_split[1]}"
     feature_sets_related_models = dict_related_model_to_related_name(self)
     related_name = feature_sets_related_models.get(orm_name_with_schema)
-    return self.__getattribute__(related_name).all()
+    return related_name
 
 
 METHOD_NAMES = [
@@ -266,3 +271,4 @@ for name in METHOD_NAMES:
     attach_func_to_class_method(name, FeatureSet, globals())
 
 setattr(FeatureSet, "members", members)
+setattr(FeatureSet, "_get_related_name", _get_related_name)
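
This refactor extracts the registry lookup from `members` into `_get_related_name`, so other code paths — notably the rewritten `from_files` in `_dataset.py` above — can resolve the features registry of a `FeatureSet` without materializing its members. A sketch of the relationship, assuming a gene-based feature set (the related name "genes" is illustrative):

    feature_set = ln.FeatureSet.filter().first()
    related_name = feature_set._get_related_name()  # e.g. "genes"
    # equivalent to feature_set.members:
    members = getattr(feature_set, related_name).all()
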
lamindb/_file.py CHANGED
@@ -179,6 +179,7 @@ def get_hash(
     hash, hash_type = hash_file(filepath)
     if not check_hash:
         return hash, hash_type
+    # we ignore datasets in trash containing the same hash
     result = File.filter(hash=hash).list()
     if len(result) > 0:
         if settings.upon_file_create_if_hash_exists == "error":
@@ -454,6 +455,9 @@ def __init__(file: File, *args, **kwargs):
         kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
     )
     version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
+    visibility: Optional[int] = (
+        kwargs.pop("visibility") if "visibility" in kwargs else 0
+    )
     format = kwargs.pop("format") if "format" in kwargs else None
     log_hint = kwargs.pop("log_hint") if "log_hint" in kwargs else True
     skip_check_exists = (
@@ -462,8 +466,8 @@ def __init__(file: File, *args, **kwargs):
 
     if not len(kwargs) == 0:
         raise ValueError(
-            "Only data, key, run, description, version, is_new_version_of can be"
-            f" passed, you passed: {kwargs}"
+            "Only data, key, run, description, version, is_new_version_of, visibility"
+            f" can be passed, you passed: {kwargs}"
         )
 
     if is_new_version_of is None:
@@ -523,6 +527,7 @@ def __init__(file: File, *args, **kwargs):
     kwargs["initial_version_id"] = initial_version_id
     kwargs["version"] = version
     kwargs["description"] = description
+    kwargs["visibility"] = visibility
     # this check needs to come down here because key might be populated from an
     # existing file path during get_file_kwargs_from_data()
     if (
@@ -553,9 +558,19 @@ def from_df(
     description: Optional[str] = None,
     run: Optional[Run] = None,
     modality: Optional[Modality] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "File":
     """{}"""
-    file = File(data=df, key=key, run=run, description=description, log_hint=False)
+    file = File(
+        data=df,
+        key=key,
+        run=run,
+        description=description,
+        version=version,
+        is_new_version_of=is_new_version_of,
+        log_hint=False,
+    )
     feature_set = FeatureSet.from_df(df, field=field, modality=modality)
     if feature_set is not None:
         file._feature_sets = {"columns": feature_set}
@@ -615,9 +630,19 @@ def from_anndata(
     description: Optional[str] = None,
     run: Optional[Run] = None,
     modality: Optional[Modality] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "File":
     """{}"""
-    file = File(data=adata, key=key, run=run, description=description, log_hint=False)
+    file = File(
+        data=adata,
+        key=key,
+        run=run,
+        description=description,
+        version=version,
+        is_new_version_of=is_new_version_of,
+        log_hint=False,
+    )
     file._feature_sets = parse_feature_sets_from_anndata(adata, field, modality)
     return file
 
@@ -800,23 +825,49 @@ def stage(self, is_run_input: Optional[bool] = None) -> Path:
 
 
 # docstring handled through attach_func_to_class_method
-def delete(self, storage: Optional[bool] = None) -> None:
-    if storage is None:
-        response = input(f"Are you sure you want to delete {self} from storage? (y/n)")
-        delete_in_storage = response == "y"
+def delete(
+    self, permanent: Optional[bool] = None, storage: Optional[bool] = None
+) -> None:
+    # change visibility to 2 (trash)
+    if self.visibility < 2 and permanent is not True:
+        self.visibility = 2
+        self.save()
+        return
+
+    # if the file is already in the trash
+    # permanent delete skips the trash
+    if permanent is None:
+        response = input(
+            "File record is already in trash! Are you sure to delete it from your"
+            " database? (y/n) You can't undo this action."
+        )
+        delete_record = response == "y"
     else:
-        delete_in_storage = storage
+        delete_record = permanent
 
     # need to grab file path before deletion
     filepath = self.path
+
     # only delete in storage if DB delete is successful
     # DB delete might error because of a foreign key constraint violated etc.
-    self._delete_skip_storage()
-    # we don't yet have any way to bring back the deleted metadata record
-    # in case the storage deletion fails - this is important for ACID down the road
-    if delete_in_storage:
-        delete_storage(filepath)
-        logger.success(f"deleted stored object {colors.yellow(f'{filepath}')}")
+    if delete_record:
+        self._delete_skip_storage()
+        if self.key is None:
+            delete_in_storage = True
+        else:
+            if storage is None:
+                response = input(
+                    f"Are you sure to delete {filepath}? (y/n) You can't undo this"
+                    " action."
+                )
+                delete_in_storage = response == "y"
+            else:
+                delete_in_storage = storage
+        # we don't yet have any way to bring back the deleted metadata record
+        # in case storage deletion fails - this is important for ACID down the road
+        if delete_in_storage:
            delete_storage(filepath)
            logger.success(f"deleted {colors.yellow(f'{filepath}')}")
 
 
 def _delete_skip_storage(file, *args, **kwargs) -> None:
@@ -941,6 +992,12 @@ def view_tree(
     )
 
 
+# docstring handled through attach_func_to_class_method
+def restore(self) -> None:
+    self.visibility = 0
+    self.save()
+
+
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -953,6 +1010,7 @@ METHOD_NAMES = [
     "replace",
     "from_dir",
     "view_tree",
+    "restore",
 ]
 
 if _TESTING:
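
`File.delete()` mirrors the new `Dataset` semantics: the first call trashes the record, and a permanent delete prompts before touching storage — but only for files with a `key`; files without a semantic key are always cleaned up in storage. A sketch, assuming a saved file record:

    file = ln.File.filter(description="my file").one()
    file.delete()    # visibility -> 2, record and stored object kept
    file.restore()   # visibility -> 0
    file.delete(permanent=True, storage=True)  # remove record and stored object
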
lamindb/_filter.py CHANGED
@@ -42,6 +42,18 @@ def filter(Registry: Type[Registry], using: str = None, **expressions) -> QuerySet:
             id=UUID(instance_result["id"]),
         )
         add_db_connection(isettings, using)
+
+    if Registry.__name__ in {"File", "Dataset"}:
+        # visibility is set to <2 by default
+        if not any([e.startswith("visibility") for e in expressions]):
+            expressions["visibility__lt"] = 2
+        # if visibility is None, will not apply any filter for visibility
+        elif "visibility" in expressions:
+            if expressions["visibility"] is None:
+                expressions.pop("visibility")
+            elif expressions["visibility"] == "default":
+                expressions.pop("visibility")
+                expressions["visibility__lt"] = 2
     qs = QuerySet(model=Registry, using=using)
     if len(expressions) > 0:
         return qs.filter(**expressions)
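
Based on the branching above, `filter()` now behaves as follows for `File` and `Dataset` (a sketch of the query semantics):

    ln.File.filter()                      # implicit visibility__lt=2: trash excluded
    ln.File.filter(visibility=2)          # only trashed records
    ln.File.filter(visibility=None)       # no visibility constraint at all
    ln.File.filter(visibility="default")  # explicit form of the default behavior
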
lamindb/_parents.py CHANGED
@@ -201,13 +201,18 @@ def _get_parents(record: Registry, field: str, distance: int, children: bool = False):
     d = 2
     while d < distance:
         condition = f"{key}__{condition}"
-        records = model.filter(**{condition: record.__getattribute__(field)}).all()
+        records = model.filter(**{condition: record.__getattribute__(field)})
 
-        if len(records) == 0:
-            return results
+        try:
+            if not records.exists():
+                return results
 
-        results = results | records
-        d += 1
+            results = results | records.all()
+            d += 1
+        except Exception:
+            # For OperationalError:
+            # SQLite does not support joins containing more than 64 tables
+            return results
     return results
 
 
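Each loop iteration in `_get_parents` prepends another `{key}__` segment to the filter condition, so the generated SQL gains one self-join per distance step; SQLite aborts once a query joins more than 64 tables, which the broad `except Exception` now absorbs. A hypothetical sketch of how the condition grows (key and field names are illustrative):

    key = "parents"
    condition = "name"
    for distance in range(2, 5):
        condition = f"{key}__{condition}"  # parents__name, parents__parents__name, ...
    # each added segment becomes another self-join in the generated SQL
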
lamindb/_query_set.py CHANGED
@@ -210,13 +210,11 @@ class QuerySet(models.QuerySet):
         return _search(cls=self, string=string, **kwargs)
 
     @doc_args(Registry.lookup.__doc__)
-    def lookup(
-        self, field: Optional[StrField] = None, return_field: Optional[StrField] = None
-    ) -> NamedTuple:
+    def lookup(self, field: Optional[StrField] = None, **kwargs) -> NamedTuple:
         """{}"""
         from ._registry import _lookup
 
-        return _lookup(cls=self, field=field, return_field=return_field)
+        return _lookup(cls=self, field=field, **kwargs)
 
     @doc_args(CanValidate.validate.__doc__)
     def validate(
lamindb/_registry.py CHANGED
@@ -147,8 +147,9 @@ def _search(
     return_queryset: bool = False,
     case_sensitive: bool = False,
     synonyms_field: Optional[StrField] = "synonyms",
+    **expressions,
 ) -> Union["pd.DataFrame", "QuerySet"]:
-    queryset = _queryset(cls)
+    queryset = _queryset(cls, **expressions)
     orm = queryset.model
 
     def _search_single_field(
@@ -229,6 +230,7 @@ def search(
     return_queryset: bool = False,
     case_sensitive: bool = False,
     synonyms_field: Optional[StrField] = "synonyms",
+    **expressions,
 ) -> Union["pd.DataFrame", "QuerySet"]:
     """{}"""
     return _search(
@@ -239,14 +241,18 @@ def search(
         limit=limit,
         case_sensitive=case_sensitive,
         synonyms_field=synonyms_field,
+        **expressions,
     )
 
 
 def _lookup(
-    cls, field: Optional[StrField] = None, return_field: Optional[StrField] = None
+    cls,
+    field: Optional[StrField] = None,
+    return_field: Optional[StrField] = None,
+    **expressions,
 ) -> NamedTuple:
     """{}"""
-    queryset = _queryset(cls)
+    queryset = _queryset(cls, **expressions)
     field = get_default_str_field(orm=queryset.model, field=field)
 
     return Lookup(
@@ -264,10 +270,13 @@ def _lookup(
 @classmethod  # type: ignore
 @doc_args(Registry.lookup.__doc__)
 def lookup(
-    cls, field: Optional[StrField] = None, return_field: Optional[StrField] = None
+    cls,
+    field: Optional[StrField] = None,
+    return_field: Optional[StrField] = None,
+    **expressions,
 ) -> NamedTuple:
     """{}"""
-    return _lookup(cls=cls, field=field, return_field=return_field)
+    return _lookup(cls=cls, field=field, return_field=return_field, **expressions)
 
 
 def get_default_str_field(
@@ -316,8 +325,12 @@ def get_default_str_field(
     return field
 
 
-def _queryset(cls: Union[Registry, QuerySet, Manager]) -> QuerySet:
-    queryset = cls.all() if isinstance(cls, QuerySet) else cls.objects.all()
+def _queryset(cls: Union[Registry, QuerySet, Manager], **expressions) -> QuerySet:
+    queryset = (
+        cls.filter(**expressions).all()
+        if isinstance(cls, QuerySet)
+        else cls.filter(**expressions).all()
+    )
     return queryset
 
 
lamindb/_save.py CHANGED
@@ -252,18 +252,20 @@ def upload_data_object(file) -> None:
     """Store and add file and its linked entries."""
     # do NOT hand-craft the storage key!
     file_storage_key = auto_storage_key_from_file(file)
-    msg = f"storing file '{file.id}' at '{file_storage_key}'"
+    storage_path = lamindb_setup.settings.instance.storage.key_to_filepath(
+        file_storage_key
+    )
+    msg = f"storing file '{file.uid}' at '{storage_path}'"
     if (
         file.suffix in {".zarr", ".zrad"}
         and hasattr(file, "_memory_rep")
         and file._memory_rep is not None
     ):
         logger.save(msg)
-        storagepath = lamindb_setup.settings.storage.key_to_filepath(file_storage_key)
         print_progress = partial(
             print_hook, filepath=file_storage_key, action="uploading"
         )
-        write_adata_zarr(file._memory_rep, storagepath, callback=print_progress)
+        write_adata_zarr(file._memory_rep, storage_path, callback=print_progress)
     elif hasattr(file, "_to_store") and file._to_store:
         logger.save(msg)
         store_object(file._local_filepath, file_storage_key)
lamindb/dev/_data.py CHANGED
@@ -308,24 +308,48 @@ def add_labels(
 
 
 def _track_run_input(
-    data: Data, is_run_input: Optional[bool] = None, run: Optional[Run] = None
+    data: Union[Data, Iterable[Data]],
+    is_run_input: Optional[bool] = None,
+    run: Optional[Run] = None,
 ):
     if run is None:
         run = run_context.run
+    # consider that data is an iterable of Data
+    data_iter: Iterable[Data] = [data] if isinstance(data, Data) else data
     track_run_input = False
+    input_data = []
+    if run is not None:
+        # avoid cycles: data can't be both input and output
+        input_data = [data for data in data_iter if data.run_id != run.id]
+        input_data_ids = [data.id for data in data_iter if data.run_id != run.id]
+    if input_data:
+        data_class_name = input_data[0].__class__.__name__.lower()
+    # let us first look at the case in which the user does not
+    # provide a boolean value for `is_run_input`
+    # hence, we need to determine whether we actually want to
+    # track a run or not
     if is_run_input is None:
-        # we need a global run context for this to work
-        if run is not None:
-            # avoid cycles (a file is both input and output)
-            if data.run != run:
+        # we don't have a run record
+        if run is None:
+            if settings.track_run_inputs:
+                logger.hint(
+                    "you can auto-track this file as a run input by calling"
+                    " `ln.track()`"
+                )
+        # assume we have a run record
+        else:
+            # assume there is non-cyclic candidate input data
+            if input_data:
                 if settings.track_run_inputs:
                     transform_note = ""
-                    if data.transform is not None:
-                        transform_note = (
-                            f", adding parent transform {data.transform.id}"
-                        )
+                    if len(input_data) == 1:
+                        if input_data[0].transform is not None:
+                            transform_note = (
+                                ", adding parent transform"
+                                f" {input_data[0].transform.id}"
+                            )
                     logger.info(
-                        f"adding file {data.id} as input for run"
+                        f"adding {data_class_name} {input_data_ids} as input for run"
                         f" {run.id}{transform_note}"
                     )
                     track_run_input = True
@@ -333,12 +357,6 @@ def _track_run_input(
                 logger.hint(
                     "track this file as a run input by passing `is_run_input=True`"
                 )
-        else:
-            if settings.track_run_inputs:
-                logger.hint(
-                    "you can auto-track this file as a run input by calling"
-                    " `ln.track()`"
-                )
     else:
         track_run_input = is_run_input
     if track_run_input:
@@ -348,12 +366,22 @@ def _track_run_input(
             " run object via `run.input_files.add(file)`"
         )
         # avoid adding the same run twice
-        # avoid cycles (a file is both input and output)
-        if not data.input_of.contains(run) and data.run != run:
-            run.save()
-            data.input_of.add(run)
-            if data.transform is not None:
-                run.transform.parents.add(data.transform)
+        run.save()
+        if data_class_name == "file":
+            LinkORM = run.input_files.through
+            links = [
+                LinkORM(run_id=run.id, file_id=data_id) for data_id in input_data_ids
+            ]
+        else:
+            LinkORM = run.input_datasets.through
+            links = [
+                LinkORM(run_id=run.id, dataset_id=data_id) for data_id in input_data_ids
+            ]
+        LinkORM.objects.bulk_create(links, ignore_conflicts=True)
+        # generalize below for more than one data batch
+        if len(input_data) == 1:
+            if input_data[0].transform is not None:
+                run.transform.parents.add(input_data[0].transform)
 
 
 @property  # type: ignore
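
`_track_run_input` now accepts either a single `Data` record or an iterable of them, and registers all run inputs with a single `bulk_create` on the link table instead of one `input_of.add()` call per record — this is what lets `Dataset.__init__` pass the whole `files` iterable in one call (see `_dataset.py` above). A sketch of the bulk pattern, in generic Django ORM terms (`input_file_ids` is illustrative):

    LinkORM = run.input_files.through
    links = [LinkORM(run_id=run.id, file_id=fid) for fid in input_file_ids]
    # one INSERT for all links; ignore_conflicts skips rows that already exist
    LinkORM.objects.bulk_create(links, ignore_conflicts=True)
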
lamindb/dev/datasets/_core.py CHANGED
@@ -156,7 +156,7 @@ def anndata_mouse_sc_lymph_node(
         populate_registries: pre-populate metadata records to simulate existing registries  # noqa
     """
     filepath, _ = urlretrieve("https://lamindb-test.s3.amazonaws.com/E-MTAB-8414.h5ad")
-    adata = ad.read(filepath)
+    adata = ad.read_h5ad(filepath)
 
     # The column names are a bit lengthy, let's abbreviate them:
     adata.obs.columns = (
@@ -253,7 +253,7 @@ def anndata_pbmc68k_reduced() -> ad.AnnData:
     filepath, _ = urlretrieve(
         "https://lamindb-dev-datasets.s3.amazonaws.com/scrnaseq_pbmc68k_tiny.h5ad"
     )
-    return ad.read(filepath)
+    return ad.read_h5ad(filepath)
 
 
 def anndata_file_pbmc68k_test() -> Path:
@@ -283,7 +283,7 @@ def anndata_pbmc3k_processed() -> ad.AnnData:  # pragma: no cover
     filepath, _ = urlretrieve(
         "https://lamindb-test.s3.amazonaws.com/scrnaseq_scanpy_pbmc3k_processed.h5ad"
     )
-    pbmc3k = ad.read(filepath)
+    pbmc3k = ad.read_h5ad(filepath)
     pbmc3k.obs.rename(columns={"louvain": "cell_type"}, inplace=True)
     return pbmc3k
 
@@ -306,8 +306,11 @@ def anndata_human_immune_cells(
         adata.write('human_immune.h5ad')
     """
     filepath, _ = urlretrieve("https://lamindb-test.s3.amazonaws.com/human_immune.h5ad")
-    adata = ad.read(filepath)
+    adata = ad.read_h5ad(filepath)
     adata.var.drop(columns=["gene_symbols", "feature_name"], inplace=True)
+    adata.uns.pop("cell_type_ontology_term_id_colors")
+    adata.uns.pop("title")
+    adata.uns.pop("schema_version")
     adata.obs.columns = adata.obs.columns.str.replace("donor_id", "donor")
     columns = [col for col in adata.obs.columns if "ontology_term" not in col]
     adata.obs = adata.obs[columns]
@@ -378,7 +381,7 @@ def anndata_suo22_Visium10X():  # pragma: no cover
     )
     Path("suo22/").mkdir(exist_ok=True)
     filepath = Path(filepath).rename("suo22/Visium10X_data_LI_subset.h5ad")
-    return ad.read(filepath)
+    return ad.read_h5ad(filepath)
 
 
 def mudata_papalexi21_subset():  # pragma: no cover
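
`ad.read` is a legacy alias for `read_h5ad` in anndata; calling `read_h5ad` directly is explicit about the on-disk format and avoids the deprecation warning:

    import anndata as ad
    adata = ad.read_h5ad("human_immune.h5ad")  # instead of ad.read(...)
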
lamindb-0.57.2.dist-info/METADATA → lamindb-0.58.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.57.2
+Version: 0.58.1
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -8,9 +8,9 @@ Description-Content-Type: text/markdown
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
-Requires-Dist: lnschema_core==0.52.0
-Requires-Dist: lamindb_setup==0.55.6
-Requires-Dist: lamin_utils==0.11.4
+Requires-Dist: lnschema_core==0.53.0
+Requires-Dist: lamindb_setup==0.56.3
+Requires-Dist: lamin_utils==0.11.5
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
@@ -24,7 +24,7 @@ Requires-Dist: urllib3<2 ; extra == "aws"
 Requires-Dist: boto3==1.28.17 ; extra == "aws"
 Requires-Dist: aiobotocore==2.5.4 ; extra == "aws"
 Requires-Dist: fsspec[s3]==2023.9.0 ; extra == "aws"
-Requires-Dist: lnschema_bionty==0.33.0 ; extra == "bionty"
+Requires-Dist: lnschema_bionty==0.34.0 ; extra == "bionty"
 Requires-Dist: pandas<2 ; extra == "dev"
 Requires-Dist: pre-commit ; extra == "dev"
 Requires-Dist: nox ; extra == "dev"
lamindb-0.57.2.dist-info/RECORD → lamindb-0.58.1.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
-lamindb/__init__.py,sha256=IaVdOqONgsSFqAPdan9hd2UoDB3fd9CWMbHSmnZdjn8,2870
-lamindb/_dataset.py,sha256=mBoeQj7KhMxAnb9wmgu7MXxlbPRGQPsGIcBVaiAZjQ8,13453
+lamindb/__init__.py,sha256=0YGJThA1KvrX4UlxWsOrnuNKxxjEe1FVhKvdZ_8KWTg,2870
+lamindb/_dataset.py,sha256=GLGtwbZLlSjy2HtJsjHgRDTOO0u0PwuarRE5qp-rGUA,15810
 lamindb/_delete.py,sha256=wiYmYnvIEHrDdmw1NiXyfCY9mBt-FI5XNFi5jyR_mkA,1968
 lamindb/_feature.py,sha256=5gsa7zsMVVtm1DID4dF3Vwo5llWyY1dH3Hg5hjaIrQk,5554
-lamindb/_feature_set.py,sha256=DWDrLlNfsR726IdGw93CcTxSxrfmZtGSulZKCmUv4MQ,9055
-lamindb/_file.py,sha256=0TIsPvOcWXjtgCwTOoeot1o0Gs8ebkcDFQenMSgxXuM,35818
-lamindb/_filter.py,sha256=fNvPbLeOxYzvNKPcFYiFz3P7bkD5_84Xh8HHAoLNdas,1716
+lamindb/_feature_set.py,sha256=G63pwauDQ7jg4ydFCQLhu-lgO6tm56iQwUdRuNHeKHY,9233
+lamindb/_file.py,sha256=9McSL-DuhGDihfusIX1UKZ195HwhXohlWhJHV9Ki0c4,37358
+lamindb/_filter.py,sha256=JrE4tdExNkOmNf0_tnO3vo-W3tecsH6ZB74gLO_fvKE,2293
 lamindb/_from_values.py,sha256=GitpmKOqV6YHJggaCnJgGsRIHI_bnuLRVE2oo9W-SgE,11613
-lamindb/_parents.py,sha256=-SRNd4O7TUmCIHYysjS00uK1QKODF4UJSXK_T_1KOEI,13212
+lamindb/_parents.py,sha256=VT_gtomf1Erd_AKLVd1uLwigeDqMHtcaAbma3_AbQAw,13408
 lamindb/_query_manager.py,sha256=MXueabWHqft7GWNkzmWbhfTqdk-0mKU7nWrhXG6wpYQ,3693
-lamindb/_query_set.py,sha256=Lf7vLvOsEfUWRQ3iImSj4eQPmUK1KCgeoKS_m66Lp7o,10279
-lamindb/_registry.py,sha256=_pdlEvAtemiQCzpK2s14MsTKkLqE6ORDjhDs7ABs4i4,14893
+lamindb/_query_set.py,sha256=1vjTLkCCrs1GiS2KTyqmSgVRSx966UsMhApXbW7GgI0,10217
+lamindb/_registry.py,sha256=lUnHCeDDOw4mlak0_Q_EbQU1_qDrsE23l7IEbeoaV8w,15138
 lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
-lamindb/_save.py,sha256=m6l5mMsxlrmlkdWhfjbwOtZ3haGEYyg63QcPG8twTMQ,10136
+lamindb/_save.py,sha256=hL34zgm-L3MFfi6P9O0AzeptFHtEnHdKheJqdOlGDM4,10154
 lamindb/_storage.py,sha256=HUdXGj4839C606gvxWXo0tDITbtbuyJKOgUPhagYPTI,415
 lamindb/_transform.py,sha256=87yUTz0RndJ_C98tBt4t2SPw8fksRgqJKwCQG_H40Kk,2515
 lamindb/_ulabel.py,sha256=lEAENh_dluNkBi8xKUH_CjJNMXldOm2liy6Rg3IH1pE,1900
@@ -19,7 +19,7 @@ lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
 lamindb/_validate.py,sha256=3powFmYcNop2R6ijt2v3I_vPn4TD9ET4DJkW8uzQt_U,13719
 lamindb/_view.py,sha256=bzx6e-Cif2CmDQkOu6jMrq_d5rsu6g7hhdaK_sYBv_Y,2150
 lamindb/dev/__init__.py,sha256=Ja96dxb0t7raGsCr8QxqCabyEzIxeVGlL_IgmhxdsB8,1010
-lamindb/dev/_data.py,sha256=-0Bz2wg98-BTzpV_5lUZCrRk9yeU1xqCUrjELomJb60,13818
+lamindb/dev/_data.py,sha256=6TLM2tVWV7xMYzWNA14EsdyhSoRjK7IK6EU4VuQoC-g,15071
 lamindb/dev/_feature_manager.py,sha256=IojA1TPH3ZPlPghV_d1MIPIxdIcYO15RenI_o7YjmAM,8049
 lamindb/dev/_label_manager.py,sha256=5R2rZzdLgiZHEzXyilSjK3J7kHDHUOhneZJuSh--qQY,7339
 lamindb/dev/_priors.py,sha256=eSZEEijmeFs3zcrU27r3T2sSGdsK-cvy7vl6ukDYaU8,785
@@ -31,7 +31,7 @@ lamindb/dev/hashing.py,sha256=IlNrHy-a9NqB0vfqiwIh4sjt40CvaiZIvfK6gMnkxDo,1381
 lamindb/dev/types.py,sha256=svg5S_aynuGfbEOsbmqkR_gF9d9YMzfOkcvGN37Rzvg,232
 lamindb/dev/versioning.py,sha256=XF7X-Ngat_Ggca7FdtZa5ElOKlOgoxDtxwZlhsCTJZU,2788
 lamindb/dev/datasets/__init__.py,sha256=clbWOmg4K8Rh94OPFtJasNKdtUHHvR_Lx11jZWMqfok,1350
-lamindb/dev/datasets/_core.py,sha256=T1XE9tr3uVLnyA2W9_xuF60EQH3WVaS9GBl69cB-KDQ,18844
+lamindb/dev/datasets/_core.py,sha256=-g7wWWYHrejlkSQS04Xafi_w5OjDv9ItHMUFNdHsXlM,18987
 lamindb/dev/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
 lamindb/dev/storage/__init__.py,sha256=mFvsMkAHHmO_xTM1UI-WGynDObnH0RCI2TXtFGhYfv8,392
 lamindb/dev/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
@@ -41,8 +41,8 @@ lamindb/dev/storage/file.py,sha256=xfeU8X1ty80-PhnHOpupBJfibZKhp6MPLA2IjYdTBoY,7
 lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
 lamindb/setup/__init__.py,sha256=8-0F2C4Glx23-b8-D_1CBGgRBM5PppVhazhoXZYOLsg,275
 lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
-lamindb-0.57.2.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
-lamindb-0.57.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lamindb-0.57.2.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
-lamindb-0.57.2.dist-info/METADATA,sha256=oJ8lBjU6ZZ7Bl-oV6PrqtbHlnuGb-8Ry3clIInkpxuk,3030
-lamindb-0.57.2.dist-info/RECORD,,
+lamindb-0.58.1.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
+lamindb-0.58.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.58.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lamindb-0.58.1.dist-info/METADATA,sha256=vsEe2aNzGIKTdXiRH07Cr6wZuFn5COOO9U1DuZRkBRM,3030
+lamindb-0.58.1.dist-info/RECORD,,