lamindb 0.57.2__py3-none-any.whl → 0.58.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_dataset.py +91 -21
- lamindb/_feature_set.py +7 -1
- lamindb/_file.py +73 -15
- lamindb/_filter.py +12 -0
- lamindb/_parents.py +10 -5
- lamindb/_query_set.py +2 -4
- lamindb/_registry.py +20 -7
- lamindb/_save.py +5 -3
- lamindb/dev/_data.py +50 -22
- lamindb/dev/datasets/_core.py +8 -5
- {lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/METADATA +5 -5
- {lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/RECORD +16 -16
- {lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/LICENSE +0 -0
- {lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/WHEEL +0 -0
- {lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/entry_points.txt +0 -0
lamindb/__init__.py
CHANGED
lamindb/_dataset.py
CHANGED
@@ -63,12 +63,15 @@ def __init__(
         kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
     )
     version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
+    visibility: Optional[int] = (
+        kwargs.pop("visibility") if "visibility" in kwargs else 0
+    )
     feature_sets: Dict[str, FeatureSet] = (
         kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
     )
     if not len(kwargs) == 0:
         raise ValueError(
-            f"Only data, name, run, description, reference, reference_type can be passed, you passed: {kwargs}"  # noqa
+            f"Only data, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"  # noqa
         )

     if is_new_version_of is None:
@@ -145,6 +148,7 @@ def __init__(
         hash, feature_sets = from_files(files)  # type: ignore
     else:
         raise ValueError("Only DataFrame, AnnData and iterable of File is allowed")
+    # we ignore datasets in trash containing the same hash
    existing_dataset = Dataset.filter(hash=hash).one_or_none()
    if existing_dataset is not None:
        logger.warning(f"returning existing dataset with same hash: {existing_dataset}")
@@ -169,6 +173,7 @@ def __init__(
         run=run,
         version=version,
         initial_version_id=initial_version_id,
+        visibility=visibility,
         **kwargs,
     )
     dataset._files = files
@@ -179,10 +184,7 @@ def __init__(
     if file is not None and file.run != run:
         _track_run_input(file, run=run)
     elif files is not None:
-
-            if file.run != run:
-                _track_run_input(file, run=run)
-    # there is not other possibility
+        _track_run_input(files, run=run)


 @classmethod  # type: ignore
@@ -197,6 +199,8 @@ def from_df(
     modality: Optional[Modality] = None,
     reference: Optional[str] = None,
     reference_type: Optional[str] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "Dataset":
     """{}"""
     feature_set = FeatureSet.from_df(df, field=field, modality=modality)
@@ -205,7 +209,15 @@ def from_df(
     else:
         feature_sets = {}
     dataset = Dataset(
-        data=df,
+        data=df,
+        name=name,
+        run=run,
+        description=description,
+        feature_sets=feature_sets,
+        reference=reference,
+        reference_type=reference_type,
+        version=version,
+        is_new_version_of=is_new_version_of,
     )
     return dataset

@@ -222,6 +234,8 @@ def from_anndata(
     modality: Optional[Modality] = None,
     reference: Optional[str] = None,
     reference_type: Optional[str] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "Dataset":
     """{}"""
     if isinstance(adata, File):
@@ -237,6 +251,10 @@ def from_anndata(
         name=name,
         description=description,
         feature_sets=feature_sets,
+        reference=reference,
+        reference_type=reference_type,
+        version=version,
+        is_new_version_of=is_new_version_of,
     )
     return dataset

@@ -244,30 +262,48 @@ def from_anndata(
 # internal function, not exposed to user
 def from_files(files: Iterable[File]) -> Tuple[str, Dict[str, str]]:
     # assert all files are already saved
+    logger.debug("check not saved")
     saved = not any([file._state.adding for file in files])
     if not saved:
         raise ValueError("Not all files are yet saved, please save them")
     # query all feature sets of files
+    logger.debug("file ids")
     file_ids = [file.id for file in files]
     # query all feature sets at the same time rather than making a single query per file
+    logger.debug("feature_set_file_links")
     feature_set_file_links = File.feature_sets.through.objects.filter(
         file_id__in=file_ids
     )
-    feature_set_ids = [link.feature_set_id for link in feature_set_file_links]
-    feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
     feature_sets_by_slots = defaultdict(list)
+    logger.debug("slots")
     for link in feature_set_file_links:
-        feature_sets_by_slots[link.slot].append(
-            feature_sets.filter(id=link.feature_set_id).one()
-        )
+        feature_sets_by_slots[link.slot].append(link.feature_set_id)
     feature_sets_union = {}
-
-
-
-
-
+    logger.debug("union")
+    for slot, feature_set_ids_slot in feature_sets_by_slots.items():
+        feature_set_1 = FeatureSet.filter(id=feature_set_ids_slot[0]).one()
+        related_name = feature_set_1._get_related_name()
+        features_registry = getattr(FeatureSet, related_name).field.model
+        start_time = logger.debug("run filter")
+        # this way of writing the __in statement turned out to be the fastest
+        # evaluated on a link table with 16M entries connecting 500 feature sets with
+        # 60k genes
+        feature_ids = (
+            features_registry.feature_sets.through.objects.filter(
+                featureset_id__in=feature_set_ids_slot
+            )
+            .values(f"{features_registry.__name__.lower()}_id")
+            .distinct()
+        )
+        start_time = logger.debug("done, start evaluate", time=start_time)
+        features = features_registry.filter(id__in=feature_ids)
+        feature_sets_union[slot] = FeatureSet(
+            features, type=feature_set_1.type, modality=feature_set_1.modality
+        )
+        start_time = logger.debug("done", time=start_time)
     # validate consistency of hashes
     # we do not allow duplicate hashes
+    logger.debug("hashes")
     hashes = [file.hash for file in files]
     if len(hashes) != len(set(hashes)):
         seen = set()
@@ -276,7 +312,9 @@ def from_files(files: Iterable[File]) -> Tuple[str, Dict[str, str]]:
             "Please pass files with distinct hashes: these ones are non-unique"
             f" {non_unique}"
         )
+    time = logger.debug("hash")
    hash = hash_set(set(hashes))
+    logger.debug("done", time=time)
    return hash, feature_sets_union


@@ -311,12 +349,12 @@ def load(
     # because we're tracking data flow on the dataset-level, here, we don't
     # want to track it on the file-level
     objects = [file.load(is_run_input=False) for file in all_files]
-
+    file_uids = [file.uid for file in all_files]
     if isinstance(objects[0], pd.DataFrame):
         concat_object = pd.concat(objects, join=join)
     elif isinstance(objects[0], ad.AnnData):
         concat_object = ad.concat(
-            objects, join=join, label="
+            objects, join=join, label="file_uid", keys=file_uids
         )
     # only call it here because there might be errors during concat
     _track_run_input(self, is_run_input)
@@ -324,10 +362,32 @@ def load(


 # docstring handled through attach_func_to_class_method
-def delete(
-
+def delete(
+    self, permanent: Optional[bool] = None, storage: Optional[bool] = None
+) -> None:
+    # change visibility to 2 (trash)
+    if self.visibility < 2 and permanent is not True:
+        self.visibility = 2
+        self.save()
+        if self.file is not None:
+            self.file.visibility = 2
+            self.file.save()
+        return
+
+    # permanent delete
+    if permanent is None:
+        response = input(
+            "File record is already in trash! Are you sure to delete it from your"
+            " database? (y/n) You can't undo this action."
+        )
+        delete_record = response == "y"
+    else:
+        delete_record = permanent
+
+    if delete_record:
+        super(Dataset, self).delete()
     if self.file is not None:
-        self.file.delete(storage=storage)
+        self.file.delete(permanent=permanent, storage=storage)


 # docstring handled through attach_func_to_class_method
@@ -351,6 +411,15 @@ def path(self) -> Union[Path, UPath]:
     return self.storage.path


+# docstring handled through attach_func_to_class_method
+def restore(self) -> None:
+    self.visibility = 0
+    self.save()
+    if self.file is not None:
+        self.file.visibility = 0
+        self.file.save()
+
+
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -359,6 +428,7 @@ METHOD_NAMES = [
     "load",
     "delete",
     "save",
+    "restore",
 ]

 if _TESTING:
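The net effect of these changes: Dataset gains a visibility-based trash and forwards versioning arguments through its constructors. A minimal sketch of the resulting workflow, assuming a loaded instance (the dataset name is illustrative):

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"feat1": [1, 2], "feat2": [3, 4]})
    dataset = ln.Dataset.from_df(df, name="my dataset", version="1")
    dataset.save()

    dataset.delete()   # visibility -> 2: trashed, along with a linked file
    dataset.restore()  # visibility -> 0: visible in default queries again
    dataset.delete(permanent=True)  # removes the record for good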
lamindb/_feature_set.py
CHANGED
@@ -239,11 +239,16 @@ def members(self) -> "QuerySet":
         # this should return a queryset and not a list...
         # need to fix this
         return self._features[1]
+    related_name = self._get_related_name()
+    return self.__getattribute__(related_name).all()
+
+
+def _get_related_name(self: FeatureSet) -> str:
     key_split = self.registry.split(".")
     orm_name_with_schema = f"{key_split[0]}.{key_split[1]}"
     feature_sets_related_models = dict_related_model_to_related_name(self)
     related_name = feature_sets_related_models.get(orm_name_with_schema)
-    return
+    return related_name


 METHOD_NAMES = [
@@ -266,3 +271,4 @@ for name in METHOD_NAMES:
     attach_func_to_class_method(name, FeatureSet, globals())

 setattr(FeatureSet, "members", members)
+setattr(FeatureSet, "_get_related_name", _get_related_name)
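Factoring out `_get_related_name` lets `members` resolve the registry backing a saved feature set and return a proper QuerySet. A sketch, assuming at least one saved feature set:

    import lamindb as ln

    feature_set = ln.FeatureSet.filter().first()
    # resolves the related registry (e.g. Feature or a bionty model) via
    # _get_related_name() and returns a QuerySet
    feature_set.members.count()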
lamindb/_file.py
CHANGED
@@ -179,6 +179,7 @@ def get_hash(
     hash, hash_type = hash_file(filepath)
     if not check_hash:
         return hash, hash_type
+    # we ignore datasets in trash containing the same hash
     result = File.filter(hash=hash).list()
     if len(result) > 0:
         if settings.upon_file_create_if_hash_exists == "error":
@@ -454,6 +455,9 @@ def __init__(file: File, *args, **kwargs):
         kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None
     )
     version: Optional[str] = kwargs.pop("version") if "version" in kwargs else None
+    visibility: Optional[int] = (
+        kwargs.pop("visibility") if "visibility" in kwargs else 0
+    )
     format = kwargs.pop("format") if "format" in kwargs else None
     log_hint = kwargs.pop("log_hint") if "log_hint" in kwargs else True
     skip_check_exists = (
@@ -462,8 +466,8 @@ def __init__(file: File, *args, **kwargs):

     if not len(kwargs) == 0:
         raise ValueError(
-            "Only data, key, run, description, version, is_new_version_of
-            f" passed, you passed: {kwargs}"
+            "Only data, key, run, description, version, is_new_version_of, visibility"
+            f" can be passed, you passed: {kwargs}"
         )

     if is_new_version_of is None:
@@ -523,6 +527,7 @@ def __init__(file: File, *args, **kwargs):
     kwargs["initial_version_id"] = initial_version_id
     kwargs["version"] = version
     kwargs["description"] = description
+    kwargs["visibility"] = visibility
     # this check needs to come down here because key might be populated from an
     # existing file path during get_file_kwargs_from_data()
     if (
@@ -553,9 +558,19 @@ def from_df(
     description: Optional[str] = None,
     run: Optional[Run] = None,
     modality: Optional[Modality] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "File":
     """{}"""
-    file = File(
+    file = File(
+        data=df,
+        key=key,
+        run=run,
+        description=description,
+        version=version,
+        is_new_version_of=is_new_version_of,
+        log_hint=False,
+    )
     feature_set = FeatureSet.from_df(df, field=field, modality=modality)
     if feature_set is not None:
         file._feature_sets = {"columns": feature_set}
@@ -615,9 +630,19 @@ def from_anndata(
     description: Optional[str] = None,
     run: Optional[Run] = None,
     modality: Optional[Modality] = None,
+    version: Optional[str] = None,
+    is_new_version_of: Optional["File"] = None,
 ) -> "File":
     """{}"""
-    file = File(
+    file = File(
+        data=adata,
+        key=key,
+        run=run,
+        description=description,
+        version=version,
+        is_new_version_of=is_new_version_of,
+        log_hint=False,
+    )
     file._feature_sets = parse_feature_sets_from_anndata(adata, field, modality)
     return file

@@ -800,23 +825,49 @@ def stage(self, is_run_input: Optional[bool] = None) -> Path:


 # docstring handled through attach_func_to_class_method
-def delete(
-
-
-
+def delete(
+    self, permanent: Optional[bool] = None, storage: Optional[bool] = None
+) -> None:
+    # change visibility to 2 (trash)
+    if self.visibility < 2 and permanent is not True:
+        self.visibility = 2
+        self.save()
+        return
+
+    # if the file is already in the trash
+    # permanent delete skips the trash
+    if permanent is None:
+        response = input(
+            "File record is already in trash! Are you sure to delete it from your"
+            " database? (y/n) You can't undo this action."
+        )
+        delete_record = response == "y"
     else:
-
+        delete_record = permanent

     # need to grab file path before deletion
     filepath = self.path
+
     # only delete in storage if DB delete is successful
     # DB delete might error because of a foreign key constraint violated etc.
-
-
-
-
-
-
+    if delete_record:
+        self._delete_skip_storage()
+        if self.key is None:
+            delete_in_storage = True
+        else:
+            if storage is None:
+                response = input(
+                    f"Are you sure to delete {filepath}? (y/n) You can't undo this"
+                    " action."
+                )
+                delete_in_storage = response == "y"
+            else:
+                delete_in_storage = storage
+        # we don't yet have any way to bring back the deleted metadata record
+        # in case storage deletion fails - this is important for ACID down the road
+        if delete_in_storage:
+            delete_storage(filepath)
+            logger.success(f"deleted {colors.yellow(f'{filepath}')}")


 def _delete_skip_storage(file, *args, **kwargs) -> None:
@@ -941,6 +992,12 @@ def view_tree(
     )


+# docstring handled through attach_func_to_class_method
+def restore(self) -> None:
+    self.visibility = 0
+    self.save()
+
+
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -953,6 +1010,7 @@ METHOD_NAMES = [
     "replace",
     "from_dir",
     "view_tree",
+    "restore",
 ]

 if _TESTING:
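Together with the new visibility field, delete and restore implement a two-step trash for File. A sketch of the resulting workflow, assuming a previously saved file (the description is illustrative):

    import lamindb as ln

    file = ln.File.filter(description="my file").one()
    file.delete()       # visibility -> 2; nothing is removed from storage yet
    file.restore()      # visibility -> 0; visible in default queries again
    file.delete(permanent=True, storage=True)  # delete record, then storage object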
lamindb/_filter.py
CHANGED
@@ -42,6 +42,18 @@ def filter(Registry: Type[Registry], using: str = None, **expressions) -> QuerySet:
             id=UUID(instance_result["id"]),
         )
         add_db_connection(isettings, using)
+
+    if Registry.__name__ in {"File", "Dataset"}:
+        # visibility is set to <2 by default
+        if not any([e.startswith("visibility") for e in expressions]):
+            expressions["visibility__lt"] = 2
+        # if visibility is None, will not apply any filter for visibility
+        elif "visibility" in expressions:
+            if expressions["visibility"] is None:
+                expressions.pop("visibility")
+            elif expressions["visibility"] == "default":
+                expressions.pop("visibility")
+                expressions["visibility__lt"] = 2
     qs = QuerySet(model=Registry, using=using)
     if len(expressions) > 0:
         return qs.filter(**expressions)
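The effect: trashed records (visibility 2) are hidden from File and Dataset queries unless visibility is addressed explicitly. A sketch:

    import lamindb as ln

    ln.File.filter()                 # implicit visibility__lt=2, trash excluded
    ln.File.filter(visibility=2)     # query the trash explicitly
    ln.File.filter(visibility=None)  # disable the visibility filter entirely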
lamindb/_parents.py
CHANGED
@@ -201,13 +201,18 @@ def _get_parents(record: Registry, field: str, distance: int, children: bool = False):
     d = 2
     while d < distance:
         condition = f"{key}__{condition}"
-        records = model.filter(**{condition: record.__getattribute__(field)})
+        records = model.filter(**{condition: record.__getattribute__(field)})

-
-
+        try:
+            if not records.exists():
+                return results

-
-
+            results = results | records.all()
+            d += 1
+        except Exception:
+            # For OperationalError:
+            # SQLite does not support joins containing more than 64 tables
+            return results
     return results

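The try/except guards deep lineage traversals on SQLite, which rejects joins over more than 64 tables; instead of raising OperationalError, the traversal now returns the parents collected so far. A sketch of a call that benefits, assuming a tracked transform (the distance value is illustrative):

    import lamindb as ln

    transform = ln.Transform.filter().first()
    # deep traversals previously failed on SQLite's 64-join limit
    transform.view_parents(distance=100)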
lamindb/_query_set.py
CHANGED
@@ -210,13 +210,11 @@ class QuerySet(models.QuerySet):
         return _search(cls=self, string=string, **kwargs)

     @doc_args(Registry.lookup.__doc__)
-    def lookup(
-        self, field: Optional[StrField] = None, return_field: Optional[StrField] = None
-    ) -> NamedTuple:
+    def lookup(self, field: Optional[StrField] = None, **kwargs) -> NamedTuple:
         """{}"""
         from ._registry import _lookup

-        return _lookup(cls=self, field=field,
+        return _lookup(cls=self, field=field, **kwargs)

     @doc_args(CanValidate.validate.__doc__)
     def validate(
lamindb/_registry.py
CHANGED
@@ -147,8 +147,9 @@ def _search(
     return_queryset: bool = False,
     case_sensitive: bool = False,
     synonyms_field: Optional[StrField] = "synonyms",
+    **expressions,
 ) -> Union["pd.DataFrame", "QuerySet"]:
-    queryset = _queryset(cls)
+    queryset = _queryset(cls, **expressions)
     orm = queryset.model

     def _search_single_field(
@@ -229,6 +230,7 @@ def search(
     return_queryset: bool = False,
     case_sensitive: bool = False,
     synonyms_field: Optional[StrField] = "synonyms",
+    **expressions,
 ) -> Union["pd.DataFrame", "QuerySet"]:
     """{}"""
     return _search(
@@ -239,14 +241,18 @@ def search(
         limit=limit,
         case_sensitive=case_sensitive,
         synonyms_field=synonyms_field,
+        **expressions,
     )


 def _lookup(
-    cls,
+    cls,
+    field: Optional[StrField] = None,
+    return_field: Optional[StrField] = None,
+    **expressions,
 ) -> NamedTuple:
     """{}"""
-    queryset = _queryset(cls)
+    queryset = _queryset(cls, **expressions)
     field = get_default_str_field(orm=queryset.model, field=field)

     return Lookup(
@@ -264,10 +270,13 @@ def _lookup(
 @classmethod  # type: ignore
 @doc_args(Registry.lookup.__doc__)
 def lookup(
-    cls,
+    cls,
+    field: Optional[StrField] = None,
+    return_field: Optional[StrField] = None,
+    **expressions,
 ) -> NamedTuple:
     """{}"""
-    return _lookup(cls=cls, field=field, return_field=return_field)
+    return _lookup(cls=cls, field=field, return_field=return_field, **expressions)


 def get_default_str_field(
@@ -316,8 +325,12 @@ def get_default_str_field(
     return field


-def _queryset(cls: Union[Registry, QuerySet, Manager]) -> QuerySet:
-    queryset =
+def _queryset(cls: Union[Registry, QuerySet, Manager], **expressions) -> QuerySet:
+    queryset = (
+        cls.filter(**expressions).all()
+        if isinstance(cls, QuerySet)
+        else cls.filter(**expressions).all()
+    )
     return queryset

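`search` and `lookup` now accept Django-style keyword expressions and forward them to `_queryset`, so both operate on a pre-filtered queryset. A sketch (the registry and filter values are illustrative):

    import lamindb as ln

    # search only among records created by a given user
    ln.ULabel.search("perturbation", created_by__handle="testuser1")
    # build a lookup object over the same restricted queryset
    labels = ln.ULabel.lookup(field="name", created_by__handle="testuser1")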
lamindb/_save.py
CHANGED
@@ -252,18 +252,20 @@ def upload_data_object(file) -> None:
     """Store and add file and its linked entries."""
     # do NOT hand-craft the storage key!
     file_storage_key = auto_storage_key_from_file(file)
-
+    storage_path = lamindb_setup.settings.instance.storage.key_to_filepath(
+        file_storage_key
+    )
+    msg = f"storing file '{file.uid}' at '{storage_path}'"
     if (
         file.suffix in {".zarr", ".zrad"}
         and hasattr(file, "_memory_rep")
         and file._memory_rep is not None
     ):
         logger.save(msg)
-        storagepath = lamindb_setup.settings.storage.key_to_filepath(file_storage_key)
         print_progress = partial(
             print_hook, filepath=file_storage_key, action="uploading"
         )
-        write_adata_zarr(file._memory_rep,
+        write_adata_zarr(file._memory_rep, storage_path, callback=print_progress)
     elif hasattr(file, "_to_store") and file._to_store:
         logger.save(msg)
         store_object(file._local_filepath, file_storage_key)
lamindb/dev/_data.py
CHANGED
@@ -308,24 +308,48 @@ def add_labels(


 def _track_run_input(
-    data: Data,
+    data: Union[Data, Iterable[Data]],
+    is_run_input: Optional[bool] = None,
+    run: Optional[Run] = None,
 ):
     if run is None:
         run = run_context.run
+    # consider that data is an iterable of Data
+    data_iter: Iterable[Data] = [data] if isinstance(data, Data) else data
     track_run_input = False
+    input_data = []
+    if run is not None:
+        # avoid cycles: data can't be both input and output
+        input_data = [data for data in data_iter if data.run_id != run.id]
+        input_data_ids = [data.id for data in data_iter if data.run_id != run.id]
+    if input_data:
+        data_class_name = input_data[0].__class__.__name__.lower()
+    # let us first look at the case in which the user does not
+    # provide a boolean value for `is_run_input`
+    # hence, we need to determine whether we actually want to
+    # track a run or not
     if is_run_input is None:
-        # we
-        if run is
-
-
+        # we don't have a run record
+        if run is None:
+            if settings.track_run_inputs:
+                logger.hint(
+                    "you can auto-track this file as a run input by calling"
+                    " `ln.track()`"
+                )
+        # assume we have a run record
+        else:
+            # assume there is non-cyclic candidate input data
+            if input_data:
                 if settings.track_run_inputs:
                     transform_note = ""
-                    if
-
-
-
+                    if len(input_data) == 1:
+                        if input_data[0].transform is not None:
+                            transform_note = (
+                                ", adding parent transform"
+                                f" {input_data[0].transform.id}"
+                            )
                     logger.info(
-                        f"adding
+                        f"adding {data_class_name} {input_data_ids} as input for run"
                         f" {run.id}{transform_note}"
                     )
                     track_run_input = True
@@ -333,12 +357,6 @@ def _track_run_input(
                     logger.hint(
                         "track this file as a run input by passing `is_run_input=True`"
                     )
-        else:
-            if settings.track_run_inputs:
-                logger.hint(
-                    "you can auto-track this file as a run input by calling"
-                    " `ln.track()`"
-                )
     else:
         track_run_input = is_run_input
     if track_run_input:
@@ -348,12 +366,22 @@ def _track_run_input(
             " run object via `run.input_files.add(file)`"
         )
         # avoid adding the same run twice
-
-        if
-        run.
-
-
-
+        run.save()
+        if data_class_name == "file":
+            LinkORM = run.input_files.through
+            links = [
+                LinkORM(run_id=run.id, file_id=data_id) for data_id in input_data_ids
+            ]
+        else:
+            LinkORM = run.input_datasets.through
+            links = [
+                LinkORM(run_id=run.id, dataset_id=data_id) for data_id in input_data_ids
+            ]
+        LinkORM.objects.bulk_create(links, ignore_conflicts=True)
+        # generalize below for more than one data batch
+        if len(input_data) == 1:
+            if input_data[0].transform is not None:
+                run.transform.parents.add(input_data[0].transform)


 @property  # type: ignore
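`_track_run_input` now accepts a single record or an iterable of Data, skips records that are outputs of the current run to avoid cycles, and links all inputs with one `bulk_create` on the link table rather than one query per record. From the user's side this is transparent (a sketch, assuming `ln.track()` has created a run and the dataset exists):

    import lamindb as ln

    ln.track()  # initializes the current run context
    dataset = ln.Dataset.filter(name="my dataset").one()
    df = dataset.load()  # the dataset is linked as an input of the current run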
lamindb/dev/datasets/_core.py
CHANGED
@@ -156,7 +156,7 @@ def anndata_mouse_sc_lymph_node(
         populate_registries: pre-populate metadata records to simulate existing registries  # noqa
     """
     filepath, _ = urlretrieve("https://lamindb-test.s3.amazonaws.com/E-MTAB-8414.h5ad")
-    adata = ad.
+    adata = ad.read_h5ad(filepath)

     # The column names are a bit lengthy, let's abbreviate them:
     adata.obs.columns = (
@@ -253,7 +253,7 @@ def anndata_pbmc68k_reduced() -> ad.AnnData:
     filepath, _ = urlretrieve(
         "https://lamindb-dev-datasets.s3.amazonaws.com/scrnaseq_pbmc68k_tiny.h5ad"
     )
-    return ad.
+    return ad.read_h5ad(filepath)


 def anndata_file_pbmc68k_test() -> Path:
@@ -283,7 +283,7 @@ def anndata_pbmc3k_processed() -> ad.AnnData:  # pragma: no cover
     filepath, _ = urlretrieve(
         "https://lamindb-test.s3.amazonaws.com/scrnaseq_scanpy_pbmc3k_processed.h5ad"
     )
-    pbmc3k = ad.
+    pbmc3k = ad.read_h5ad(filepath)
     pbmc3k.obs.rename(columns={"louvain": "cell_type"}, inplace=True)
     return pbmc3k

@@ -306,8 +306,11 @@ def anndata_human_immune_cells(
         adata.write('human_immune.h5ad')
     """
     filepath, _ = urlretrieve("https://lamindb-test.s3.amazonaws.com/human_immune.h5ad")
-    adata = ad.
+    adata = ad.read_h5ad(filepath)
     adata.var.drop(columns=["gene_symbols", "feature_name"], inplace=True)
+    adata.uns.pop("cell_type_ontology_term_id_colors")
+    adata.uns.pop("title")
+    adata.uns.pop("schema_version")
     adata.obs.columns = adata.obs.columns.str.replace("donor_id", "donor")
     columns = [col for col in adata.obs.columns if "ontology_term" not in col]
     adata.obs = adata.obs[columns]
@@ -378,7 +381,7 @@ def anndata_suo22_Visium10X():  # pragma: no cover
     )
     Path("suo22/").mkdir(exist_ok=True)
     filepath = Path(filepath).rename("suo22/Visium10X_data_LI_subset.h5ad")
-    return ad.
+    return ad.read_h5ad(filepath)


 def mudata_papalexi21_subset():  # pragma: no cover
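The dataset loaders now read the retrieved file with `ad.read_h5ad`; calling them is unchanged. A sketch:

    from lamindb.dev import datasets

    adata = datasets.anndata_human_immune_cells()
    # uns no longer carries the colors/title/schema_version entries
    print(adata)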
{lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lamindb
-Version: 0.57.2
+Version: 0.58.1
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.8
@@ -8,9 +8,9 @@ Description-Content-Type: text/markdown
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
-Requires-Dist: lnschema_core==0.
-Requires-Dist: lamindb_setup==0.
-Requires-Dist: lamin_utils==0.11.
+Requires-Dist: lnschema_core==0.53.0
+Requires-Dist: lamindb_setup==0.56.3
+Requires-Dist: lamin_utils==0.11.5
 Requires-Dist: rapidfuzz
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
@@ -24,7 +24,7 @@ Requires-Dist: urllib3<2 ; extra == "aws"
 Requires-Dist: boto3==1.28.17 ; extra == "aws"
 Requires-Dist: aiobotocore==2.5.4 ; extra == "aws"
 Requires-Dist: fsspec[s3]==2023.9.0 ; extra == "aws"
-Requires-Dist: lnschema_bionty==0.
+Requires-Dist: lnschema_bionty==0.34.0 ; extra == "bionty"
 Requires-Dist: pandas<2 ; extra == "dev"
 Requires-Dist: pre-commit ; extra == "dev"
 Requires-Dist: nox ; extra == "dev"
{lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
-lamindb/__init__.py,sha256=
-lamindb/_dataset.py,sha256=
+lamindb/__init__.py,sha256=0YGJThA1KvrX4UlxWsOrnuNKxxjEe1FVhKvdZ_8KWTg,2870
+lamindb/_dataset.py,sha256=GLGtwbZLlSjy2HtJsjHgRDTOO0u0PwuarRE5qp-rGUA,15810
 lamindb/_delete.py,sha256=wiYmYnvIEHrDdmw1NiXyfCY9mBt-FI5XNFi5jyR_mkA,1968
 lamindb/_feature.py,sha256=5gsa7zsMVVtm1DID4dF3Vwo5llWyY1dH3Hg5hjaIrQk,5554
-lamindb/_feature_set.py,sha256=
-lamindb/_file.py,sha256=
-lamindb/_filter.py,sha256=
+lamindb/_feature_set.py,sha256=G63pwauDQ7jg4ydFCQLhu-lgO6tm56iQwUdRuNHeKHY,9233
+lamindb/_file.py,sha256=9McSL-DuhGDihfusIX1UKZ195HwhXohlWhJHV9Ki0c4,37358
+lamindb/_filter.py,sha256=JrE4tdExNkOmNf0_tnO3vo-W3tecsH6ZB74gLO_fvKE,2293
 lamindb/_from_values.py,sha256=GitpmKOqV6YHJggaCnJgGsRIHI_bnuLRVE2oo9W-SgE,11613
-lamindb/_parents.py,sha256
+lamindb/_parents.py,sha256=VT_gtomf1Erd_AKLVd1uLwigeDqMHtcaAbma3_AbQAw,13408
 lamindb/_query_manager.py,sha256=MXueabWHqft7GWNkzmWbhfTqdk-0mKU7nWrhXG6wpYQ,3693
-lamindb/_query_set.py,sha256=
-lamindb/_registry.py,sha256=
+lamindb/_query_set.py,sha256=1vjTLkCCrs1GiS2KTyqmSgVRSx966UsMhApXbW7GgI0,10217
+lamindb/_registry.py,sha256=lUnHCeDDOw4mlak0_Q_EbQU1_qDrsE23l7IEbeoaV8w,15138
 lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
-lamindb/_save.py,sha256=
+lamindb/_save.py,sha256=hL34zgm-L3MFfi6P9O0AzeptFHtEnHdKheJqdOlGDM4,10154
 lamindb/_storage.py,sha256=HUdXGj4839C606gvxWXo0tDITbtbuyJKOgUPhagYPTI,415
 lamindb/_transform.py,sha256=87yUTz0RndJ_C98tBt4t2SPw8fksRgqJKwCQG_H40Kk,2515
 lamindb/_ulabel.py,sha256=lEAENh_dluNkBi8xKUH_CjJNMXldOm2liy6Rg3IH1pE,1900
@@ -19,7 +19,7 @@ lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
 lamindb/_validate.py,sha256=3powFmYcNop2R6ijt2v3I_vPn4TD9ET4DJkW8uzQt_U,13719
 lamindb/_view.py,sha256=bzx6e-Cif2CmDQkOu6jMrq_d5rsu6g7hhdaK_sYBv_Y,2150
 lamindb/dev/__init__.py,sha256=Ja96dxb0t7raGsCr8QxqCabyEzIxeVGlL_IgmhxdsB8,1010
-lamindb/dev/_data.py,sha256
+lamindb/dev/_data.py,sha256=6TLM2tVWV7xMYzWNA14EsdyhSoRjK7IK6EU4VuQoC-g,15071
 lamindb/dev/_feature_manager.py,sha256=IojA1TPH3ZPlPghV_d1MIPIxdIcYO15RenI_o7YjmAM,8049
 lamindb/dev/_label_manager.py,sha256=5R2rZzdLgiZHEzXyilSjK3J7kHDHUOhneZJuSh--qQY,7339
 lamindb/dev/_priors.py,sha256=eSZEEijmeFs3zcrU27r3T2sSGdsK-cvy7vl6ukDYaU8,785
@@ -31,7 +31,7 @@ lamindb/dev/hashing.py,sha256=IlNrHy-a9NqB0vfqiwIh4sjt40CvaiZIvfK6gMnkxDo,1381
 lamindb/dev/types.py,sha256=svg5S_aynuGfbEOsbmqkR_gF9d9YMzfOkcvGN37Rzvg,232
 lamindb/dev/versioning.py,sha256=XF7X-Ngat_Ggca7FdtZa5ElOKlOgoxDtxwZlhsCTJZU,2788
 lamindb/dev/datasets/__init__.py,sha256=clbWOmg4K8Rh94OPFtJasNKdtUHHvR_Lx11jZWMqfok,1350
-lamindb/dev/datasets/_core.py,sha256
+lamindb/dev/datasets/_core.py,sha256=-g7wWWYHrejlkSQS04Xafi_w5OjDv9ItHMUFNdHsXlM,18987
 lamindb/dev/datasets/_fake.py,sha256=S8mNho-oSh1M9x9oOSsUBLLHmBAegsOLlFk6LnF81EA,942
 lamindb/dev/storage/__init__.py,sha256=mFvsMkAHHmO_xTM1UI-WGynDObnH0RCI2TXtFGhYfv8,392
 lamindb/dev/storage/_anndata_sizes.py,sha256=0XVzA6AQeVGPaGPrhGusKyxFgFjeo3qSN29hxb8D5E8,993
@@ -41,8 +41,8 @@ lamindb/dev/storage/file.py,sha256=xfeU8X1ty80-PhnHOpupBJfibZKhp6MPLA2IjYdTBoY,7
 lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
 lamindb/setup/__init__.py,sha256=8-0F2C4Glx23-b8-D_1CBGgRBM5PppVhazhoXZYOLsg,275
 lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
-lamindb-0.
-lamindb-0.
-lamindb-0.
-lamindb-0.
-lamindb-0.
+lamindb-0.58.1.dist-info/entry_points.txt,sha256=MioM8vSpKwXxY3geNBwjo1wnwy1l15WjJYlI3lpKuZI,53
+lamindb-0.58.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lamindb-0.58.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+lamindb-0.58.1.dist-info/METADATA,sha256=vsEe2aNzGIKTdXiRH07Cr6wZuFn5COOO9U1DuZRkBRM,3030
+lamindb-0.58.1.dist-info/RECORD,,
{lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/LICENSE
File without changes
{lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/WHEEL
File without changes
{lamindb-0.57.2.dist-info → lamindb-0.58.1.dist-info}/entry_points.txt
File without changes