lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +114 -113
- lamindb/_artifact.py +1206 -1205
- lamindb/_can_validate.py +621 -579
- lamindb/_collection.py +390 -387
- lamindb/_curate.py +1603 -1601
- lamindb/_feature.py +155 -155
- lamindb/_feature_set.py +244 -242
- lamindb/_filter.py +23 -23
- lamindb/_finish.py +250 -256
- lamindb/_from_values.py +403 -382
- lamindb/_is_versioned.py +40 -40
- lamindb/_parents.py +476 -476
- lamindb/_query_manager.py +125 -125
- lamindb/_query_set.py +364 -362
- lamindb/_record.py +668 -649
- lamindb/_run.py +60 -57
- lamindb/_save.py +310 -308
- lamindb/_storage.py +14 -14
- lamindb/_transform.py +130 -127
- lamindb/_ulabel.py +56 -56
- lamindb/_utils.py +9 -9
- lamindb/_view.py +72 -72
- lamindb/core/__init__.py +94 -94
- lamindb/core/_context.py +590 -574
- lamindb/core/_data.py +510 -438
- lamindb/core/_django.py +209 -0
- lamindb/core/_feature_manager.py +994 -867
- lamindb/core/_label_manager.py +289 -253
- lamindb/core/_mapped_collection.py +631 -597
- lamindb/core/_settings.py +188 -187
- lamindb/core/_sync_git.py +138 -138
- lamindb/core/_track_environment.py +27 -27
- lamindb/core/datasets/__init__.py +59 -59
- lamindb/core/datasets/_core.py +581 -571
- lamindb/core/datasets/_fake.py +36 -36
- lamindb/core/exceptions.py +90 -90
- lamindb/core/fields.py +12 -12
- lamindb/core/loaders.py +164 -164
- lamindb/core/schema.py +56 -56
- lamindb/core/storage/__init__.py +25 -25
- lamindb/core/storage/_anndata_accessor.py +741 -740
- lamindb/core/storage/_anndata_sizes.py +41 -41
- lamindb/core/storage/_backed_access.py +98 -98
- lamindb/core/storage/_tiledbsoma.py +204 -204
- lamindb/core/storage/_valid_suffixes.py +21 -21
- lamindb/core/storage/_zarr.py +110 -110
- lamindb/core/storage/objects.py +62 -62
- lamindb/core/storage/paths.py +172 -172
- lamindb/core/subsettings/__init__.py +12 -12
- lamindb/core/subsettings/_creation_settings.py +38 -38
- lamindb/core/subsettings/_transform_settings.py +21 -21
- lamindb/core/types.py +19 -19
- lamindb/core/versioning.py +146 -158
- lamindb/integrations/__init__.py +12 -12
- lamindb/integrations/_vitessce.py +107 -107
- lamindb/setup/__init__.py +14 -14
- lamindb/setup/core/__init__.py +4 -4
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
- lamindb-0.76.10.dist-info/RECORD +61 -0
- {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
- lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_artifact.py
CHANGED
@@ -1,1205 +1,1206 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import os
|
4
|
-
import shutil
|
5
|
-
from
|
6
|
-
from
|
7
|
-
|
8
|
-
|
9
|
-
import
|
10
|
-
import
|
11
|
-
|
12
|
-
from
|
13
|
-
from
|
14
|
-
from
|
15
|
-
from lamindb_setup
|
16
|
-
from lamindb_setup.
|
17
|
-
from lamindb_setup.core.
|
18
|
-
from lamindb_setup.core.
|
19
|
-
from lamindb_setup.core.
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
from lnschema_core.
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
from lamindb.
|
32
|
-
from lamindb.core.
|
33
|
-
from lamindb.core.
|
34
|
-
from lamindb.core.
|
35
|
-
from lamindb.core.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
from .core.storage.
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
from
|
74
|
-
from
|
75
|
-
from tiledbsoma import
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
#
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
#
|
108
|
-
|
109
|
-
|
110
|
-
new_root
|
111
|
-
|
112
|
-
#
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
#
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
#
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
if
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
f"
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
"
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
.
|
236
|
-
.
|
237
|
-
.
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
"
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
"
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
if
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
#
|
287
|
-
#
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
#
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
stat_or_artifact.
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
f" '{
|
361
|
-
f"
|
362
|
-
"
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
#
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
"
|
388
|
-
"
|
389
|
-
"
|
390
|
-
"
|
391
|
-
"
|
392
|
-
"
|
393
|
-
|
394
|
-
#
|
395
|
-
#
|
396
|
-
|
397
|
-
"
|
398
|
-
"
|
399
|
-
"
|
400
|
-
"
|
401
|
-
"
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
"
|
412
|
-
"
|
413
|
-
"
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
artifact.
|
503
|
-
|
504
|
-
#
|
505
|
-
#
|
506
|
-
#
|
507
|
-
#
|
508
|
-
#
|
509
|
-
#
|
510
|
-
#
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
accessor =
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
#
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
artifact.
|
623
|
-
artifact.
|
624
|
-
artifact.
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
kwargs["key"]
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
kwargs["
|
646
|
-
kwargs["
|
647
|
-
kwargs["
|
648
|
-
kwargs["
|
649
|
-
kwargs["
|
650
|
-
|
651
|
-
#
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
and kwargs["
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
@
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
@
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
@
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
@
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
"
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
#
|
812
|
-
#
|
813
|
-
#
|
814
|
-
#
|
815
|
-
#
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
f" {len(
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
f" {
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
865
|
-
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
if
|
875
|
-
|
876
|
-
self.
|
877
|
-
|
878
|
-
|
879
|
-
f"
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
self.
|
895
|
-
self.
|
896
|
-
self.
|
897
|
-
self.
|
898
|
-
self.
|
899
|
-
|
900
|
-
|
901
|
-
self.
|
902
|
-
self.
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
"
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
#
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
"
|
1026
|
-
|
1027
|
-
f"\
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
self.
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
#
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
"
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1053
|
-
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
#
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
#
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
"
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
#
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
self.
|
1103
|
-
|
1104
|
-
#
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
#
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1145
|
-
@
|
1146
|
-
|
1147
|
-
|
1148
|
-
#
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
self.
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
"
|
1175
|
-
"
|
1176
|
-
"
|
1177
|
-
"
|
1178
|
-
"
|
1179
|
-
"
|
1180
|
-
"
|
1181
|
-
"
|
1182
|
-
"
|
1183
|
-
"
|
1184
|
-
"
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
|
1198
|
-
|
1199
|
-
|
1200
|
-
|
1201
|
-
Artifact.
|
1202
|
-
Artifact.
|
1203
|
-
Artifact.
|
1204
|
-
Artifact.
|
1205
|
-
Artifact.
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
import shutil
|
5
|
+
from collections.abc import Mapping
|
6
|
+
from pathlib import Path, PurePath, PurePosixPath
|
7
|
+
from typing import TYPE_CHECKING, Any
|
8
|
+
|
9
|
+
import fsspec
|
10
|
+
import lamindb_setup as ln_setup
|
11
|
+
import pandas as pd
|
12
|
+
from anndata import AnnData
|
13
|
+
from django.db.models import Q, QuerySet
|
14
|
+
from lamin_utils import colors, logger
|
15
|
+
from lamindb_setup import settings as setup_settings
|
16
|
+
from lamindb_setup._init_instance import register_storage_in_instance
|
17
|
+
from lamindb_setup.core._docs import doc_args
|
18
|
+
from lamindb_setup.core._settings_storage import init_storage
|
19
|
+
from lamindb_setup.core.hashing import hash_dir, hash_file
|
20
|
+
from lamindb_setup.core.upath import (
|
21
|
+
create_path,
|
22
|
+
extract_suffix_from_path,
|
23
|
+
get_stat_dir_cloud,
|
24
|
+
get_stat_file_cloud,
|
25
|
+
)
|
26
|
+
from lnschema_core.models import Artifact, FeatureManager, ParamManager, Run, Storage
|
27
|
+
from lnschema_core.types import (
|
28
|
+
VisibilityChoice,
|
29
|
+
)
|
30
|
+
|
31
|
+
from lamindb._utils import attach_func_to_class_method
|
32
|
+
from lamindb.core._data import _track_run_input, describe, view_lineage
|
33
|
+
from lamindb.core._settings import settings
|
34
|
+
from lamindb.core.exceptions import IntegrityError, InvalidArgument
|
35
|
+
from lamindb.core.loaders import load_to_memory
|
36
|
+
from lamindb.core.storage import (
|
37
|
+
LocalPathClasses,
|
38
|
+
UPath,
|
39
|
+
delete_storage,
|
40
|
+
infer_suffix,
|
41
|
+
write_to_disk,
|
42
|
+
)
|
43
|
+
from lamindb.core.storage.paths import (
|
44
|
+
auto_storage_key_from_artifact,
|
45
|
+
auto_storage_key_from_artifact_uid,
|
46
|
+
check_path_is_child_of_root,
|
47
|
+
filepath_cache_key_from_artifact,
|
48
|
+
filepath_from_artifact,
|
49
|
+
)
|
50
|
+
from lamindb.core.versioning import (
|
51
|
+
create_uid,
|
52
|
+
message_update_key_in_version_family,
|
53
|
+
)
|
54
|
+
|
55
|
+
from .core._data import (
|
56
|
+
add_transform_to_kwargs,
|
57
|
+
get_run,
|
58
|
+
save_feature_set_links,
|
59
|
+
save_feature_sets,
|
60
|
+
)
|
61
|
+
from .core.storage.objects import _mudata_is_installed
|
62
|
+
from .core.storage.paths import AUTO_KEY_PREFIX
|
63
|
+
|
64
|
+
try:
    from .core.storage._zarr import zarr_is_adata
except ImportError:
    # zarr is an optional dependency: install a stub that fails *lazily*,
    # i.e. only when zarr-backed AnnData detection is actually requested,
    # so that importing this module never requires zarr.

    def zarr_is_adata(storepath):  # type: ignore
        raise ImportError("Please install zarr: pip install zarr")
|
70
|
+
|
71
|
+
|
72
|
+
if TYPE_CHECKING:
|
73
|
+
from lamindb_setup.core.types import UPathStr
|
74
|
+
from mudata import MuData
|
75
|
+
from tiledbsoma import Collection as SOMACollection
|
76
|
+
from tiledbsoma import Experiment as SOMAExperiment
|
77
|
+
|
78
|
+
from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor
|
79
|
+
|
80
|
+
|
81
|
+
def process_pathlike(
    filepath: UPath,
    default_storage: Storage,
    using_key: str | None,
    skip_existence_check: bool = False,
) -> tuple[Storage, bool]:
    """Resolve which storage location a path belongs to.

    Args:
        filepath: The path being registered as an artifact.
        default_storage: The instance's current default storage record.
        using_key: Name of the instance to query; `None` means the current one.
        skip_existence_check: Skip the up-front `filepath.exists()` probe.

    Returns:
        A tuple `(storage_record, use_existing_storage_key)` where the flag
        indicates that the path already lives inside a registered storage
        location and its existing key should be reused.

    Raises:
        FileNotFoundError: If the path does not exist (unless skipped or a
            `PermissionError` prevents the check).
    """
    if not skip_existence_check:
        try:  # check if file exists
            if not filepath.exists():
                raise FileNotFoundError(filepath)
        except PermissionError:
            # existence could not be verified; proceed optimistically
            pass
    if check_path_is_child_of_root(filepath, default_storage.root):
        use_existing_storage_key = True
        return default_storage, use_existing_storage_key
    else:
        # check whether the path is part of one of the existing
        # already-registered storage locations
        result = False
        # within the hub, we don't want to perform check_path_in_existing_storage
        if using_key is None:
            result = check_path_in_existing_storage(filepath, using_key)
        if isinstance(result, Storage):
            use_existing_storage_key = True
            return result, use_existing_storage_key
        else:
            # if the path is in the cloud, we have a good candidate
            # for the storage root: the bucket
            if not isinstance(filepath, LocalPathClasses):
                # for a cloud path, new_root is always the bucket name
                new_root = list(filepath.parents)[-1]
                # do not register remote storage locations on hub if the current instance
                # is not managed on the hub
                storage_settings, _ = init_storage(
                    new_root, prevent_register_hub=not setup_settings.instance.is_on_hub
                )
                storage_record = register_storage_in_instance(storage_settings)
                use_existing_storage_key = True
                return storage_record, use_existing_storage_key
            # if the filepath is local
            else:
                use_existing_storage_key = False
                # if the default storage is local we'll throw an error if the user
                # doesn't provide a key
                if default_storage.type == "local":
                    return default_storage, use_existing_storage_key
                # if the default storage is in the cloud (the file is going to
                # be uploaded upon saving it), we treat the filepath as a cache
                else:
                    return default_storage, use_existing_storage_key
|
131
|
+
|
132
|
+
|
133
|
+
def process_data(
    provisional_uid: str,
    data: UPathStr | pd.DataFrame | AnnData,
    format: str | None,
    key: str | None,
    default_storage: Storage,
    using_key: str | None,
    skip_existence_check: bool = False,
) -> tuple[Any, Path | UPath, str, Storage, bool]:
    """Serialize a data object that's provided as file or in memory.

    Path-like inputs are resolved against registered storage locations;
    in-memory objects (DataFrame / AnnData / MuData) are written to the
    local cache directory under the provisional uid.

    Returns:
        `(memory_rep, path, suffix, storage, use_existing_storage_key)` —
        `memory_rep` is the original in-memory object or `None` for paths.

    Raises:
        InvalidArgument: If the suffix of `key` conflicts with the inferred
            storage suffix.
        NotImplementedError: If `data` is neither path-like nor a supported
            in-memory type.
    """
    # if not overwritten, data gets stored in default storage
    if _mudata_is_installed():
        from mudata import MuData

        data_types = (pd.DataFrame, AnnData, MuData)
    else:
        data_types = (pd.DataFrame, AnnData)  # type:ignore

    if isinstance(data, (str, Path, UPath)):  # UPathStr, spelled out
        access_token = (
            default_storage._access_token
            if hasattr(default_storage, "_access_token")
            else None
        )
        path = create_path(data, access_token=access_token).resolve()
        storage, use_existing_storage_key = process_pathlike(
            path,
            default_storage=default_storage,
            using_key=using_key,
            skip_existence_check=skip_existence_check,
        )
        suffix = extract_suffix_from_path(path)
        memory_rep = None
    elif isinstance(data, data_types):
        storage = default_storage
        memory_rep = data
        if key is not None:
            key_suffix = extract_suffix_from_path(PurePosixPath(key), arg_name="key")
            # use suffix as the (adata) format if the format is not provided
            if isinstance(data, AnnData) and format is None and len(key_suffix) > 0:
                format = key_suffix[1:]
        else:
            key_suffix = None
        suffix = infer_suffix(data, format)
        if key_suffix is not None and key_suffix != suffix:
            raise InvalidArgument(
                f"The suffix '{key_suffix}' of the provided key is incorrect, it should"
                f" be '{suffix}'."
            )
        cache_name = f"{provisional_uid}{suffix}"
        path = settings.storage.cache_dir / cache_name
        # Alex: I don't understand the line below
        # NOTE(review): presumably a defensive re-application of the suffix
        # when `cache_name` carried none — TODO confirm it is still needed
        if path.suffixes == []:
            path = path.with_suffix(suffix)
        write_to_disk(data, path)
        use_existing_storage_key = False
    else:
        raise NotImplementedError(
            f"Do not know how to create a artifact object from {data}, pass a path"
            " instead!"
        )
    return memory_rep, path, suffix, storage, use_existing_storage_key
|
195
|
+
|
196
|
+
|
197
|
+
def get_stat_or_artifact(
    path: UPath,
    key: str | None = None,
    check_hash: bool = True,
    is_replace: bool = False,
    instance: str | None = None,
) -> tuple[int, str | None, str | None, int | None, Artifact | None] | Artifact:
    """Stat a path and deduplicate against existing artifacts by hash.

    Returns either a `(size, hash, hash_type, n_objects, revises)` tuple for
    a new artifact, or an existing `Artifact` with the same hash (depending
    on `settings.creation.artifact_if_hash_exists`).

    Note: when `settings.creation.artifact_skip_size_hash` is set, the tuple
    elements can all be `None` despite the annotated types.

    Raises:
        FileExistsError: If an artifact with the same hash exists and the
            creation setting is "error", or the existing artifact is in trash.
    """
    n_objects = None
    if settings.creation.artifact_skip_size_hash:
        return None, None, None, n_objects, None
    stat = path.stat()  # one network request
    if not isinstance(path, LocalPathClasses):
        size, hash, hash_type = None, None, None
        if stat is not None:
            # convert UPathStatResult to fsspec info dict
            stat = stat.as_info()
            if "ETag" in stat:  # is file
                size, hash, hash_type = get_stat_file_cloud(stat)
            elif stat["type"] == "directory":
                size, hash, hash_type, n_objects = get_stat_dir_cloud(path)
        if hash is None:
            logger.warning(f"did not add hash for {path}")
            return size, hash, hash_type, n_objects, None
    else:
        # local path: hash directly on disk
        if path.is_dir():
            size, hash, hash_type, n_objects = hash_dir(path)
        else:
            hash, hash_type = hash_file(path)
            size = stat.st_size
    if not check_hash:
        return size, hash, hash_type, n_objects, None
    previous_artifact_version = None
    if key is None or is_replace:
        # dedup purely by content hash
        result = Artifact.objects.using(instance).filter(hash=hash).all()
        artifact_with_same_hash_exists = len(result) > 0
    else:
        # also match on (key, storage) so that re-saving under the same key
        # creates a new version of the existing artifact
        storage_id = settings.storage.id
        result = (
            Artifact.objects.using(instance)
            .filter(Q(hash=hash) | Q(key=key, storage_id=storage_id))
            .order_by("-created_at")
            .all()
        )
        artifact_with_same_hash_exists = len(result.filter(hash=hash).all()) > 0
        if not artifact_with_same_hash_exists and len(result) > 0:
            logger.important(
                f"creating new artifact version for key='{key}' (storage: '{settings.storage.root_as_str}')"
            )
            # most recent artifact with the same key becomes the predecessor
            previous_artifact_version = result[0]
    if artifact_with_same_hash_exists:
        if settings.creation.artifact_if_hash_exists == "error":
            msg = f"artifact with same hash exists: {result[0]}"
            hint = (
                "💡 you can make this error a warning:\n"
                "    ln.settings.creation.artifact_if_hash_exists"
            )
            raise FileExistsError(f"{msg}\n{hint}")
        elif settings.creation.artifact_if_hash_exists == "warn_create_new":
            logger.warning(
                "creating new Artifact object despite existing artifact with same hash:"
                f" {result[0]}"
            )
            return size, hash, hash_type, n_objects, None
        else:
            # default: return the already-registered artifact
            if result[0].visibility == -1:
                raise FileExistsError(
                    f"You're trying to re-create this artifact in trash: {result[0]}"
                    "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
                )
            logger.important(f"returning existing artifact with same hash: {result[0]}")
            return result[0]
    else:
        return size, hash, hash_type, n_objects, previous_artifact_version
|
270
|
+
|
271
|
+
|
272
|
+
def check_path_in_existing_storage(
    path: Path | UPath, using_key: str | None = None
) -> Storage | bool:
    """Return the registered storage location that contains `path`, else False.

    Iterates all storage records of the (optionally `using_key`-selected)
    instance and yields the first whose root is an ancestor of `path`.
    """
    registered = Storage.objects.using(using_key).filter().all()
    containing = (
        storage
        for storage in registered
        if check_path_is_child_of_root(path, root=storage.root)
    )
    # first matching storage record, or False when none contains the path
    return next(containing, False)
|
280
|
+
|
281
|
+
|
282
|
+
def get_relative_path_to_directory(
    path: PurePath | Path | UPath, directory: PurePath | Path | UPath
) -> PurePath | Path:
    """Compute `path` relative to `directory`.

    Dispatches on the concrete type of `directory`: remote UPaths use a
    string-based workaround, local Paths resolve symlinks first, and bare
    PurePaths use plain `relative_to`.
    """
    if isinstance(directory, UPath) and not isinstance(directory, LocalPathClasses):
        # UPath.relative_to() is not behaving as it should (2023-04-07)
        # need to lstrip otherwise inconsistent behavior across trailing slashes
        # see test_artifact.py: test_get_relative_path_to_directory
        stripped = path.as_posix().replace(directory.as_posix(), "").lstrip("/")
        return PurePath(stripped)
    if isinstance(directory, Path):
        return path.resolve().relative_to(directory.resolve())  # type: ignore
    if isinstance(directory, PurePath):
        return path.relative_to(directory)
    raise TypeError("Directory not of type Path or UPath")
|
299
|
+
|
300
|
+
|
301
|
+
def get_artifact_kwargs_from_data(
    *,
    data: Path | UPath | str | pd.DataFrame | AnnData | MuData,
    key: str | None,
    run: Run | None,
    format: str | None,
    provisional_uid: str,
    version: str | None,
    default_storage: Storage,
    using_key: str | None = None,
    is_replace: bool = False,
    skip_check_exists: bool = False,
):
    """Build the constructor kwargs for a new `Artifact` from `data`.

    Serializes/stats the data, deduplicates by hash, resolves the storage
    key, and assembles both the model kwargs and a dict of private state.

    Returns:
        `(kwargs, privates)` for a new artifact, or `(artifact, None)` when
        an existing artifact with the same hash is reused.
    """
    run = get_run(run)
    memory_rep, path, suffix, storage, use_existing_storage_key = process_data(
        provisional_uid,
        data,
        format,
        key,
        default_storage,
        using_key,
        skip_check_exists,
    )
    stat_or_artifact = get_stat_or_artifact(
        path=path,
        key=key,
        instance=using_key,
        is_replace=is_replace,
    )
    if isinstance(stat_or_artifact, Artifact):
        artifact = stat_or_artifact
        # update the run of the existing artifact
        if run is not None:
            # save the information that this artifact was previously
            # produced by another run
            if artifact.run is not None:
                artifact.run._output_artifacts_with_later_updates.add(artifact)
            # update the run of the artifact with the latest run
            stat_or_artifact.run = run
            stat_or_artifact.transform = run.transform
        return artifact, None
    else:
        size, hash, hash_type, n_objects, revises = stat_or_artifact

    if revises is not None:  # update provisional_uid
        provisional_uid, revises = create_uid(revises=revises, version=version)
        if settings.storage.cache_dir in path.parents:
            # rename the cached file so its name matches the final uid
            path = path.rename(path.with_name(f"{provisional_uid}{suffix}"))

    check_path_in_storage = False
    if use_existing_storage_key:
        # the path already lives in registered storage: derive its key
        inferred_key = get_relative_path_to_directory(
            path=path, directory=UPath(storage.root)
        ).as_posix()
        if key is None:
            key = inferred_key
        else:
            if not key == inferred_key:
                raise InvalidArgument(
                    f"The path '{data}' is already in registered storage"
                    f" '{storage.root}' with key '{inferred_key}'\nYou passed"
                    f" conflicting key '{key}': please move the file before"
                    " registering it."
                )
        check_path_in_storage = True
    else:
        storage = default_storage

    log_storage_hint(
        check_path_in_storage=check_path_in_storage,
        storage=storage,
        key=key,
        uid=provisional_uid,
        suffix=suffix,
        is_dir=n_objects is not None,
    )

    # do we use a virtual or an actual storage key?
    key_is_virtual = settings.creation._artifact_use_virtual_keys

    # if the file is already in storage, independent of the default
    # we use an actual storage key
    if check_path_in_storage:
        key_is_virtual = False

    kwargs = {
        "uid": provisional_uid,
        "suffix": suffix,
        "hash": hash,
        "_hash_type": hash_type,
        "key": key,
        "size": size,
        "storage_id": storage.id,
        # passing both the id and the object
        # to make them both available immediately
        # after object creation
        "n_objects": n_objects,
        "n_observations": None,  # to implement
        "run_id": run.id if run is not None else None,
        "run": run,
        "_key_is_virtual": key_is_virtual,
        "revises": revises,
    }
    if not isinstance(path, LocalPathClasses):
        local_filepath = None
        cloud_filepath = path
    else:
        local_filepath = path
        cloud_filepath = None
    privates = {
        "local_filepath": local_filepath,
        "cloud_filepath": cloud_filepath,
        "memory_rep": memory_rep,
        "check_path_in_storage": check_path_in_storage,
    }
    return kwargs, privates
|
417
|
+
|
418
|
+
|
419
|
+
def log_storage_hint(
    *,
    check_path_in_storage: bool,
    storage: Storage | None,
    key: str | None,
    uid: str,
    suffix: str,
    is_dir: bool,
) -> None:
    """Log a hint describing where the artifact will live in storage."""
    if check_path_in_storage:
        # path already lives inside a registered storage location
        root_str = storage.root  # type: ignore
        display_root = root_str
        # check whether path is local
        if fsspec.utils.get_protocol(root_str) == "file":
            # if it's a local path, check whether it's in the current working directory
            root_path = Path(root_str)
            if check_path_is_child_of_root(root_path, Path.cwd()):
                # only display the relative path, not the fully resolved path
                display_root = root_path.relative_to(Path.cwd())
        hint = f"path in storage '{display_root}'"
    else:
        hint = "path content will be copied to default storage upon `save()`"
    if key is None:
        # no semantic key passed: show the auto-generated storage key
        storage_key = auto_storage_key_from_artifact_uid(uid, suffix, is_dir)
        hint += f" with key `None` ('{storage_key}')"
    else:
        hint += f" with key '{key}'"
    logger.hint(hint)
|
447
|
+
|
448
|
+
|
449
|
+
def data_is_anndata(data: AnnData | UPathStr) -> bool:
    """Return whether `data` is an AnnData object or a path to AnnData-like storage."""
    if isinstance(data, AnnData):
        return True
    if not isinstance(data, (str, Path, UPath)):
        return False
    data_path = UPath(data)
    if data_path.suffix == ".h5ad":
        return True
    if data_path.suffix == ".zarr":
        # ".anndata.zarr" is a valid suffix (core.storage._valid_suffixes)
        if ".anndata" in data_path.suffixes:
            return True
        # check only for local, expensive for cloud
        if fsspec.utils.get_protocol(data_path.as_posix()) == "file":
            return zarr_is_adata(data_path)
        logger.warning("We do not check if cloud zarr is AnnData or not.")
    return False
|
467
|
+
|
468
|
+
|
469
|
+
def data_is_mudata(data: MuData | UPathStr) -> bool:
    """Return whether `data` is a MuData object or a path to an `.h5mu` file."""
    if _mudata_is_installed():
        from mudata import MuData

        if isinstance(data, MuData):
            return True
    if isinstance(data, (str, Path)):
        # only the .h5mu suffix marks a MuData path
        return UPath(data).suffix in {".h5mu"}
    return False
|
478
|
+
|
479
|
+
|
480
|
+
def _check_accessor_artifact(data: Any, accessor: str | None = None):
    """Infer the accessor ("DataFrame", "AnnData", "MuData") for `data` when none is passed.

    Raises TypeError for in-memory objects of unsupported type.
    """
    # an explicitly passed accessor wins
    if accessor is not None:
        return accessor
    if isinstance(data, pd.DataFrame):
        logger.warning("data is a DataFrame, please use .from_df()")
        return "DataFrame"
    data_is_path = isinstance(data, (str, Path))
    if data_is_anndata(data):
        if not data_is_path:
            logger.warning("data is an AnnData, please use .from_anndata()")
        return "AnnData"
    if data_is_mudata(data):
        if not data_is_path:
            logger.warning("data is a MuData, please use .from_mudata()")
        return "MuData"
    if not data_is_path:  # UPath is a subclass of Path
        raise TypeError("data has to be a string, Path, UPath")
    return None
|
499
|
+
|
500
|
+
|
501
|
+
def __init__(artifact: Artifact, *args, **kwargs):
    """User-facing constructor for `Artifact`; also handles the Django-internal call."""
    artifact.features = FeatureManager(artifact)
    artifact.params = ParamManager(artifact)
    # Below checks for the Django-internal call in from_db()
    # it'd be better if we could avoid this, but not being able to create a Artifact
    # from data with the default constructor renders the central class of the API
    # essentially useless
    # The danger below is not that a user might pass as many args (12 of it), but rather
    # that at some point the Django API might change; on the other hand, this
    # condition of for calling the constructor based on kwargs should always
    # stay robust
    if len(args) == len(artifact._meta.concrete_fields):
        super(Artifact, artifact).__init__(*args, **kwargs)
        return None
    # now we proceed with the user-facing constructor
    if len(args) > 1:
        raise ValueError("Only one non-keyword arg allowed: data")

    # pop all supported keyword arguments off kwargs so the leftover check below works
    data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
    type: str = kwargs.pop("type") if "type" in kwargs else None
    key: str | None = kwargs.pop("key") if "key" in kwargs else None
    run: Run | None = kwargs.pop("run") if "run" in kwargs else None
    description: str | None = (
        kwargs.pop("description") if "description" in kwargs else None
    )
    revises: Artifact | None = kwargs.pop("revises") if "revises" in kwargs else None
    version: str | None = kwargs.pop("version") if "version" in kwargs else None
    visibility: int | None = (
        kwargs.pop("visibility")
        if "visibility" in kwargs
        else VisibilityChoice.default.value
    )
    format = kwargs.pop("format") if "format" in kwargs else None
    _is_internal_call = kwargs.pop("_is_internal_call", False)
    skip_check_exists = (
        kwargs.pop("skip_check_exists") if "skip_check_exists" in kwargs else False
    )
    # resolve the default storage location; a local one is preferred when
    # the instance is configured with keep_artifacts_local
    if "default_storage" in kwargs:
        default_storage = kwargs.pop("default_storage")
    else:
        if setup_settings.instance.keep_artifacts_local:
            default_storage = setup_settings.instance.storage_local.record
        else:
            default_storage = setup_settings.instance.storage.record
    using_key = (
        kwargs.pop("using_key") if "using_key" in kwargs else settings._using_key
    )
    accessor = kwargs.pop("_accessor") if "_accessor" in kwargs else None
    accessor = _check_accessor_artifact(data=data, accessor=accessor)
    # deprecated alias for `revises`
    if "is_new_version_of" in kwargs:
        logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
        revises = kwargs.pop("is_new_version_of")
    if not len(kwargs) == 0:
        raise ValueError(
            "Only data, key, run, description, version, revises, visibility"
            f" can be passed, you passed: {kwargs}"
        )
    # a passed key must agree with the key of the revised artifact
    if revises is not None and key is not None and revises.key != key:
        note = message_update_key_in_version_family(
            suid=revises.stem_uid,
            existing_key=revises.key,
            new_key=key,
            registry="Artifact",
        )
        raise ValueError(
            f"`key` is {key}, but `revises.key` is '{revises.key}'\n\n Either do *not* pass `key`.\n\n{note}"
        )
    if revises is not None:
        if not isinstance(revises, Artifact):
            raise TypeError("`revises` has to be of type `Artifact`")
        if description is None:
            # inherit the description from the revised artifact
            description = revises.description
    if key is not None and AUTO_KEY_PREFIX in key:
        raise ValueError(
            f"Do not pass key that contains a managed storage path in `{AUTO_KEY_PREFIX}`"
        )
    # below is for internal calls that require defining the storage location
    # ahead of constructing the Artifact
    if isinstance(data, (str, Path)) and AUTO_KEY_PREFIX in str(data):
        if _is_internal_call:
            is_automanaged_path = True
            user_provided_key = key
            key = None
        else:
            raise ValueError(
                f"Do not pass path inside the `{AUTO_KEY_PREFIX}` directory."
            )
    else:
        is_automanaged_path = False
    provisional_uid, revises = create_uid(revises=revises, version=version)
    kwargs_or_artifact, privates = get_artifact_kwargs_from_data(
        data=data,
        key=key,
        run=run,
        format=format,
        provisional_uid=provisional_uid,
        version=version,
        default_storage=default_storage,
        using_key=using_key,
        skip_check_exists=skip_check_exists,
    )

    # an object with the same hash already exists
    if isinstance(kwargs_or_artifact, Artifact):
        from ._record import init_self_from_db, update_attributes

        # re-use the existing record instead of creating a duplicate
        init_self_from_db(artifact, kwargs_or_artifact)
        # adding "key" here is dangerous because key might be auto-populated
        update_attributes(artifact, {"description": description})
        if artifact.key != key and key is not None:
            logger.warning(
                f"key {artifact.key} on existing artifact differs from passed key {key}"
            )
        return None
    else:
        kwargs = kwargs_or_artifact

    if revises is None:
        # get_artifact_kwargs_from_data may have resolved a revised artifact
        revises = kwargs_or_artifact.pop("revises")

    if data is not None:
        # stash private state needed for the subsequent save()/upload step
        artifact._local_filepath = privates["local_filepath"]
        artifact._cloud_filepath = privates["cloud_filepath"]
        artifact._memory_rep = privates["memory_rep"]
        artifact._to_store = not privates["check_path_in_storage"]

    if is_automanaged_path and _is_internal_call:
        # recover the uid encoded in the auto-managed storage path
        kwargs["_key_is_virtual"] = True
        assert AUTO_KEY_PREFIX in kwargs["key"]  # noqa: S101
        uid = kwargs["key"].replace(AUTO_KEY_PREFIX, "").replace(kwargs["suffix"], "")
        kwargs["key"] = user_provided_key
        if revises is not None:
            assert uid.startswith(revises.stem_uid)  # noqa: S101
        if len(uid) == 16:
            # a 16-char uid lacks a version suffix; complete it
            if revises is None:
                uid += "0000"
            else:
                uid, revises = create_uid(revises=revises, version=version)
        kwargs["uid"] = uid

    # only set key now so that we don't do a look-up on it in case revises is passed
    if revises is not None:
        kwargs["key"] = revises.key

    kwargs["type"] = type
    kwargs["version"] = version
    kwargs["description"] = description
    kwargs["visibility"] = visibility
    kwargs["_accessor"] = accessor
    kwargs["revises"] = revises
    # this check needs to come down here because key might be populated from an
    # existing file path during get_artifact_kwargs_from_data()
    if (
        kwargs["key"] is None
        and kwargs["description"] is None
        and kwargs["run"] is None
    ):
        raise ValueError("Pass one of key, run or description as a parameter")

    add_transform_to_kwargs(kwargs, kwargs["run"])

    super(Artifact, artifact).__init__(**kwargs)
|
663
|
+
|
664
|
+
|
665
|
+
@classmethod  # type: ignore
@doc_args(Artifact.from_df.__doc__)
def from_df(
    cls,
    df: pd.DataFrame,
    key: str | None = None,
    description: str | None = None,
    run: Run | None = None,
    revises: Artifact | None = None,
    **kwargs,
) -> Artifact:
    """{}"""  # noqa: D415
    # delegate to the generic constructor with the DataFrame accessor
    return Artifact(
        data=df,
        key=key,
        description=description,
        run=run,
        revises=revises,
        type="dataset",
        _accessor="DataFrame",
        **kwargs,
    )
|
688
|
+
|
689
|
+
|
690
|
+
@classmethod  # type: ignore
@doc_args(Artifact.from_anndata.__doc__)
def from_anndata(
    cls,
    adata: AnnData | UPathStr,
    key: str | None = None,
    description: str | None = None,
    run: Run | None = None,
    revises: Artifact | None = None,
    **kwargs,
) -> Artifact:
    """{}"""  # noqa: D415
    # reject anything that is neither an AnnData object nor an AnnData-like path
    if not data_is_anndata(adata):
        raise ValueError("data has to be an AnnData object or a path to AnnData-like")
    # delegate to the generic constructor with the AnnData accessor
    return Artifact(
        data=adata,
        key=key,
        description=description,
        run=run,
        revises=revises,
        type="dataset",
        _accessor="AnnData",
        **kwargs,
    )
|
715
|
+
|
716
|
+
|
717
|
+
@classmethod  # type: ignore
@doc_args(Artifact.from_mudata.__doc__)
def from_mudata(
    cls,
    mdata: MuData,
    key: str | None = None,
    description: str | None = None,
    run: Run | None = None,
    revises: Artifact | None = None,
    **kwargs,
) -> Artifact:
    """{}"""  # noqa: D415
    # delegate to the generic constructor with the MuData accessor
    return Artifact(
        data=mdata,
        key=key,
        description=description,
        run=run,
        revises=revises,
        type="dataset",
        _accessor="MuData",
        **kwargs,
    )
|
740
|
+
|
741
|
+
|
742
|
+
@classmethod  # type: ignore
@doc_args(Artifact.from_dir.__doc__)
def from_dir(
    cls,
    path: UPathStr,
    key: str | None = None,
    *,
    run: Run | None = None,
) -> list[Artifact]:
    """{}"""  # noqa: D415
    logger.warning(
        "this creates one artifact per file in the directory - consider"
        " ln.Artifact(dir_path) to get one artifact for the entire directory"
    )
    folderpath: UPath = create_path(path)  # returns Path for local
    default_storage = settings.storage.record
    using_key = settings._using_key
    # determine whether the folder already lives in a registered storage location
    storage, use_existing_storage = process_pathlike(
        folderpath, default_storage, using_key
    )
    folder_key_path: PurePath | Path
    if key is None:
        if not use_existing_storage:
            logger.warning(
                "folder is outside existing storage location, will copy files from"
                f" {path} to {storage.root}/{folderpath.name}"
            )
            folder_key_path = Path(folderpath.name)
        else:
            # maintain the hierachy within an existing storage location
            folder_key_path = get_relative_path_to_directory(
                folderpath, UPath(storage.root)
            )
    else:
        folder_key_path = Path(key)

    # always sanitize by stripping a trailing slash
    folder_key = folder_key_path.as_posix().rstrip("/")

    # TODO: (non-local) UPath doesn't list the first level artifacts and dirs with "*"
    pattern = "" if not isinstance(folderpath, LocalPathClasses) else "*"

    # silence fine-grained logging
    verbosity = settings.verbosity
    verbosity_int = settings._verbosity_int
    if verbosity_int >= 1:
        settings.verbosity = "warning"
    # one Artifact per file, keyed by uid so exact duplicates collapse
    artifacts_dict = {}
    for filepath in folderpath.rglob(pattern):
        if filepath.is_file():
            relative_path = get_relative_path_to_directory(filepath, folderpath)
            artifact_key = folder_key + "/" + relative_path.as_posix()
            # if creating from rglob, we don't need to check for existence
            artifact = Artifact(
                filepath, run=run, key=artifact_key, skip_check_exists=True
            )
            artifacts_dict[artifact.uid] = artifact
    settings.verbosity = verbosity

    # run sanity check on hashes
    hashes = [
        artifact.hash
        for artifact in artifacts_dict.values()
        if artifact.hash is not None
    ]
    uids = artifacts_dict.keys()
    if len(set(hashes)) == len(hashes):
        # all hashes unique: keep everything
        artifacts = list(artifacts_dict.values())
    else:
        # consider exact duplicates (same id, same hash)
        # below can't happen anymore because artifacts is a dict now
        # if len(set(uids)) == len(set(hashes)):
        #     logger.warning("dropping duplicate records in list of artifact records")
        #     artifacts = list(set(uids))
        # consider false duplicates (different id, same hash)
        if not len(set(uids)) == len(set(hashes)):
            seen_hashes = set()
            # set.add returns None (falsy), so the first occurrence of each hash
            # is kept out of non_unique_artifacts; later occurrences are collected
            non_unique_artifacts = {
                hash: artifact
                for hash, artifact in artifacts_dict.items()
                if artifact.hash in seen_hashes or seen_hashes.add(artifact.hash)  # type: ignore
            }
            display_non_unique = "\n    ".join(
                f"{artifact}" for artifact in non_unique_artifacts
            )
            logger.warning(
                "there are multiple artifact uids with the same hashes, dropping"
                f" {len(non_unique_artifacts)} duplicates out of"
                f" {len(artifacts_dict)} artifacts:\n    {display_non_unique}"
            )
            artifacts = [
                artifact
                for artifact in artifacts_dict.values()
                if artifact not in non_unique_artifacts.values()
            ]
    logger.success(
        f"created {len(artifacts)} artifacts from directory using storage"
        f" {storage.root} and key = {folder_key}/"
    )
    return artifacts
|
842
|
+
|
843
|
+
|
844
|
+
# docstring handled through attach_func_to_class_method
|
845
|
+
def replace(
    self,
    data: UPathStr,
    run: Run | None = None,
    format: str | None = None,
) -> None:
    """Replace this artifact's underlying data while keeping its uid and record."""
    default_storage = settings.storage.record
    kwargs, privates = get_artifact_kwargs_from_data(
        provisional_uid=self.uid,
        data=data,
        key=self.key,
        run=run,
        format=format,
        default_storage=default_storage,
        version=None,
        is_replace=True,
    )

    # this artifact already exists
    if privates is None:
        return kwargs

    check_path_in_storage = privates["check_path_in_storage"]
    if check_path_in_storage:
        raise ValueError("Can only replace with a local file not in any Storage.")

    if self.key is not None and not self._key_is_virtual:
        # non-virtual semantic key: the storage key is derived from self.key
        key_path = PurePosixPath(self.key)
        new_filename = f"{key_path.stem}{kwargs['suffix']}"
        # the following will only be true if the suffix changes!
        if key_path.name != new_filename:
            # schedule deletion of the old object in storage upon save()
            self._clear_storagekey = self.key
            self.key = str(key_path.with_name(new_filename))
            logger.warning(
                f"replacing the file will replace key '{key_path}' with '{self.key}'"
                f" and delete '{key_path}' upon `save()`"
            )
    else:
        # virtual key (or no key): the storage key is derived from uid + suffix
        old_storage = auto_storage_key_from_artifact(self)
        is_dir = self.n_objects is not None
        new_storage = auto_storage_key_from_artifact_uid(
            self.uid, kwargs["suffix"], is_dir
        )
        if old_storage != new_storage:
            self._clear_storagekey = old_storage
            if self.key is not None:
                new_key_path = PurePosixPath(self.key).with_suffix(kwargs["suffix"])
                self.key = str(new_key_path)

    # take over the metadata computed from the new data
    self.suffix = kwargs["suffix"]
    self.size = kwargs["size"]
    self.hash = kwargs["hash"]
    self._hash_type = kwargs["_hash_type"]
    self.run_id = kwargs["run_id"]
    self.run = kwargs["run"]

    self._local_filepath = privates["local_filepath"]
    self._cloud_filepath = privates["cloud_filepath"]
    self._memory_rep = privates["memory_rep"]
    # no need to upload if new file is already in storage
    self._to_store = not check_path_in_storage
|
906
|
+
|
907
|
+
|
908
|
+
# docstring handled through attach_func_to_class_method
|
909
|
+
def open(
    self, mode: str = "r", is_run_input: bool | None = None
) -> AnnDataAccessor | BackedAccessor | SOMACollection | SOMAExperiment:
    """Open the artifact for backed (streamed) access; write mode only for tiledbsoma."""
    # ignore empty suffix for now
    suffixes = (".h5", ".hdf5", ".h5ad", ".zarr", ".tiledbsoma", "")
    if self.suffix not in suffixes:
        raise ValueError(
            "Artifact should have a zarr, h5 or tiledbsoma object as the underlying data, please"
            " use one of the following suffixes for the object name:"
            f" {', '.join(suffixes[:-1])}."
        )
    # write mode is only supported for tiledbsoma stores (suffix or key == "soma")
    if self.suffix != ".tiledbsoma" and self.key != "soma" and mode != "r":
        raise ValueError("Only a tiledbsoma store can be openened with `mode!='r'`.")

    from lamindb.core.storage._backed_access import _track_writes_factory, backed_access

    using_key = settings._using_key
    filepath, cache_key = filepath_cache_key_from_artifact(self, using_key=using_key)
    is_tiledbsoma_w = (
        filepath.name == "soma" or filepath.suffix == ".tiledbsoma"
    ) and mode == "w"
    # consider the case where an object is already locally cached
    localpath = setup_settings.instance.storage.cloud_to_local_no_update(
        filepath, cache_key=cache_key
    )
    if not is_tiledbsoma_w and localpath.exists():
        # read from the cached copy
        access = backed_access(localpath, mode, using_key)
    else:
        access = backed_access(filepath, mode, using_key)
        if is_tiledbsoma_w:

            def finalize():
                # after writes: re-hash and create a new version if content changed
                nonlocal self, filepath, localpath
                if not isinstance(filepath, LocalPathClasses):
                    _, hash, _, _ = get_stat_dir_cloud(filepath)
                else:
                    # this can be very slow
                    _, hash, _, _ = hash_dir(filepath)
                if self.hash != hash:
                    from ._record import init_self_from_db

                    new_version = Artifact(
                        filepath, revises=self, _is_internal_call=True
                    ).save()
                    init_self_from_db(self, new_version)

                    if localpath != filepath and localpath.exists():
                        # the cached copy is now stale; remove it
                        shutil.rmtree(localpath)

            access = _track_writes_factory(access, finalize)
    # only call if open is successful
    _track_run_input(self, is_run_input)
    return access
|
962
|
+
|
963
|
+
|
964
|
+
# can't really just call .cache in .load because of double tracking
|
965
|
+
def _synchronize_cleanup_on_error(
    filepath: UPath, cache_key: str | None = None
) -> UPath:
    """Sync `filepath` to the local cache; drop a stale partial copy if the sync fails."""
    try:
        return setup_settings.instance.storage.cloud_to_local(
            filepath, cache_key=cache_key, print_progress=True
        )
    except Exception as e:
        # a failed download can leave a corrupted partial copy in the cache
        if not isinstance(filepath, LocalPathClasses):
            cache_path = setup_settings.instance.storage.cloud_to_local_no_update(
                filepath, cache_key=cache_key
            )
            if cache_path.is_file():
                cache_path.unlink(missing_ok=True)
            elif cache_path.is_dir():
                shutil.rmtree(cache_path)
        raise e
|
983
|
+
|
984
|
+
|
985
|
+
# docstring handled through attach_func_to_class_method
|
986
|
+
def load(self, is_run_input: bool | None = None, **kwargs) -> Any:
    """Load the artifact into memory, preferring an existing in-memory representation."""
    memory_rep = getattr(self, "_memory_rep", None)
    if memory_rep is not None:
        access_memory = memory_rep
    else:
        filepath, cache_key = filepath_cache_key_from_artifact(
            self, using_key=settings._using_key
        )
        cache_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
        # cache_path is local so doesn't trigger any sync in load_to_memory
        access_memory = load_to_memory(cache_path, **kwargs)
    # only call if load is successful
    _track_run_input(self, is_run_input)
    return access_memory
|
999
|
+
|
1000
|
+
|
1001
|
+
# docstring handled through attach_func_to_class_method
|
1002
|
+
def cache(self, is_run_input: bool | None = None) -> Path:
    """Download the artifact to the local cache and return the cached path."""
    filepath, cache_key = filepath_cache_key_from_artifact(
        self, using_key=settings._using_key
    )
    local_path = _synchronize_cleanup_on_error(filepath, cache_key=cache_key)
    # only call if sync is successful
    _track_run_input(self, is_run_input)
    return local_path
|
1010
|
+
|
1011
|
+
|
1012
|
+
# docstring handled through attach_func_to_class_method
|
1013
|
+
def delete(
    self,
    permanent: bool | None = None,
    storage: bool | None = None,
    using_key: str | None = None,
) -> None:
    """Trash or permanently delete the artifact record and optionally its data."""
    # this first check means an invalid delete fails fast rather than cascading through
    # database and storage permission errors
    if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
        isettings = setup_settings.instance
        # refuse to touch storage managed by a different instance
        if self.storage.instance_uid != isettings.uid and (storage or storage is None):
            raise IntegrityError(
                "Cannot simply delete artifacts outside of this instance's managed storage locations."
                "\n(1) If you only want to delete the metadata record in this instance, pass `storage=False`"
                f"\n(2) If you want to delete the artifact in storage, please load the managing lamindb instance (uid={self.storage.instance_uid})."
                f"\nThese are all managed storage locations of this instance:\n{Storage.filter(instance_uid=isettings.uid).df()}"
            )
    # by default, we only move artifacts into the trash (visibility = -1)
    trash_visibility = VisibilityChoice.trash.value
    if self.visibility > trash_visibility and not permanent:
        if storage is not None:
            logger.warning("moving artifact to trash, storage arg is ignored")
        # move to trash
        self.visibility = trash_visibility
        self.save()
        logger.important(f"moved artifact to trash (visibility = {trash_visibility})")
        return

    # if the artifact is already in the trash
    # permanent delete skips the trash
    if permanent is None:
        # ask for confirmation of permanent delete
        response = input(
            "Artifact record is already in trash! Are you sure you want to permanently"
            " delete it? (y/n) You can't undo this action."
        )
        delete_record = response == "y"
    else:
        assert permanent  # noqa: S101
        delete_record = True

    if delete_record:
        # need to grab file path before deletion
        try:
            path, _ = filepath_from_artifact(self, using_key)
        except OSError:
            # we can still delete the record
            logger.warning("Could not get path")
            storage = False
        # only delete in storage if DB delete is successful
        # DB delete might error because of a foreign key constraint violated etc.
        self._delete_skip_storage()
        if self.key is None or self._key_is_virtual:
            # do not ask for confirmation also if storage is None
            delete_in_storage = storage is None or storage
        else:
            # for artifacts with non-virtual semantic storage keys (key is not None)
            # ask for extra-confirmation
            if storage is None:
                response = input(
                    f"Are you sure to want to delete {path}? (y/n)  You can't undo"
                    " this action."
                )
                delete_in_storage = response == "y"
            else:
                delete_in_storage = storage
        if not delete_in_storage:
            logger.important(f"a file/folder remains here: {path}")
        # we don't yet have logic to bring back the deleted metadata record
        # in case storage deletion fails - this is important for ACID down the road
        if delete_in_storage:
            delete_msg = delete_storage(path, raise_file_not_found_error=False)
            if delete_msg != "did-not-delete":
                logger.success(f"deleted {colors.yellow(f'{path}')}")
|
1087
|
+
|
1088
|
+
|
1089
|
+
def _delete_skip_storage(artifact, *args, **kwargs) -> None:
    # delete only the database record; the object in storage is left untouched
    super(Artifact, artifact).delete(*args, **kwargs)
|
1091
|
+
|
1092
|
+
|
1093
|
+
# docstring handled through attach_func_to_class_method
|
1094
|
+
def save(self, upload: bool | None = None, **kwargs) -> Artifact:
    """Save the record and upload/clear storage; `upload=True` pushes a local artifact to the cloud."""
    state_was_adding = self._state.adding
    print_progress = kwargs.pop("print_progress", True)
    access_token = kwargs.pop("access_token", None)
    local_path = None
    if upload and setup_settings.instance.keep_artifacts_local:
        # switch local storage location to cloud
        local_path = self.path
        self.storage_id = setup_settings.instance.storage.id
        self._local_filepath = local_path
        # switch to virtual storage key upon upload
        # the local filepath is already cached at that point
        self._key_is_virtual = True
        # ensure that the artifact is uploaded
        self._to_store = True

    self._save_skip_storage(**kwargs)

    from lamindb._save import check_and_attempt_clearing, check_and_attempt_upload

    using_key = None
    if "using" in kwargs:
        using_key = kwargs["using"]
    exception = check_and_attempt_upload(
        self, using_key, access_token=access_token, print_progress=print_progress
    )
    if exception is not None:
        # roll back the DB record if the upload failed
        self._delete_skip_storage()
        raise RuntimeError(exception)
    # clear any storage key scheduled for deletion (e.g. by replace())
    exception = check_and_attempt_clearing(self, using_key)
    if exception is not None:
        raise RuntimeError(exception)
    if local_path is not None and not state_was_adding:
        # only move the local artifact to cache if it was not newly created
        local_path_cache = ln_setup.settings.storage.cache_dir / local_path.name
        # don't use Path.rename here because of cross-device link error
        # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
        shutil.move(
            local_path,  # type: ignore
            local_path_cache,
        )
        logger.important(f"moved local artifact to cache: {local_path_cache}")
    return self
|
1137
|
+
|
1138
|
+
|
1139
|
+
def _save_skip_storage(file, **kwargs) -> None:
    # persist the record and its feature-set links without touching storage;
    # feature sets must be saved before the record so FKs resolve
    save_feature_sets(file)
    super(Artifact, file).save(**kwargs)
    save_feature_set_links(file)
|
1143
|
+
|
1144
|
+
|
1145
|
+
@property  # type: ignore
@doc_args(Artifact.path.__doc__)
def path(self) -> Path | UPath:
    """{}"""  # noqa: D415
    # return only the path, without StorageSettings
    filepath, _ = filepath_from_artifact(self, using_key=settings._using_key)
    return filepath
|
1152
|
+
|
1153
|
+
|
1154
|
+
# get cache path without triggering sync
|
1155
|
+
@property  # type: ignore
def _cache_path(self) -> UPath:
    # get cache path without triggering sync
    filepath, cache_key = filepath_cache_key_from_artifact(
        self, using_key=settings._using_key
    )
    # a local artifact is its own cache
    if isinstance(filepath, LocalPathClasses):
        return filepath
    return setup_settings.instance.storage.cloud_to_local_no_update(
        filepath, cache_key=cache_key
    )
|
1165
|
+
|
1166
|
+
|
1167
|
+
# docstring handled through attach_func_to_class_method
|
1168
|
+
def restore(self) -> None:
    # bring the artifact back from the trash by resetting visibility to default
    self.visibility = VisibilityChoice.default.value
    self.save()
|
1171
|
+
|
1172
|
+
|
1173
|
+
# names of module-level functions that are monkey-patched onto Artifact below
METHOD_NAMES = [
    "__init__",
    "from_anndata",
    "from_df",
    "from_mudata",
    "open",
    "cache",
    "load",
    "delete",
    "save",
    "replace",
    "from_dir",
    "restore",
]

if ln_setup._TESTING:
    from inspect import signature

    # snapshot the original signatures so tests can verify the patched
    # methods keep the documented API
    SIGS = {
        name: signature(getattr(Artifact, name))
        for name in METHOD_NAMES
        if name != "__init__"
    }

for name in METHOD_NAMES:
    attach_func_to_class_method(name, Artifact, globals())

# privates currently dealt with separately
Artifact._delete_skip_storage = _delete_skip_storage
Artifact._save_skip_storage = _save_skip_storage
Artifact._cache_path = _cache_path
Artifact.path = path
Artifact.describe = describe
Artifact.view_lineage = view_lineage
|