lamindb 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_view.py +2 -2
- lamindb/base/types.py +50 -11
- lamindb/core/types.py +1 -1
- lamindb/curators/__init__.py +232 -222
- lamindb/curators/_cellxgene_schemas/__init__.py +1 -1
- lamindb/models/_feature_manager.py +21 -28
- lamindb/models/_from_values.py +53 -97
- lamindb/models/_label_manager.py +17 -10
- lamindb/models/artifact.py +30 -6
- lamindb/models/can_curate.py +20 -20
- lamindb/models/feature.py +47 -48
- lamindb/models/record.py +29 -25
- lamindb/models/run.py +4 -8
- lamindb/models/schema.py +7 -7
- {lamindb-1.3.0.dist-info → lamindb-1.3.1.dist-info}/METADATA +3 -3
- {lamindb-1.3.0.dist-info → lamindb-1.3.1.dist-info}/RECORD +19 -19
- {lamindb-1.3.0.dist-info → lamindb-1.3.1.dist-info}/LICENSE +0 -0
- {lamindb-1.3.0.dist-info → lamindb-1.3.1.dist-info}/WHEEL +0 -0
@@ -113,7 +113,7 @@ def _create_sources(
|
|
113
113
|
if source is None:
|
114
114
|
logger.error(
|
115
115
|
f"Could not find source: {entity}\n"
|
116
|
-
" → consider running `bionty.core.
|
116
|
+
" → consider running `bionty.core.sync_public_sources()`"
|
117
117
|
)
|
118
118
|
return source
|
119
119
|
|
@@ -24,7 +24,7 @@ from lamindb.core.storage import LocalPathClasses
|
|
24
24
|
from lamindb.errors import DoesNotExist, ValidationError
|
25
25
|
from lamindb.models._from_values import _format_values
|
26
26
|
from lamindb.models.feature import (
|
27
|
-
|
27
|
+
serialize_pandas_dtype,
|
28
28
|
suggest_categorical_for_str_iterable,
|
29
29
|
)
|
30
30
|
from lamindb.models.record import (
|
@@ -485,6 +485,7 @@ def parse_staged_feature_sets_from_anndata(
|
|
485
485
|
adata: AnnData,
|
486
486
|
var_field: FieldAttr | None = None,
|
487
487
|
obs_field: FieldAttr = Feature.name,
|
488
|
+
uns_field: FieldAttr | None = None,
|
488
489
|
mute: bool = False,
|
489
490
|
organism: str | Record | None = None,
|
490
491
|
) -> dict:
|
@@ -501,15 +502,9 @@ def parse_staged_feature_sets_from_anndata(
|
|
501
502
|
data_parse = ad.read_h5ad(filepath, backed="r")
|
502
503
|
type = "float"
|
503
504
|
else:
|
504
|
-
type = (
|
505
|
-
"float"
|
506
|
-
if adata.X is None
|
507
|
-
else convert_pandas_dtype_to_lamin_dtype(adata.X.dtype)
|
508
|
-
)
|
505
|
+
type = "float" if adata.X is None else serialize_pandas_dtype(adata.X.dtype)
|
509
506
|
feature_sets = {}
|
510
507
|
if var_field is not None:
|
511
|
-
logger.info("parsing feature names of X stored in slot 'var'")
|
512
|
-
logger.indent = " "
|
513
508
|
schema_var = Schema.from_values(
|
514
509
|
data_parse.var.index,
|
515
510
|
var_field,
|
@@ -520,13 +515,7 @@ def parse_staged_feature_sets_from_anndata(
|
|
520
515
|
)
|
521
516
|
if schema_var is not None:
|
522
517
|
feature_sets["var"] = schema_var
|
523
|
-
|
524
|
-
logger.indent = ""
|
525
|
-
if schema_var is None:
|
526
|
-
logger.warning("skip linking features to artifact in slot 'var'")
|
527
|
-
if len(data_parse.obs.columns) > 0:
|
528
|
-
logger.info("parsing feature names of slot 'obs'")
|
529
|
-
logger.indent = " "
|
518
|
+
if obs_field is not None and len(data_parse.obs.columns) > 0:
|
530
519
|
schema_obs = Schema.from_df(
|
531
520
|
df=data_parse.obs,
|
532
521
|
field=obs_field,
|
@@ -535,10 +524,13 @@ def parse_staged_feature_sets_from_anndata(
|
|
535
524
|
)
|
536
525
|
if schema_obs is not None:
|
537
526
|
feature_sets["obs"] = schema_obs
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
527
|
+
if uns_field is not None and len(data_parse.uns) > 0:
|
528
|
+
validated_features = Feature.from_values( # type: ignore
|
529
|
+
data_parse.uns.keys(), field=uns_field, organism=organism
|
530
|
+
)
|
531
|
+
if len(validated_features) > 0:
|
532
|
+
schema_uns = Schema(validated_features, dtype=None, otype="dict")
|
533
|
+
feature_sets["uns"] = schema_uns
|
542
534
|
return feature_sets
|
543
535
|
|
544
536
|
|
@@ -575,7 +567,7 @@ def infer_feature_type_convert_json(
|
|
575
567
|
return "cat ? str", value, message
|
576
568
|
elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
|
577
569
|
if isinstance(value, (pd.Series, np.ndarray, pd.Categorical)):
|
578
|
-
dtype =
|
570
|
+
dtype = serialize_pandas_dtype(value.dtype)
|
579
571
|
if dtype == "str":
|
580
572
|
# ndarray doesn't know categorical, so there was no conscious choice
|
581
573
|
# offer both options
|
@@ -848,7 +840,7 @@ def _add_values(
|
|
848
840
|
)
|
849
841
|
validated = registry.validate(keys, field=feature_param_field, mute=True)
|
850
842
|
keys_array = np.array(keys)
|
851
|
-
|
843
|
+
keys_array[validated]
|
852
844
|
if validated.sum() != len(keys):
|
853
845
|
not_validated_keys = keys_array[~validated]
|
854
846
|
not_validated_keys_dtype_message = [
|
@@ -874,10 +866,7 @@ def _add_values(
|
|
874
866
|
f"Here is how to create a {model_name.lower()}:\n\n{hint}"
|
875
867
|
)
|
876
868
|
raise ValidationError(msg)
|
877
|
-
|
878
|
-
validated_keys,
|
879
|
-
field=feature_param_field,
|
880
|
-
)
|
869
|
+
|
881
870
|
# figure out which of the values go where
|
882
871
|
features_labels = defaultdict(list)
|
883
872
|
_feature_values = []
|
@@ -937,12 +926,14 @@ def _add_values(
|
|
937
926
|
if "ULabel" not in feature.dtype:
|
938
927
|
feature.dtype += "[ULabel]"
|
939
928
|
feature.save()
|
940
|
-
validated = ULabel.validate(values, field=
|
929
|
+
validated = ULabel.validate(values, field=ULabel.name, mute=True)
|
941
930
|
values_array = np.array(values)
|
942
931
|
validated_values = values_array[validated]
|
943
932
|
if validated.sum() != len(values):
|
944
933
|
not_validated_values += values_array[~validated].tolist()
|
945
|
-
label_records = ULabel.from_values(
|
934
|
+
label_records = ULabel.from_values(
|
935
|
+
validated_values, field=ULabel.name, mute=True
|
936
|
+
) # type: ignore
|
946
937
|
features_labels["ULabel"] += [
|
947
938
|
(feature, label_record) for label_record in label_records
|
948
939
|
]
|
@@ -1120,6 +1111,7 @@ def _add_set_from_anndata(
|
|
1120
1111
|
self,
|
1121
1112
|
var_field: FieldAttr | None = None,
|
1122
1113
|
obs_field: FieldAttr | None = Feature.name,
|
1114
|
+
uns_field: FieldAttr | None = None,
|
1123
1115
|
mute: bool = False,
|
1124
1116
|
organism: str | Record | None = None,
|
1125
1117
|
):
|
@@ -1132,6 +1124,7 @@ def _add_set_from_anndata(
|
|
1132
1124
|
adata,
|
1133
1125
|
var_field=var_field,
|
1134
1126
|
obs_field=obs_field,
|
1127
|
+
uns_field=uns_field,
|
1135
1128
|
mute=mute,
|
1136
1129
|
organism=organism,
|
1137
1130
|
)
|
@@ -1255,7 +1248,7 @@ def _add_from(self, data: Artifact | Collection, transfer_logs: dict = None):
|
|
1255
1248
|
# create records from ontology_id
|
1256
1249
|
if hasattr(registry, "_ontology_id_field") and len(member_uids) > 0:
|
1257
1250
|
# create from bionty
|
1258
|
-
members_records = registry.from_values(member_uids, field=field)
|
1251
|
+
members_records = registry.from_values(member_uids, field=field, mute=True)
|
1259
1252
|
save([r for r in members_records if r._state.adding])
|
1260
1253
|
validated = registry.validate(member_uids, field=field, mute=True)
|
1261
1254
|
new_members_uids = list(compress(member_uids, ~validated))
|
lamindb/models/_from_values.py
CHANGED
@@ -1,15 +1,11 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
import re
|
4
3
|
from typing import TYPE_CHECKING
|
5
4
|
|
6
5
|
import pandas as pd
|
7
|
-
from django.core.exceptions import FieldDoesNotExist
|
8
6
|
from lamin_utils import colors, logger
|
9
7
|
|
10
8
|
if TYPE_CHECKING:
|
11
|
-
from collections.abc import Iterable
|
12
|
-
|
13
9
|
from lamindb.base.types import FieldAttr, ListLike
|
14
10
|
|
15
11
|
from .query_set import RecordList
|
@@ -30,7 +26,7 @@ def _from_values(
|
|
30
26
|
from .query_set import RecordList
|
31
27
|
|
32
28
|
registry = field.field.model # type: ignore
|
33
|
-
organism_record =
|
29
|
+
organism_record = get_organism_record_from_field(field, organism, values=iterable)
|
34
30
|
# TODO: the create is problematic if field is not a name field
|
35
31
|
if create:
|
36
32
|
create_kwargs = {}
|
@@ -55,15 +51,17 @@ def _from_values(
|
|
55
51
|
|
56
52
|
# new records to be created based on new values
|
57
53
|
if len(nonexist_values) > 0:
|
58
|
-
if
|
54
|
+
if registry.__base__.__name__ == "BioRecord":
|
55
|
+
from bionty._organism import is_organism_required
|
56
|
+
|
59
57
|
# if can and needed, get organism record from the existing records
|
60
58
|
if (
|
61
59
|
organism_record is None
|
62
60
|
and len(records) > 0
|
63
|
-
and
|
61
|
+
and is_organism_required(registry)
|
64
62
|
):
|
65
63
|
organism_record = records[0].organism
|
66
|
-
|
64
|
+
records_public, unmapped_values = create_records_from_source(
|
67
65
|
iterable_idx=nonexist_values,
|
68
66
|
field=field,
|
69
67
|
organism=organism_record,
|
@@ -71,11 +69,11 @@ def _from_values(
|
|
71
69
|
msg=msg,
|
72
70
|
mute=mute,
|
73
71
|
)
|
74
|
-
if len(
|
72
|
+
if len(records_public) > 0:
|
75
73
|
msg = ""
|
76
|
-
for record in
|
74
|
+
for record in records_public:
|
77
75
|
record._from_source = True
|
78
|
-
records +=
|
76
|
+
records += records_public
|
79
77
|
else:
|
80
78
|
unmapped_values = nonexist_values
|
81
79
|
# unmapped new_ids will NOT create records
|
@@ -187,25 +185,26 @@ def create_records_from_source(
|
|
187
185
|
"""Create records from source."""
|
188
186
|
model = field.field.model # type: ignore
|
189
187
|
records: list = []
|
190
|
-
# populate additional fields from
|
191
|
-
from bionty.
|
192
|
-
|
188
|
+
# populate additional fields from public_df
|
189
|
+
from bionty._source import filter_public_df_columns, get_source_record
|
190
|
+
|
191
|
+
# get the default source
|
192
|
+
source_record = get_source_record(model, organism, source)
|
193
193
|
|
194
|
-
# create the corresponding
|
194
|
+
# create the corresponding PublicOntology object from model
|
195
195
|
try:
|
196
|
-
|
197
|
-
public_ontology = model.public(organism=organism, source=source)
|
196
|
+
public_ontology = model.public(source=source_record)
|
198
197
|
except Exception:
|
199
|
-
#
|
198
|
+
# no public source
|
200
199
|
return records, iterable_idx
|
201
|
-
# get the default source
|
202
|
-
if source is None:
|
203
|
-
source = get_source_record(public_ontology, model)
|
204
200
|
|
205
|
-
# filter the columns in
|
206
|
-
|
201
|
+
# filter the columns in public df based on fields
|
202
|
+
public_df = filter_public_df_columns(model=model, public_ontology=public_ontology)
|
203
|
+
|
204
|
+
if public_df.empty:
|
205
|
+
return records, iterable_idx
|
207
206
|
|
208
|
-
# standardize in the
|
207
|
+
# standardize in the public reference
|
209
208
|
# do not inspect synonyms if the field is not name field
|
210
209
|
inspect_synonyms = True
|
211
210
|
if hasattr(model, "_name_field") and field.field.name != model._name_field: # type: ignore
|
@@ -231,30 +230,30 @@ def create_records_from_source(
|
|
231
230
|
|
232
231
|
iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
|
233
232
|
|
234
|
-
# create records for values that are found in the
|
233
|
+
# create records for values that are found in the public reference
|
235
234
|
# matching either field or synonyms
|
236
|
-
mapped_values = iterable_idx.intersection(
|
235
|
+
mapped_values = iterable_idx.intersection(public_df[field.field.name]) # type: ignore
|
237
236
|
|
238
237
|
multi_msg = ""
|
239
238
|
if len(mapped_values) > 0:
|
240
|
-
|
239
|
+
public_kwargs, multi_msg = _bulk_create_dicts_from_df(
|
241
240
|
keys=mapped_values,
|
242
241
|
column_name=field.field.name, # type: ignore
|
243
|
-
df=
|
242
|
+
df=public_df,
|
244
243
|
)
|
245
244
|
|
246
245
|
# this here is needed when the organism is required to create new records
|
247
246
|
if organism is None:
|
248
|
-
organism =
|
249
|
-
field,
|
247
|
+
organism = get_organism_record_from_field(
|
248
|
+
field, source_record.organism, values=mapped_values
|
250
249
|
)
|
251
250
|
|
252
251
|
create_kwargs = (
|
253
|
-
{"organism": organism, "source":
|
252
|
+
{"organism": organism, "source": source_record}
|
254
253
|
if organism is not None
|
255
|
-
else {"source":
|
254
|
+
else {"source": source_record}
|
256
255
|
)
|
257
|
-
for bk in
|
256
|
+
for bk in public_kwargs:
|
258
257
|
records.append(model(**bk, **create_kwargs, _skip_validation=True))
|
259
258
|
|
260
259
|
# number of records that matches field (not synonyms)
|
@@ -279,12 +278,12 @@ def create_records_from_source(
|
|
279
278
|
if len(multi_msg) > 0 and not mute:
|
280
279
|
logger.warning(multi_msg)
|
281
280
|
|
282
|
-
# return the values that are not found in the
|
281
|
+
# return the values that are not found in the public reference
|
283
282
|
unmapped_values = iterable_idx.difference(mapped_values)
|
284
283
|
return records, unmapped_values
|
285
284
|
|
286
285
|
|
287
|
-
def index_iterable(iterable:
|
286
|
+
def index_iterable(iterable: ListLike) -> pd.Index:
|
288
287
|
"""Get unique values from an iterable."""
|
289
288
|
idx = pd.Index(iterable).unique()
|
290
289
|
# No entries are made for NAs, '', None
|
@@ -293,7 +292,7 @@ def index_iterable(iterable: Iterable) -> pd.Index:
|
|
293
292
|
|
294
293
|
|
295
294
|
def _format_values(
|
296
|
-
names:
|
295
|
+
names: ListLike, n: int = 20, quotes: bool = True, sep: str = "'"
|
297
296
|
) -> str:
|
298
297
|
"""Format values for printing."""
|
299
298
|
if isinstance(names, dict):
|
@@ -340,36 +339,10 @@ def _bulk_create_dicts_from_df(
|
|
340
339
|
return df.reset_index().to_dict(orient="records"), multi_msg
|
341
340
|
|
342
341
|
|
343
|
-
def
|
344
|
-
"""Check if the registry has an organism field and is required.
|
345
|
-
|
346
|
-
Returns:
|
347
|
-
True if the registry has an organism field and is required, False otherwise.
|
348
|
-
"""
|
349
|
-
try:
|
350
|
-
organism_field = registry._meta.get_field("organism")
|
351
|
-
# organism is not required or not a relation
|
352
|
-
if organism_field.null or not organism_field.is_relation:
|
353
|
-
return False
|
354
|
-
else:
|
355
|
-
return True
|
356
|
-
except FieldDoesNotExist:
|
357
|
-
return False
|
358
|
-
|
359
|
-
|
360
|
-
def _is_simple_field_unique(field: FieldAttr) -> bool:
|
361
|
-
"""Check if the field is an id field."""
|
362
|
-
# id field is a unique field that's not a relation
|
363
|
-
field = field.field
|
364
|
-
if field.unique and not field.is_relation:
|
365
|
-
return True
|
366
|
-
return False
|
367
|
-
|
368
|
-
|
369
|
-
def _get_organism_record( # type: ignore
|
342
|
+
def get_organism_record_from_field( # type: ignore
|
370
343
|
field: FieldAttr,
|
371
344
|
organism: str | Record | None = None,
|
372
|
-
values:
|
345
|
+
values: ListLike = None,
|
373
346
|
using_key: str | None = None,
|
374
347
|
) -> Record | None:
|
375
348
|
"""Get organism record.
|
@@ -385,45 +358,28 @@ def _get_organism_record( # type: ignore
|
|
385
358
|
The organism FK is required for the registry
|
386
359
|
The field is not unique or the organism is not None
|
387
360
|
"""
|
361
|
+
if values is None:
|
362
|
+
values = []
|
388
363
|
registry = field.field.model
|
389
364
|
field_str = field.field.name
|
390
|
-
|
365
|
+
# id field is a unique field that's not a relation
|
366
|
+
is_simple_field_unique = field.field.unique and not field.field.is_relation
|
367
|
+
check = not is_simple_field_unique or organism is not None
|
391
368
|
|
392
|
-
if
|
393
|
-
|
369
|
+
if (
|
370
|
+
registry.__get_name_with_module__() == "bionty.Gene"
|
371
|
+
and field.field.name == "ensembl_gene_id"
|
372
|
+
and len(values) > 0
|
373
|
+
and organism is None
|
374
|
+
): # type: ignore
|
375
|
+
from bionty._organism import organism_from_ensembl_id
|
394
376
|
|
395
|
-
|
396
|
-
|
377
|
+
return organism_from_ensembl_id(values[0], using_key) # type: ignore
|
378
|
+
|
379
|
+
if registry.__base__.__name__ == "BioRecord" and check:
|
380
|
+
from bionty._organism import create_or_get_organism_record
|
397
381
|
|
398
382
|
organism_record = create_or_get_organism_record(
|
399
383
|
organism=organism, registry=registry, field=field_str
|
400
384
|
)
|
401
|
-
if organism_record is not None:
|
402
|
-
return organism_record.save()
|
403
|
-
|
404
|
-
|
405
|
-
def _organism_from_ensembl_id(id: str, using_key: str | None) -> Record | None: # type: ignore
|
406
|
-
"""Get organism record from ensembl id."""
|
407
|
-
import bionty as bt
|
408
|
-
from bionty.base.dev._io import s3_bionty_assets
|
409
|
-
|
410
|
-
localpath = s3_bionty_assets(
|
411
|
-
".lamindb/0QeqXlKq9aqW8aqe0000.parquet", bt.base.settings.versionsdir
|
412
|
-
)
|
413
|
-
ensembl_prefixes = pd.read_parquet(localpath).set_index("gene_prefix")
|
414
|
-
|
415
|
-
prefix = re.sub(r"\d+", "", id)
|
416
|
-
if prefix in ensembl_prefixes.index:
|
417
|
-
organism_name = ensembl_prefixes.loc[prefix, "name"].lower()
|
418
|
-
|
419
|
-
using_key = None if using_key == "default" else using_key
|
420
|
-
|
421
|
-
organism_record = (
|
422
|
-
bt.Organism.using(using_key).filter(name=organism_name).one_or_none()
|
423
|
-
)
|
424
|
-
if organism_record is None:
|
425
|
-
organism_record = bt.Organism.from_source(name=organism_name)
|
426
|
-
if organism_record is not None:
|
427
|
-
organism_record.save(using=using_key)
|
428
|
-
|
429
385
|
return organism_record
|
lamindb/models/_label_manager.py
CHANGED
@@ -142,7 +142,7 @@ def _save_validated_records(
|
|
142
142
|
# save labels from ontology_ids
|
143
143
|
if hasattr(registry, "_ontology_id_field") and label_uids:
|
144
144
|
try:
|
145
|
-
records = registry.from_values(label_uids, field=field)
|
145
|
+
records = registry.from_values(label_uids, field=field, mute=True)
|
146
146
|
save([r for r in records if r._state.adding])
|
147
147
|
except Exception: # noqa: S110
|
148
148
|
pass
|
@@ -240,7 +240,7 @@ class LabelManager:
|
|
240
240
|
continue
|
241
241
|
# look for features
|
242
242
|
data_name_lower = data.__class__.__name__.lower()
|
243
|
-
labels_by_features = defaultdict(list)
|
243
|
+
labels_by_features: dict = defaultdict(list)
|
244
244
|
features = set()
|
245
245
|
new_labels = save_validated_records(labels)
|
246
246
|
if len(new_labels) > 0:
|
@@ -248,18 +248,24 @@ class LabelManager:
|
|
248
248
|
new_labels, using_key, transfer_logs=transfer_logs
|
249
249
|
)
|
250
250
|
for label in labels:
|
251
|
+
keys: list = []
|
251
252
|
# if the link table doesn't follow this convention, we'll ignore it
|
252
253
|
if not hasattr(label, f"links_{data_name_lower}"):
|
253
254
|
key = None
|
255
|
+
keys.append(key)
|
254
256
|
else:
|
255
|
-
|
256
|
-
|
257
|
+
links = (
|
258
|
+
getattr(label, f"links_{data_name_lower}")
|
259
|
+
.filter(**{f"{data_name_lower}_id": data.id})
|
260
|
+
.all()
|
257
261
|
)
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
262
|
+
for link in links:
|
263
|
+
if link.feature is not None:
|
264
|
+
features.add(link.feature)
|
265
|
+
key = link.feature.name
|
266
|
+
else:
|
267
|
+
key = None
|
268
|
+
keys.append(key)
|
263
269
|
label_returned = transfer_to_default_db(
|
264
270
|
label,
|
265
271
|
using_key,
|
@@ -270,7 +276,8 @@ class LabelManager:
|
|
270
276
|
# TODO: refactor return value of transfer to default db
|
271
277
|
if label_returned is not None:
|
272
278
|
label = label_returned
|
273
|
-
|
279
|
+
for key in keys:
|
280
|
+
labels_by_features[key].append(label)
|
274
281
|
# treat features
|
275
282
|
new_features = save_validated_records(list(features))
|
276
283
|
if len(new_features) > 0:
|
lamindb/models/artifact.py
CHANGED
@@ -16,6 +16,7 @@ from django.db.models import CASCADE, PROTECT, Q
|
|
16
16
|
from lamin_utils import colors, logger
|
17
17
|
from lamindb_setup import settings as setup_settings
|
18
18
|
from lamindb_setup._init_instance import register_storage_in_instance
|
19
|
+
from lamindb_setup.core import doc_args
|
19
20
|
from lamindb_setup.core._settings_storage import init_storage
|
20
21
|
from lamindb_setup.core.hashing import HASH_LENGTH, hash_dir, hash_file
|
21
22
|
from lamindb_setup.core.types import UPathStr
|
@@ -93,6 +94,8 @@ WARNING_RUN_TRANSFORM = "no run & transform got linked, call `ln.track()` & re-r
|
|
93
94
|
|
94
95
|
WARNING_NO_INPUT = "run input wasn't tracked, call `ln.track()` and re-run"
|
95
96
|
|
97
|
+
DEBUG_KWARGS_DOC = "**kwargs: Internal arguments for debugging."
|
98
|
+
|
96
99
|
try:
|
97
100
|
from ..core.storage._zarr import identify_zarr_type
|
98
101
|
except ImportError:
|
@@ -1428,7 +1431,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
1428
1431
|
kwargs["uid"] = uid
|
1429
1432
|
|
1430
1433
|
# only set key now so that we don't do a look-up on it in case revises is passed
|
1431
|
-
if revises is not None:
|
1434
|
+
if revises is not None and revises.key is not None:
|
1435
|
+
assert revises.key.endswith(kwargs["suffix"]), ( # noqa: S101
|
1436
|
+
revises.key,
|
1437
|
+
kwargs["suffix"],
|
1438
|
+
)
|
1432
1439
|
kwargs["key"] = revises.key
|
1433
1440
|
|
1434
1441
|
kwargs["kind"] = kind
|
@@ -2010,6 +2017,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2010
2017
|
# no need to upload if new file is already in storage
|
2011
2018
|
self._to_store = not check_path_in_storage
|
2012
2019
|
|
2020
|
+
# update old suffix with the new one so that checks in record pass
|
2021
|
+
# replace() supports changing the suffix
|
2022
|
+
self._old_suffix = self.suffix
|
2023
|
+
|
2013
2024
|
def open(
|
2014
2025
|
self, mode: str = "r", is_run_input: bool | None = None, **kwargs
|
2015
2026
|
) -> Union[
|
@@ -2146,13 +2157,16 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2146
2157
|
_track_run_input(self, is_run_input)
|
2147
2158
|
return access
|
2148
2159
|
|
2149
|
-
def load(
|
2160
|
+
def load(
|
2161
|
+
self, *, is_run_input: bool | None = None, mute: bool = False, **kwargs
|
2162
|
+
) -> Any:
|
2150
2163
|
"""Cache and load into memory.
|
2151
2164
|
|
2152
2165
|
See all :mod:`~lamindb.core.loaders`.
|
2153
2166
|
|
2154
2167
|
Args:
|
2155
2168
|
is_run_input: Whether to track this artifact as run input.
|
2169
|
+
mute: Silence logging of caching progress.
|
2156
2170
|
**kwargs: Keyword arguments for the loader.
|
2157
2171
|
|
2158
2172
|
Examples:
|
@@ -2188,7 +2202,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2188
2202
|
filepath, cache_key = filepath_cache_key_from_artifact(
|
2189
2203
|
self, using_key=settings._using_key
|
2190
2204
|
)
|
2191
|
-
cache_path = _synchronize_cleanup_on_error(
|
2205
|
+
cache_path = _synchronize_cleanup_on_error(
|
2206
|
+
filepath, cache_key=cache_key, print_progress=not mute
|
2207
|
+
)
|
2192
2208
|
try:
|
2193
2209
|
# cache_path is local so doesn't trigger any sync in load_to_memory
|
2194
2210
|
access_memory = load_to_memory(cache_path, **kwargs)
|
@@ -2209,14 +2225,17 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2209
2225
|
cache_path.unlink(missing_ok=True)
|
2210
2226
|
# download again and try to load into memory
|
2211
2227
|
cache_path = _synchronize_cleanup_on_error(
|
2212
|
-
filepath, cache_key=cache_key
|
2228
|
+
filepath, cache_key=cache_key, print_progress=not mute
|
2213
2229
|
)
|
2214
2230
|
access_memory = load_to_memory(cache_path, **kwargs)
|
2215
2231
|
# only call if load is successfull
|
2216
2232
|
_track_run_input(self, is_run_input)
|
2217
2233
|
return access_memory
|
2218
2234
|
|
2219
|
-
|
2235
|
+
@doc_args(DEBUG_KWARGS_DOC)
|
2236
|
+
def cache(
|
2237
|
+
self, *, is_run_input: bool | None = None, mute: bool = False, **kwargs
|
2238
|
+
) -> Path:
|
2220
2239
|
"""Download cloud artifact to local cache.
|
2221
2240
|
|
2222
2241
|
Follows synching logic: only caches an artifact if it's outdated in the local cache.
|
@@ -2224,8 +2243,9 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2224
2243
|
Returns a path to a locally cached on-disk object (say a `.jpg` file).
|
2225
2244
|
|
2226
2245
|
Args:
|
2246
|
+
mute: Silence logging of caching progress.
|
2227
2247
|
is_run_input: Whether to track this artifact as run input.
|
2228
|
-
|
2248
|
+
{}
|
2229
2249
|
|
2230
2250
|
Example::
|
2231
2251
|
|
@@ -2241,6 +2261,8 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2241
2261
|
filepath, cache_key = filepath_cache_key_from_artifact(
|
2242
2262
|
self, using_key=settings._using_key
|
2243
2263
|
)
|
2264
|
+
if mute:
|
2265
|
+
kwargs["print_progress"] = False
|
2244
2266
|
cache_path = _synchronize_cleanup_on_error(
|
2245
2267
|
filepath, cache_key=cache_key, **kwargs
|
2246
2268
|
)
|
@@ -2368,11 +2390,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2368
2390
|
if delete_msg != "did-not-delete":
|
2369
2391
|
logger.success(f"deleted {colors.yellow(f'{path}')}")
|
2370
2392
|
|
2393
|
+
@doc_args(DEBUG_KWARGS_DOC)
|
2371
2394
|
def save(self, upload: bool | None = None, **kwargs) -> Artifact:
|
2372
2395
|
"""Save to database & storage.
|
2373
2396
|
|
2374
2397
|
Args:
|
2375
2398
|
upload: Trigger upload to cloud storage in instances with hybrid storage mode.
|
2399
|
+
{}
|
2376
2400
|
|
2377
2401
|
Example::
|
2378
2402
|
|