lamindb 0.63.5__py3-none-any.whl → 0.64.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +5 -4
- lamindb/{_file.py → _artifact.py} +265 -210
- lamindb/_dataset.py +87 -115
- lamindb/_delete.py +2 -2
- lamindb/_filter.py +2 -2
- lamindb/_parents.py +7 -7
- lamindb/_query_manager.py +5 -2
- lamindb/_registry.py +3 -3
- lamindb/_save.py +63 -63
- lamindb/dev/_data.py +10 -9
- lamindb/dev/_feature_manager.py +10 -10
- lamindb/dev/_label_manager.py +4 -4
- lamindb/dev/_run_context.py +2 -2
- lamindb/dev/_settings.py +5 -4
- lamindb/dev/_view_tree.py +5 -5
- lamindb/dev/datasets/_core.py +6 -6
- lamindb/dev/hashing.py +11 -1
- lamindb/dev/storage/__init__.py +1 -1
- lamindb/dev/storage/_backed_access.py +6 -6
- lamindb/dev/storage/file.py +36 -31
- lamindb/dev/versioning.py +3 -3
- {lamindb-0.63.5.dist-info → lamindb-0.64.1.dist-info}/METADATA +5 -5
- lamindb-0.64.1.dist-info/RECORD +48 -0
- lamindb-0.63.5.dist-info/RECORD +0 -48
- {lamindb-0.63.5.dist-info → lamindb-0.64.1.dist-info}/LICENSE +0 -0
- {lamindb-0.63.5.dist-info → lamindb-0.64.1.dist-info}/WHEEL +0 -0
lamindb/_save.py
CHANGED
@@ -11,12 +11,12 @@ from django.db import transaction
 from django.utils.functional import partition
 from lamin_utils import logger
 from lamindb_setup.dev.upath import print_hook
-from lnschema_core.models import File, Registry
+from lnschema_core.models import Artifact, Registry
 
-from lamindb.dev.storage import store_object
 from lamindb.dev.storage.file import (
-    auto_storage_key_from_file,
+    auto_storage_key_from_artifact,
     delete_storage_using_key,
+    store_artifact,
 )
 
 try:
@@ -73,20 +73,20 @@ def save(
 
     # previously, this was all set based,
     # but models without primary keys aren't hashable
-    # we distinguish between files and non-files
-    # for files, we want to bulk-upload
+    # we distinguish between artifacts and non-artifacts
+    # for artifacts, we want to bulk-upload
     # rather than upload one-by-one
-    non_files, files = partition(lambda r: isinstance(r, File), records)
-    if non_files:
+    non_artifacts, artifacts = partition(lambda r: isinstance(r, Artifact), records)
+    if non_artifacts:
         # first save all records that do not yet have a primary key without
         # recursing parents
-        _, non_files_without_pk = partition(lambda r: r.pk is None, non_files)
-        bulk_create(non_files_without_pk, ignore_conflicts=ignore_conflicts)
-        non_files_with_parents = [
-            r for r in non_files_without_pk if hasattr(r, "_parents")
+        _, non_artifacts_without_pk = partition(lambda r: r.pk is None, non_artifacts)
+        bulk_create(non_artifacts_without_pk, ignore_conflicts=ignore_conflicts)
+        non_artifacts_with_parents = [
+            r for r in non_artifacts_without_pk if hasattr(r, "_parents")
         ]
 
-        if len(non_files_with_parents) > 0 and kwargs.get("parents") is not False:
+        if len(non_artifacts_with_parents) > 0 and kwargs.get("parents") is not False:
             # this can only happen within lnschema_bionty right now!!
             # we might extend to core lamindb later
             import lnschema_bionty as lb
@@ -105,14 +105,14 @@ def save(
                 "you can switch this off via: lb.settings.auto_save_parents ="
                 " False"
             )
-            for record in non_files_with_parents:
+            for record in non_artifacts_with_parents:
                 record._save_ontology_parents(mute=True)
 
-    if files:
+    if artifacts:
         with transaction.atomic():
-            for record in files:
+            for record in artifacts:
                 record._save_skip_storage()
-        store_files(files)
+        store_artifacts(artifacts)
 
     # this function returns None as potentially 10k records might be saved
     # refreshing all of them from the DB would mean a severe performance penalty
@@ -128,27 +128,27 @@ def bulk_create(records: Iterable[Registry], ignore_conflicts: Optional[bool] =
     orm.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
 
 
-# This is also used within File.save()
-def check_and_attempt_upload(file: File) -> Optional[Exception]:
-    # if File object is either newly instantiated or replace() was called on
+# This is also used within Artifact.save()
+def check_and_attempt_upload(artifact: Artifact) -> Optional[Exception]:
+    # if Artifact object is either newly instantiated or replace() was called on
     # a local env it will have a _local_filepath and needs to be uploaded
-    if hasattr(file, "_local_filepath"):
+    if hasattr(artifact, "_local_filepath"):
         try:
-            upload_data_object(file)
+            upload_artifact(artifact)
         except Exception as exception:
-            logger.warning(f"could not upload file: {file}")
+            logger.warning(f"could not upload artifact: {artifact}")
             return exception
         # copies (if on-disk) or moves the temporary file (if in-memory) to the cache
-        copy_or_move_to_cache(file)
+        copy_or_move_to_cache(artifact)
         # after successful upload, we should remove the attribute so that another call
         # call to save won't upload again, the user should call replace() then
-        del file._local_filepath
+        del artifact._local_filepath
     # returning None means proceed (either success or no action needed)
    return None
 
 
-def copy_or_move_to_cache(file: File):
-    local_path = file._local_filepath
+def copy_or_move_to_cache(artifact: Artifact):
+    local_path = artifact._local_filepath
 
     # in-memory zarr or on-disk zarr
     if local_path is None or not local_path.is_file():
@@ -164,7 +164,7 @@ def copy_or_move_to_cache(file: File):
        return None
 
     # maybe create something like storage.key_to_local(key) later to simplfy
-    storage_key = auto_storage_key_from_file(file)
+    storage_key = auto_storage_key_from_artifact(artifact)
     storage_path = lamindb_setup.settings.storage.key_to_filepath(storage_key)
     cache_path = lamindb_setup.settings.storage.cloud_to_local_no_update(storage_path)
     cache_path.parent.mkdir(parents=True, exist_ok=True)
@@ -178,58 +178,58 @@ def copy_or_move_to_cache(file: File):
     os.utime(cache_path, times=(mts, mts))
 
 
-# This is also used within File.save()
-def check_and_attempt_clearing(file: File) -> Optional[Exception]:
+# This is also used within Artifact.save()
+def check_and_attempt_clearing(artifact: Artifact) -> Optional[Exception]:
     # this is a clean-up operation after replace() was called
     # this will only evaluate to True if replace() was called
-    if hasattr(file, "_clear_storagekey"):
+    if hasattr(artifact, "_clear_storagekey"):
         try:
-            if file._clear_storagekey is not None:
-                delete_storage_using_key(file, file._clear_storagekey)
+            if artifact._clear_storagekey is not None:
+                delete_storage_using_key(artifact, artifact._clear_storagekey)
                 logger.success(
-                    f"deleted stale object at storage key {file._clear_storagekey}"
+                    f"deleted stale object at storage key {artifact._clear_storagekey}"
                 )
-                file._clear_storagekey = None
+                artifact._clear_storagekey = None
         except Exception as exception:
             return exception
     # returning None means proceed (either success or no action needed)
     return None
 
 
-def store_files(files: Iterable[File]) -> None:
-    """Upload files in a list of database-committed files to storage.
+def store_artifacts(artifacts: Iterable[Artifact]) -> None:
+    """Upload artifacts in a list of database-committed artifacts to storage.
 
-    If any upload fails, subsequent files are cleaned up from the DB.
+    If any upload fails, subsequent artifacts are cleaned up from the DB.
     """
     exception: Optional[Exception] = None
     # because uploads might fail, we need to maintain a new list
     # of the succeeded uploads
-    stored_files = []
+    stored_artifacts = []
 
-    # upload new local files
-    for file in files:
-        exception = check_and_attempt_upload(file)
+    # upload new local artifacts
+    for artifact in artifacts:
+        exception = check_and_attempt_upload(artifact)
         if exception is not None:
             break
-        stored_files += [file]
-        exception = check_and_attempt_clearing(file)
+        stored_artifacts += [artifact]
+        exception = check_and_attempt_clearing(artifact)
         if exception is not None:
-            logger.warning(f"clean up of {file._clear_storagekey} failed")
+            logger.warning(f"clean up of {artifact._clear_storagekey} failed")
             break
 
     if exception is not None:
-        # clean up metadata for files not uploaded to storage
+        # clean up metadata for artifacts not uploaded to storage
         with transaction.atomic():
-            for file in files:
-                if file not in stored_files:
-                    file._delete_skip_storage()
-        error_message = prepare_error_message(files, stored_files, exception)
+            for artifact in artifacts:
+                if artifact not in stored_artifacts:
+                    artifact._delete_skip_storage()
+        error_message = prepare_error_message(artifacts, stored_artifacts, exception)
         raise RuntimeError(error_message)
     return None
 
 
-def prepare_error_message(records, stored_files, exception) -> str:
-    if len(records) == 1 or len(stored_files) == 0:
+def prepare_error_message(records, stored_artifacts, exception) -> str:
+    if len(records) == 1 or len(stored_artifacts) == 0:
         error_message = (
             "No entries were uploaded or committed"
             " to the database. See error message:\n\n"
@@ -239,7 +239,7 @@ def prepare_error_message(records, stored_files, exception) -> str:
         "The following entries have been"
         " successfully uploaded and committed to the database:\n"
     )
-    for record in stored_files:
+    for record in stored_artifacts:
         error_message += (
             f"- {', '.join(record.__repr__().split(', ')[:3]) + ', ...)'}\n"
         )
@@ -248,24 +248,24 @@ def prepare_error_message(records, stored_files, exception) -> str:
     return error_message
 
 
-def upload_data_object(file) -> None:
+def upload_artifact(artifact) -> None:
     """Store and add file and its linked entries."""
     # do NOT hand-craft the storage key!
-    file_storage_key = auto_storage_key_from_file(file)
+    artifact_storage_key = auto_storage_key_from_artifact(artifact)
     storage_path = lamindb_setup.settings.instance.storage.key_to_filepath(
-        file_storage_key
+        artifact_storage_key
     )
-    msg = f"storing file '{file.uid}' at '{storage_path}'"
+    msg = f"storing artifact '{artifact.uid}' at '{storage_path}'"
     if (
-        file.suffix in {".zarr", ".zrad"}
-        and hasattr(file, "_memory_rep")
-        and file._memory_rep is not None
+        artifact.suffix in {".zarr", ".zrad"}
+        and hasattr(artifact, "_memory_rep")
+        and artifact._memory_rep is not None
     ):
         logger.save(msg)
         print_progress = partial(
-            print_hook, filepath=file_storage_key, action="uploading"
+            print_hook, filepath=artifact_storage_key, action="uploading"
         )
-        write_adata_zarr(file._memory_rep, storage_path, callback=print_progress)
-    elif hasattr(file, "_to_store") and file._to_store:
+        write_adata_zarr(artifact._memory_rep, storage_path, callback=print_progress)
+    elif hasattr(artifact, "_to_store") and artifact._to_store:
         logger.save(msg)
-        store_object(file._local_filepath, file_storage_key)
+        store_artifact(artifact._local_filepath, artifact_storage_key)
lamindb/dev/_data.py
CHANGED
@@ -4,11 +4,11 @@ from typing import Any, Dict, Iterable, List, Optional, Union
 from lamin_utils import colors, logger
 from lamindb_setup.dev._docs import doc_args
 from lnschema_core.models import (
+    Artifact,
     Data,
     Dataset,
     Feature,
     FeatureSet,
-    File,
     Registry,
     Run,
     ULabel,
@@ -54,7 +54,7 @@ def add_transform_to_kwargs(kwargs: Dict[str, Any], run: Run):
     kwargs["transform"] = run.transform
 
 
-def save_feature_sets(self: Union[File, Dataset]) -> None:
+def save_feature_sets(self: Union[Artifact, Dataset]) -> None:
     if hasattr(self, "_feature_sets"):
         saved_feature_sets = {}
         for key, feature_set in self._feature_sets.items():
@@ -72,7 +72,7 @@ def save_feature_sets(self: Union[File, Dataset]) -> None:
         )
 
 
-def save_feature_set_links(self: Union[File, Dataset]) -> None:
+def save_feature_set_links(self: Union[Artifact, Dataset]) -> None:
     from lamindb._save import bulk_create
 
     Data = self.__class__
@@ -116,7 +116,7 @@ def describe(self: Data):
         "initial_version": "🔖",
         "file": "📄",
     }
-    if len(foreign_key_fields) > 0:  # always True for File and Dataset
+    if len(foreign_key_fields) > 0:  # always True for Artifact and Dataset
         record_msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}"  # noqa
         msg += f"{record_msg}\n\n"
 
@@ -229,7 +229,7 @@ def add_labels(
             " feature=ln.Feature(name='my_feature'))"
         )
     if feature.registries is not None:
-        orm_dict = dict_schema_name_to_model_name(File)
+        orm_dict = dict_schema_name_to_model_name(Artifact)
         for reg in feature.registries.split("|"):
             orm = orm_dict.get(reg)
             records_validated += orm.from_values(records, field=field)
@@ -398,14 +398,15 @@ def _track_run_input(
         if run is None:
             raise ValueError(
                 "No run context set. Call ln.track() or link input to a"
-                " run object via `run.input_files.add(file)`"
+                " run object via `run.input_artifacts.add(artifact)`"
             )
         # avoid adding the same run twice
         run.save()
-        if data_class_name == "file":
-            LinkORM = run.input_files.through
+        if data_class_name == "artifact":
+            LinkORM = run.input_artifacts.through
             links = [
-                LinkORM(run_id=run.id, file_id=data_id) for data_id in input_data_ids
+                LinkORM(run_id=run.id, artifact_id=data_id)
+                for data_id in input_data_ids
             ]
         else:
             LinkORM = run.input_datasets.through
lamindb/dev/_feature_manager.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Dict, Union
 
 import numpy as np
 from lamin_utils import colors
-from lnschema_core.models import Data, Dataset, Feature, File
+from lnschema_core.models import Artifact, Data, Dataset, Feature
 
 from .._feature_set import FeatureSet
 from .._query_set import QuerySet
@@ -15,15 +15,15 @@ from .._registry import (
 from .._save import save
 
 
-def get_host_id_field(host: Union[File, Dataset]) -> str:
-    if isinstance(host, File):
-        host_id_field = "file_id"
+def get_host_id_field(host: Union[Artifact, Dataset]) -> str:
+    if isinstance(host, Artifact):
+        host_id_field = "artifact_id"
     else:
         host_id_field = "dataset_id"
     return host_id_field
 
 
-def get_accessor_by_orm(host: Union[File, Dataset]) -> Dict:
+def get_accessor_by_orm(host: Union[Artifact, Dataset]) -> Dict:
     dictionary = {
         field.related_model.__get_name_with_schema__(): field.name
         for field in host._meta.related_objects
@@ -56,7 +56,7 @@ def get_feature_set_by_slot(host) -> Dict:
 
 
 def get_label_links(
-    host: Union[File, Dataset], registry: str, feature: Feature
+    host: Union[Artifact, Dataset], registry: str, feature: Feature
 ) -> QuerySet:
     host_id_field = get_host_id_field(host)
     kwargs = {host_id_field: host.id, "feature_id": feature.id}
@@ -68,7 +68,7 @@ def get_label_links(
     return link_records
 
 
-def get_feature_set_links(host: Union[File, Dataset]) -> QuerySet:
+def get_feature_set_links(host: Union[Artifact, Dataset]) -> QuerySet:
     host_id_field = get_host_id_field(host)
     kwargs = {host_id_field: host.id}
     feature_set_links = host.feature_sets.through.objects.filter(**kwargs)
@@ -124,7 +124,7 @@ class FeatureManager:
     See :class:`~lamindb.dev.Data` for more information.
     """
 
-    def __init__(self, host: Union[File, Dataset]):
+    def __init__(self, host: Union[Artifact, Dataset]):
         self._host = host
         self._feature_set_by_slot = get_feature_set_by_slot(host)
         self._accessor_by_orm = get_accessor_by_orm(host)
@@ -160,7 +160,7 @@ class FeatureManager:
         """
         if self._host._state.adding:
             raise ValueError(
-                "Please save the file or dataset before adding a feature set!"
+                "Please save the artifact or dataset before adding a feature set!"
             )
         host_db = self._host._state.db
         feature_set.save(using=host_db)
@@ -180,7 +180,7 @@ class FeatureManager:
         self._feature_set_by_slot[slot] = feature_set
 
     def _add_from(self, data: Data):
-        """Transfer features from a file or dataset."""
+        """Transfer features from a artifact or dataset."""
         for slot, feature_set in data.features._feature_set_by_slot.items():
             members = feature_set.members
             registry = members[0].__class__
lamindb/dev/_label_manager.py
CHANGED
@@ -2,7 +2,7 @@ from typing import Dict, List, Optional, Union
 
 import numpy as np
 from lamin_utils import colors, logger
-from lnschema_core.models import Data, Dataset, Feature, File, Registry
+from lnschema_core.models import Artifact, Data, Dataset, Feature, Registry
 
 from .._feature_set import dict_related_model_to_related_name
 from .._from_values import _print_values
@@ -103,7 +103,7 @@ class LabelManager:
     See :class:`~lamindb.dev.Data` for more information.
     """
 
-    def __init__(self, host: Union[File, Dataset]):
+    def __init__(self, host: Union[Artifact, Dataset]):
         self._host = host
 
     def __repr__(self) -> str:
@@ -150,9 +150,9 @@ class LabelManager:
         """Transfer labels from a file or dataset.
 
         Examples:
            >>> file1 = ln.File(pd.DataFrame(index=[0, 1]))
changed to
            >>> file1 = ln.Artifact(pd.DataFrame(index=[0, 1]))
            >>> file1.save()
            >>> file2 = ln.File(pd.DataFrame(index=[2, 3]))
changed to
            >>> file2 = ln.Artifact(pd.DataFrame(index=[2, 3]))
            >>> file2.save()
            >>> ulabels = ln.ULabel.from_values(["Label1", "Label2"], field="name")
            >>> ln.save(ulabels)
lamindb/dev/_run_context.py
CHANGED
@@ -555,7 +555,7 @@ class run_context:
         else:
             # check whether there was an update
             if (
-                transform.source_file_id is not None
+                transform.source_code_id is not None
                 or transform.latest_report_id is not None
             ):
                 if os.getenv("LAMIN_TESTING") is None:
@@ -572,7 +572,7 @@ class run_context:
                 else:
                     logger.warning(
                         "not tracking this transform, either increase version or delete"
-                        " the saved transform.source_file and transform.latest_report"
+                        " the saved transform.source_code and transform.latest_report"
                     )
                     return False
         if transform.name != name or transform.short_name != short_name:
lamindb/dev/_settings.py
CHANGED
@@ -29,7 +29,7 @@ class Settings:
         self._verbosity_int: int = 1  # success-level logging
         logger.set_verbosity(self._verbosity_int)
 
-    upon_file_create_if_hash_exists: Literal[
+    upon_artifact_create_if_hash_exists: Literal[
         "warn_return_existing", "error", "warn_create_new"
     ] = "warn_return_existing"
     """Behavior if file hash exists (default `"warn_return_existing"`).
@@ -61,10 +61,11 @@ class Settings:
     """
     silence_file_run_transform_warning: bool = False
     """Silence warning about missing run & transform during file creation."""
-    file_use_virtual_keys: bool = True
-    """Treat `key` parameter in :class:`~lamindb.File` as virtual.
+    artifact_use_virtual_keys: bool = True
+    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.
 
-    If `True`, the `key` is **not** used to construct file paths
+    If `True`, the `key` is **not** used to construct file paths, but file paths are
+    based on the `uid` of artifact.
     """
 
     @property
lamindb/dev/_view_tree.py
CHANGED
@@ -3,7 +3,7 @@ from collections import defaultdict
 from typing import Iterable
 
 from lamindb_setup import settings as setup_settings
-from lnschema_core.models import File, Storage
+from lnschema_core.models import Artifact, Storage
 
 
 def view_tree(
@@ -18,7 +18,7 @@ def view_tree(
         print("queryset")
         qs = cls
         storage_ids = qs.list("storage_id")
-    elif cls == File:
+    elif cls == Artifact:
         print("file")
         qs = cls.filter(storage_id=setup_settings.storage.id).all()
         storage_ids = Storage.filter().list("id")
@@ -30,9 +30,9 @@ def view_tree(
         storage_id: storages.get(id=storage_id).root for storage_id in storage_ids
     }
     keys = set()
-    for file in qs:
-        root = storage_roots.get(file.storage_id, "")
-        keys.add(f"{root}/{file.key}")
+    for artifact in qs:
+        root = storage_roots.get(artifact.storage_id, "")
+        keys.add(f"{root}/{artifact.key}")
 
     _view_tree(
         keys=keys,
lamindb/dev/datasets/_core.py
CHANGED
@@ -12,7 +12,7 @@ from .._settings import settings
 
 
 def file_fcs() -> Path:
-    """Example FCS file."""
+    """Example FCS artifact."""
     filepath, _ = urlretrieve(
         "https://lamindb-test.s3.amazonaws.com/example.fcs", "example.fcs"
     )
@@ -93,25 +93,25 @@ def file_tsv_rnaseq_nfcore_salmon_merged_gene_counts(
 
 
 def file_fastq(in_storage_root=False) -> Path:
-    """Mini mock fastq file."""
+    """Mini mock fastq artifact."""
     basedir = Path(".") if not in_storage_root else settings.storage
     filepath = basedir / "input.fastq.gz"
     with open(filepath, "w") as f:
-        f.write("Mock fastq file.")
+        f.write("Mock fastq artifact.")
     return filepath
 
 
 def file_bam(in_storage_root=False) -> Path:  # pragma: no cover
-    """Mini mock bam file."""
+    """Mini mock bam artifact."""
     basedir = Path(".") if not in_storage_root else settings.storage
     filepath = basedir / "output.bam"
     with open(filepath, "w") as f:
-        f.write("Mock bam file.")
+        f.write("Mock bam artifact.")
     return filepath
 
 
 def file_mini_csv(in_storage_root=False) -> Path:
-    """Mini csv file."""
+    """Mini csv artifact."""
     basedir = Path(".") if not in_storage_root else settings.storage
     filepath = basedir / "mini.csv"
     df = pd.DataFrame([1, 2, 3], columns=["test"])
lamindb/dev/hashing.py
CHANGED
@@ -10,7 +10,7 @@
 
 import base64
 import hashlib
-from typing import Set, Tuple
+from typing import List, Set, Tuple
 
 
 def to_b64_str(bstr: bytes):
@@ -29,6 +29,16 @@ def hash_set(s: Set[str]) -> str:
     return to_b64_str(hashlib.md5(bstr).digest())[:20]
 
 
+def hash_md5s_from_dir(etags: List[str]) -> Tuple[str, str]:
+    # need to sort below because we don't want the order of parsing the dir to
+    # affect the hash
+    digests = b"".join(
+        hashlib.md5(etag.encode("utf-8")).digest() for etag in sorted(etags)
+    )
+    digest = hashlib.md5(digests).digest()
+    return to_b64_str(digest)[:22], "md5-d"
+
+
 def hash_file(file_path, chunk_size=50 * 1024 * 1024) -> Tuple[str, str]:
     chunks = []
     with open(file_path, "rb") as fp:
lamindb/dev/storage/__init__.py
CHANGED
@@ -10,5 +10,5 @@ from lamindb_setup.dev.upath import LocalPathClasses, UPath, infer_filesystem
 
 from ._anndata_sizes import size_adata
 from ._backed_access import AnnDataAccessor, BackedAccessor
-from .file import delete_storage, load_to_memory, store_object
+from .file import delete_storage, load_to_memory, store_artifact
 from .object import infer_suffix, write_to_file
lamindb/dev/storage/_backed_access.py
CHANGED
@@ -19,10 +19,10 @@ from fsspec.core import OpenFile
 from fsspec.implementations.local import LocalFileSystem
 from lamin_utils import logger
 from lamindb_setup.dev.upath import UPath, infer_filesystem
-from lnschema_core import File
+from lnschema_core import Artifact
 from packaging import version
 
-from lamindb.dev.storage.file import filepath_from_file
+from lamindb.dev.storage.file import filepath_from_artifact
 
 anndata_version_parse = version.parse(anndata_version)
 
@@ -684,12 +684,12 @@ class BackedAccessor:
 
 
 def backed_access(
-    file_or_filepath: Union[File, Path]
+    artifact_or_filepath: Union[Artifact, Path]
 ) -> Union[AnnDataAccessor, BackedAccessor]:
-    if isinstance(file_or_filepath, File):
-        filepath = filepath_from_file(file_or_filepath)
+    if isinstance(artifact_or_filepath, Artifact):
+        filepath = filepath_from_artifact(artifact_or_filepath)
     else:
-        filepath = file_or_filepath
+        filepath = artifact_or_filepath
     name = filepath.name
 
     if filepath.suffix in (".h5", ".hdf5", ".h5ad"):