lamindb 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_collection.py +31 -4
- lamindb/_feature.py +3 -2
- lamindb/_parents.py +16 -8
- lamindb/_query_set.py +37 -19
- lamindb/_registry.py +5 -1
- lamindb/dev/__init__.py +11 -2
- lamindb/dev/_data.py +5 -6
- lamindb/dev/_label_manager.py +2 -1
- lamindb/dev/_mapped_collection.py +109 -30
- lamindb/dev/_run_context.py +7 -5
- lamindb/dev/_track_environment.py +7 -3
- {lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/METADATA +10 -10
- {lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/RECORD +16 -16
- {lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/LICENSE +0 -0
- {lamindb-0.65.0.dist-info → lamindb-0.66.0.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_collection.py
CHANGED
@@ -5,7 +5,7 @@ import anndata as ad
|
|
5
5
|
import pandas as pd
|
6
6
|
from lamin_utils import logger
|
7
7
|
from lamindb_setup.dev._docs import doc_args
|
8
|
-
from lnschema_core.models import Collection, Feature, FeatureSet
|
8
|
+
from lnschema_core.models import Collection, CollectionArtifact, Feature, FeatureSet
|
9
9
|
from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, VisibilityChoice
|
10
10
|
|
11
11
|
from lamindb._utils import attach_func_to_class_method
|
@@ -15,6 +15,7 @@ from lamindb.dev.versioning import get_uid_from_old_version, init_uid
|
|
15
15
|
|
16
16
|
from . import _TESTING, Artifact, Run
|
17
17
|
from ._artifact import parse_feature_sets_from_anndata
|
18
|
+
from ._query_set import QuerySet
|
18
19
|
from ._registry import init_self_from_db
|
19
20
|
from .dev._data import (
|
20
21
|
add_transform_to_kwargs,
|
@@ -312,9 +313,11 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> Tuple[str, Dict[str, str]]:
|
|
312
313
|
def mapped(
|
313
314
|
self,
|
314
315
|
label_keys: Optional[Union[str, List[str]]] = None,
|
315
|
-
|
316
|
+
join: Optional[Literal["inner", "outer"]] = "inner",
|
316
317
|
encode_labels: bool = True,
|
318
|
+
cache_categories: bool = True,
|
317
319
|
parallel: bool = False,
|
320
|
+
dtype: Optional[str] = None,
|
318
321
|
stream: bool = False,
|
319
322
|
is_run_input: Optional[bool] = None,
|
320
323
|
) -> "MappedCollection":
|
@@ -328,7 +331,15 @@ def mapped(
|
|
328
331
|
path_list.append(artifact.stage())
|
329
332
|
else:
|
330
333
|
path_list.append(artifact.path)
|
331
|
-
return MappedCollection(
|
334
|
+
return MappedCollection(
|
335
|
+
path_list,
|
336
|
+
label_keys,
|
337
|
+
join,
|
338
|
+
encode_labels,
|
339
|
+
cache_categories,
|
340
|
+
parallel,
|
341
|
+
dtype,
|
342
|
+
)
|
332
343
|
|
333
344
|
|
334
345
|
# docstring handled through attach_func_to_class_method
|
@@ -416,7 +427,14 @@ def save(self, *args, **kwargs) -> None:
|
|
416
427
|
super(Collection, self).save()
|
417
428
|
if hasattr(self, "_artifacts"):
|
418
429
|
if self._artifacts is not None and len(self._artifacts) > 0:
|
419
|
-
|
430
|
+
links = [
|
431
|
+
CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
|
432
|
+
for artifact in self._artifacts
|
433
|
+
]
|
434
|
+
# the below seems to preserve the order of the list in the
|
435
|
+
# auto-incrementing integer primary
|
436
|
+
# merely using .unordered_artifacts.set(*...) doesn't achieve this
|
437
|
+
CollectionArtifact.objects.bulk_create(links)
|
420
438
|
save_feature_set_links(self)
|
421
439
|
|
422
440
|
|
@@ -429,6 +447,14 @@ def restore(self) -> None:
|
|
429
447
|
self.artifact.save()
|
430
448
|
|
431
449
|
|
450
|
+
@property # type: ignore
|
451
|
+
@doc_args(Collection.artifacts.__doc__)
|
452
|
+
def artifacts(self) -> QuerySet:
|
453
|
+
"""{}."""
|
454
|
+
_track_run_input(self)
|
455
|
+
return self.unordered_artifacts.order_by("collectionartifact__id")
|
456
|
+
|
457
|
+
|
432
458
|
METHOD_NAMES = [
|
433
459
|
"__init__",
|
434
460
|
"from_anndata",
|
@@ -455,3 +481,4 @@ for name in METHOD_NAMES:
|
|
455
481
|
|
456
482
|
# this seems a Django-generated function
|
457
483
|
delattr(Collection, "get_visibility_display")
|
484
|
+
Collection.artifacts = artifacts
|
lamindb/_feature.py
CHANGED
@@ -9,6 +9,7 @@ from lamindb._utils import attach_func_to_class_method
|
|
9
9
|
from lamindb.dev._settings import settings
|
10
10
|
|
11
11
|
from . import _TESTING
|
12
|
+
from ._query_set import RecordsList
|
12
13
|
|
13
14
|
FEATURE_TYPES = {
|
14
15
|
"int": "number",
|
@@ -86,7 +87,7 @@ def categoricals_from_df(df: "pd.DataFrame") -> Dict:
|
|
86
87
|
|
87
88
|
@classmethod # type:ignore
|
88
89
|
@doc_args(Feature.from_df.__doc__)
|
89
|
-
def from_df(cls, df: "pd.DataFrame") ->
|
90
|
+
def from_df(cls, df: "pd.DataFrame") -> "RecordsList":
|
90
91
|
"""{}."""
|
91
92
|
categoricals = categoricals_from_df(df)
|
92
93
|
|
@@ -141,7 +142,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
|
|
141
142
|
# f" {colors.yellow('unmapped categories')}:\n "
|
142
143
|
# f" {categoricals_with_unmapped_categories_formatted}"
|
143
144
|
# )
|
144
|
-
return features
|
145
|
+
return RecordsList(features)
|
145
146
|
|
146
147
|
|
147
148
|
@doc_args(Feature.save.__doc__)
|
lamindb/_parents.py
CHANGED
@@ -275,10 +275,15 @@ def _record_label(record: Registry, field: Optional[str] = None):
|
|
275
275
|
)
|
276
276
|
elif isinstance(record, Run):
|
277
277
|
name = f'{record.transform.name.replace("&", "&")}'
|
278
|
+
user_display = (
|
279
|
+
record.created_by.handle
|
280
|
+
if record.created_by.name is None
|
281
|
+
else record.created_by.name
|
282
|
+
)
|
278
283
|
return (
|
279
284
|
rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
|
280
285
|
rf' FACE="Monospace">uid={record.transform.uid}<BR/>type={record.transform.type},'
|
281
|
-
rf" user={
|
286
|
+
rf" user={user_display}<BR/>run={format_field_value(record.run_at)}</FONT>>"
|
282
287
|
)
|
283
288
|
elif isinstance(record, Transform):
|
284
289
|
name = f'{record.name.replace("&", "&")}'
|
@@ -317,13 +322,13 @@ def _get_all_parent_runs(data: Union[Artifact, Collection]) -> List:
|
|
317
322
|
inputs_run = (
|
318
323
|
r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
|
319
324
|
)
|
320
|
-
if name == "
|
325
|
+
if name == "artifact":
|
321
326
|
inputs_run += r.input_collections.all().filter(visibility=1).list()
|
322
327
|
run_inputs_outputs += [(inputs_run, r)]
|
323
328
|
outputs_run = (
|
324
329
|
r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
|
325
330
|
)
|
326
|
-
if name == "
|
331
|
+
if name == "artifact":
|
327
332
|
outputs_run += r.output_collections.all().filter(visibility=1).list()
|
328
333
|
run_inputs_outputs += [(r, outputs_run)]
|
329
334
|
inputs += inputs_run
|
@@ -337,8 +342,11 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
|
|
337
342
|
all_runs: Set[Run] = set()
|
338
343
|
run_inputs_outputs = []
|
339
344
|
|
340
|
-
|
341
|
-
|
345
|
+
if data.run is not None:
|
346
|
+
runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
|
347
|
+
else:
|
348
|
+
runs = set()
|
349
|
+
if name == "artifact" and data.run is not None:
|
342
350
|
runs.update(
|
343
351
|
{
|
344
352
|
f.run
|
@@ -352,13 +360,13 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
|
|
352
360
|
inputs_run = (
|
353
361
|
r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
|
354
362
|
)
|
355
|
-
if name == "
|
363
|
+
if name == "artifact":
|
356
364
|
inputs_run += r.input_collections.all().filter(visibility=1).list()
|
357
365
|
run_inputs_outputs += [(inputs_run, r)]
|
358
366
|
outputs_run = (
|
359
367
|
r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
|
360
368
|
)
|
361
|
-
if name == "
|
369
|
+
if name == "artifact":
|
362
370
|
outputs_run += r.output_collections.all().filter(visibility=1).list()
|
363
371
|
run_inputs_outputs += [(r, outputs_run)]
|
364
372
|
child_runs.update(
|
@@ -366,7 +374,7 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
|
|
366
374
|
**{f"input_{name}s__id__in": [i.id for i in outputs_run]}
|
367
375
|
).list()
|
368
376
|
)
|
369
|
-
if name == "
|
377
|
+
if name == "artifact":
|
370
378
|
child_runs.update(
|
371
379
|
Run.filter(
|
372
380
|
input_collections__id__in=[i.id for i in outputs_run]
|
lamindb/_query_set.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
from
|
1
|
+
from collections import UserList
|
2
|
+
from typing import Dict, Iterable, List, NamedTuple, Optional, Union
|
2
3
|
|
3
4
|
import pandas as pd
|
4
5
|
from django.db import models
|
@@ -21,6 +22,40 @@ class MultipleResultsFound(Exception):
|
|
21
22
|
# return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
|
22
23
|
|
23
24
|
|
25
|
+
def get_keys_from_df(data: List, registry: Registry) -> List[str]:
|
26
|
+
if len(data) > 0:
|
27
|
+
if isinstance(data[0], dict):
|
28
|
+
keys = list(data[0].keys())
|
29
|
+
else:
|
30
|
+
keys = list(data[0].__dict__.keys())
|
31
|
+
if "_state" in keys:
|
32
|
+
keys.remove("_state")
|
33
|
+
else:
|
34
|
+
keys = [
|
35
|
+
field.name
|
36
|
+
for field in registry._meta.fields
|
37
|
+
if not isinstance(field, models.ForeignKey)
|
38
|
+
]
|
39
|
+
keys += [
|
40
|
+
f"{field.name}_id"
|
41
|
+
for field in registry._meta.fields
|
42
|
+
if isinstance(field, models.ForeignKey)
|
43
|
+
]
|
44
|
+
return keys
|
45
|
+
|
46
|
+
|
47
|
+
class RecordsList(UserList):
|
48
|
+
"""Is ordered, can't be queried, but has `.df()`."""
|
49
|
+
|
50
|
+
def __init__(self, records: List[Registry]):
|
51
|
+
super().__init__(record for record in records)
|
52
|
+
|
53
|
+
def df(self) -> pd.DataFrame:
|
54
|
+
keys = get_keys_from_df(self.data, self.data[0].__class__)
|
55
|
+
values = [record.__dict__ for record in self.data]
|
56
|
+
return pd.DataFrame(values, columns=keys)
|
57
|
+
|
58
|
+
|
24
59
|
class QuerySet(models.QuerySet, CanValidate, IsTree):
|
25
60
|
"""Lazily loaded queried records returned by queries.
|
26
61
|
|
@@ -59,24 +94,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
|
|
59
94
|
>>> ln.ULabel.filter().df(include=["labels__name", "labels__created_by_id"])
|
60
95
|
"""
|
61
96
|
data = self.values()
|
62
|
-
|
63
|
-
keys = list(data[0].keys())
|
64
|
-
if "created_at" in keys:
|
65
|
-
keys.remove("created_at")
|
66
|
-
else:
|
67
|
-
keys = [
|
68
|
-
field.name
|
69
|
-
for field in self.model._meta.fields
|
70
|
-
if (
|
71
|
-
not isinstance(field, models.ForeignKey)
|
72
|
-
and field.name != "created_at"
|
73
|
-
)
|
74
|
-
]
|
75
|
-
keys += [
|
76
|
-
f"{field.name}_id"
|
77
|
-
for field in self.model._meta.fields
|
78
|
-
if isinstance(field, models.ForeignKey)
|
79
|
-
]
|
97
|
+
keys = get_keys_from_df(data, self.model)
|
80
98
|
df = pd.DataFrame(self.values(), columns=keys)
|
81
99
|
# if len(df) > 0 and "updated_at" in df:
|
82
100
|
# df.updated_at = format_and_convert_to_local_time(df.updated_at)
|
lamindb/_registry.py
CHANGED
@@ -469,7 +469,11 @@ def save(self, *args, **kwargs) -> None:
|
|
469
469
|
if result is not None:
|
470
470
|
init_self_from_db(self, result)
|
471
471
|
else:
|
472
|
-
|
472
|
+
# here, we can't use the parents argument
|
473
|
+
save_kwargs = kwargs.copy()
|
474
|
+
if "parents" in save_kwargs:
|
475
|
+
save_kwargs.pop("parents")
|
476
|
+
super(Registry, self).save(*args, **save_kwargs)
|
473
477
|
if db is not None and db != "default":
|
474
478
|
if hasattr(self, "labels"):
|
475
479
|
from copy import copy
|
lamindb/dev/__init__.py
CHANGED
@@ -14,6 +14,7 @@ Queries of registries:
|
|
14
14
|
|
15
15
|
QuerySet
|
16
16
|
QueryManager
|
17
|
+
RecordsList
|
17
18
|
|
18
19
|
Functionality of data registries:
|
19
20
|
|
@@ -24,6 +25,7 @@ Functionality of data registries:
|
|
24
25
|
FeatureManager
|
25
26
|
LabelManager
|
26
27
|
IsTree
|
28
|
+
IsVersioned
|
27
29
|
|
28
30
|
Functionality of metadata registries:
|
29
31
|
|
@@ -51,10 +53,17 @@ Auxiliary tools:
|
|
51
53
|
"""
|
52
54
|
|
53
55
|
from lamin_utils._inspect import InspectResult
|
54
|
-
from lnschema_core.models import
|
56
|
+
from lnschema_core.models import (
|
57
|
+
CanValidate,
|
58
|
+
Data,
|
59
|
+
HasParents,
|
60
|
+
IsTree,
|
61
|
+
IsVersioned,
|
62
|
+
Registry,
|
63
|
+
)
|
55
64
|
|
56
65
|
from lamindb._query_manager import QueryManager
|
57
|
-
from lamindb._query_set import QuerySet
|
66
|
+
from lamindb._query_set import QuerySet, RecordsList
|
58
67
|
from lamindb.dev._feature_manager import FeatureManager
|
59
68
|
from lamindb.dev._label_manager import LabelManager
|
60
69
|
|
lamindb/dev/_data.py
CHANGED
@@ -113,8 +113,7 @@ def describe(self: Data):
|
|
113
113
|
"created_by": "👤",
|
114
114
|
"transform": _transform_emoji(self.transform),
|
115
115
|
"run": "👣",
|
116
|
-
"
|
117
|
-
"file": "📄",
|
116
|
+
"artifact": "📄",
|
118
117
|
}
|
119
118
|
if len(foreign_key_fields) > 0: # always True for Artifact and Collection
|
120
119
|
record_msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}"
|
@@ -209,7 +208,7 @@ def add_labels(
|
|
209
208
|
) -> None:
|
210
209
|
"""{}."""
|
211
210
|
if self._state.adding:
|
212
|
-
raise ValueError("Please save the
|
211
|
+
raise ValueError("Please save the artifact/collection before adding a label!")
|
213
212
|
|
214
213
|
if isinstance(records, (QuerySet, QuerySet.__base__)): # need to have both
|
215
214
|
records = records.list()
|
@@ -331,7 +330,7 @@ def add_labels(
|
|
331
330
|
id=old_feature_set_link.feature_set_id
|
332
331
|
).one()
|
333
332
|
logger.info(
|
334
|
-
"
|
333
|
+
"nothing links to it anymore, deleting feature set"
|
335
334
|
f" {old_feature_set}"
|
336
335
|
)
|
337
336
|
old_feature_set.delete()
|
@@ -368,7 +367,7 @@ def _track_run_input(
|
|
368
367
|
if run is None:
|
369
368
|
if settings.track_run_inputs:
|
370
369
|
logger.hint(
|
371
|
-
"you can auto-track
|
370
|
+
"you can auto-track these data as a run input by calling"
|
372
371
|
" `ln.track()`"
|
373
372
|
)
|
374
373
|
# assume we have a run record
|
@@ -390,7 +389,7 @@ def _track_run_input(
|
|
390
389
|
track_run_input = True
|
391
390
|
else:
|
392
391
|
logger.hint(
|
393
|
-
"track
|
392
|
+
"track these data as a run input by passing `is_run_input=True`"
|
394
393
|
)
|
395
394
|
else:
|
396
395
|
track_run_input = is_run_input
|
lamindb/dev/_label_manager.py
CHANGED
@@ -23,11 +23,12 @@ def get_labels_as_dict(self: Data):
|
|
23
23
|
).items():
|
24
24
|
if related_name in {
|
25
25
|
"feature_sets",
|
26
|
-
"
|
26
|
+
"artifacts",
|
27
27
|
"input_of",
|
28
28
|
"collections",
|
29
29
|
"source_of",
|
30
30
|
"report_of",
|
31
|
+
"environment_of",
|
31
32
|
}:
|
32
33
|
continue
|
33
34
|
if self.id is not None:
|
@@ -5,6 +5,7 @@ from typing import List, Literal, Optional, Union
|
|
5
5
|
|
6
6
|
import numpy as np
|
7
7
|
import pandas as pd
|
8
|
+
from lamin_utils import logger
|
8
9
|
from lamindb_setup.dev.upath import UPath
|
9
10
|
|
10
11
|
from .storage._backed_access import (
|
@@ -57,10 +58,14 @@ class MappedCollection:
|
|
57
58
|
self,
|
58
59
|
path_list: List[Union[str, PathLike]],
|
59
60
|
label_keys: Optional[Union[str, List[str]]] = None,
|
60
|
-
|
61
|
+
join: Optional[Literal["inner", "outer"]] = "outer",
|
61
62
|
encode_labels: bool = True,
|
63
|
+
cache_categories: bool = True,
|
62
64
|
parallel: bool = False,
|
65
|
+
dtype: Optional[str] = None,
|
63
66
|
):
|
67
|
+
assert join in {None, "inner", "outer"}
|
68
|
+
|
64
69
|
self.storages = [] # type: ignore
|
65
70
|
self.conns = [] # type: ignore
|
66
71
|
self.parallel = parallel
|
@@ -79,16 +84,22 @@ class MappedCollection:
|
|
79
84
|
self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
|
80
85
|
self.storage_idx = np.repeat(np.arange(len(self.storages)), self.n_obs_list)
|
81
86
|
|
82
|
-
self.join_vars =
|
87
|
+
self.join_vars = join if len(path_list) > 1 else None
|
83
88
|
self.var_indices = None
|
84
89
|
if self.join_vars is not None:
|
85
90
|
self._make_join_vars()
|
86
91
|
|
87
92
|
self.encode_labels = encode_labels
|
88
93
|
self.label_keys = [label_keys] if isinstance(label_keys, str) else label_keys
|
89
|
-
if self.label_keys is not None
|
90
|
-
|
94
|
+
if self.label_keys is not None:
|
95
|
+
if cache_categories:
|
96
|
+
self._cache_categories(self.label_keys)
|
97
|
+
else:
|
98
|
+
self._cache_cats: dict = {}
|
99
|
+
if self.encode_labels:
|
100
|
+
self._make_encoders(self.label_keys)
|
91
101
|
|
102
|
+
self._dtype = dtype
|
92
103
|
self._closed = False
|
93
104
|
|
94
105
|
def _make_connections(self, path_list: list, parallel: bool):
|
@@ -104,6 +115,18 @@ class MappedCollection:
|
|
104
115
|
self.conns.append(conn)
|
105
116
|
self.storages.append(storage)
|
106
117
|
|
118
|
+
def _cache_categories(self, label_keys: list):
|
119
|
+
self._cache_cats = {}
|
120
|
+
decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
|
121
|
+
for label in label_keys:
|
122
|
+
self._cache_cats[label] = []
|
123
|
+
for storage in self.storages:
|
124
|
+
with _Connect(storage) as store:
|
125
|
+
cats = self.get_categories(store, label)
|
126
|
+
if cats is not None:
|
127
|
+
cats = decode(cats) if isinstance(cats[0], bytes) else cats[...]
|
128
|
+
self._cache_cats[label].append(cats)
|
129
|
+
|
107
130
|
def _make_encoders(self, label_keys: list):
|
108
131
|
self.encoders = []
|
109
132
|
for label in label_keys:
|
@@ -115,20 +138,38 @@ class MappedCollection:
|
|
115
138
|
for storage in self.storages:
|
116
139
|
with _Connect(storage) as store:
|
117
140
|
var_list.append(_safer_read_index(store["var"]))
|
141
|
+
|
142
|
+
self.var_joint = None
|
118
143
|
if self.join_vars == "auto":
|
119
144
|
vars_eq = all(var_list[0].equals(vrs) for vrs in var_list[1:])
|
120
145
|
if vars_eq:
|
121
146
|
self.join_vars = None
|
147
|
+
logger.info("The variables are same, no virtual join is performed.")
|
122
148
|
return
|
123
149
|
else:
|
124
|
-
self.
|
150
|
+
self.var_joint = reduce(pd.Index.intersection, var_list)
|
151
|
+
if len(self.var_joint) > 0:
|
152
|
+
self.join_vars = "inner"
|
153
|
+
logger.info(
|
154
|
+
"The intersection of variables is not empty, using virtual inner join."
|
155
|
+
)
|
156
|
+
else:
|
157
|
+
self.join_vars = "outer"
|
158
|
+
logger.info(
|
159
|
+
"The intersection of variables is empty, using virtual outer join."
|
160
|
+
)
|
161
|
+
|
125
162
|
if self.join_vars == "inner":
|
126
|
-
self.var_joint
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
163
|
+
if self.var_joint is None:
|
164
|
+
self.var_joint = reduce(pd.Index.intersection, var_list)
|
165
|
+
if len(self.var_joint) == 0:
|
166
|
+
raise ValueError(
|
167
|
+
"The provided AnnData objects don't have shared varibales."
|
168
|
+
)
|
131
169
|
self.var_indices = [vrs.get_indexer(self.var_joint) for vrs in var_list]
|
170
|
+
elif self.join_vars == "outer":
|
171
|
+
self.var_joint = reduce(pd.Index.union, var_list)
|
172
|
+
self.var_indices = [self.var_joint.get_indexer(vrs) for vrs in var_list]
|
132
173
|
|
133
174
|
def __len__(self):
|
134
175
|
return self.n_obs
|
@@ -137,15 +178,21 @@ class MappedCollection:
|
|
137
178
|
obs_idx = self.indices[idx]
|
138
179
|
storage_idx = self.storage_idx[idx]
|
139
180
|
if self.var_indices is not None:
|
140
|
-
|
181
|
+
var_idxs_join = self.var_indices[storage_idx]
|
141
182
|
else:
|
142
|
-
|
183
|
+
var_idxs_join = None
|
143
184
|
|
144
185
|
with _Connect(self.storages[storage_idx]) as store:
|
145
|
-
out = [self.get_data_idx(store, obs_idx,
|
186
|
+
out = [self.get_data_idx(store, obs_idx, var_idxs_join)]
|
146
187
|
if self.label_keys is not None:
|
147
188
|
for i, label in enumerate(self.label_keys):
|
148
|
-
|
189
|
+
if label in self._cache_cats:
|
190
|
+
cats = self._cache_cats[label][storage_idx]
|
191
|
+
if cats is None:
|
192
|
+
cats = []
|
193
|
+
else:
|
194
|
+
cats = None
|
195
|
+
label_idx = self.get_label_idx(store, obs_idx, label, cats)
|
149
196
|
if self.encode_labels:
|
150
197
|
label_idx = self.encoders[i][label_idx]
|
151
198
|
out.append(label_idx)
|
@@ -155,26 +202,50 @@ class MappedCollection:
|
|
155
202
|
self,
|
156
203
|
storage: StorageType, # type: ignore
|
157
204
|
idx: int,
|
158
|
-
|
205
|
+
var_idxs_join: Optional[list] = None,
|
159
206
|
layer_key: Optional[str] = None,
|
160
207
|
):
|
161
208
|
"""Get the index for the data."""
|
162
209
|
layer = storage["X"] if layer_key is None else storage["layers"][layer_key] # type: ignore
|
163
210
|
if isinstance(layer, ArrayTypes): # type: ignore
|
164
|
-
|
165
|
-
|
211
|
+
layer_idx = layer[idx]
|
212
|
+
if self.join_vars is None:
|
213
|
+
result = layer_idx
|
214
|
+
if self._dtype is not None:
|
215
|
+
result = result.astype(self._dtype, copy=False)
|
216
|
+
elif self.join_vars == "outer":
|
217
|
+
dtype = layer_idx.dtype if self._dtype is None else self._dtype
|
218
|
+
result = np.zeros(len(self.var_joint), dtype=dtype)
|
219
|
+
result[var_idxs_join] = layer_idx
|
220
|
+
else: # inner join
|
221
|
+
result = layer_idx[var_idxs_join]
|
222
|
+
if self._dtype is not None:
|
223
|
+
result = result.astype(self._dtype, copy=False)
|
224
|
+
return result
|
166
225
|
else: # assume csr_matrix here
|
167
226
|
data = layer["data"]
|
168
227
|
indices = layer["indices"]
|
169
228
|
indptr = layer["indptr"]
|
170
229
|
s = slice(*(indptr[idx : idx + 2]))
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
230
|
+
data_s = data[s]
|
231
|
+
dtype = data_s.dtype if self._dtype is None else self._dtype
|
232
|
+
if self.join_vars == "outer":
|
233
|
+
layer_idx = np.zeros(len(self.var_joint), dtype=dtype)
|
234
|
+
layer_idx[var_idxs_join[indices[s]]] = data_s
|
235
|
+
else:
|
236
|
+
layer_idx = np.zeros(layer.attrs["shape"][1], dtype=dtype)
|
237
|
+
layer_idx[indices[s]] = data_s
|
238
|
+
if self.join_vars == "inner":
|
239
|
+
layer_idx = layer_idx[var_idxs_join]
|
240
|
+
return layer_idx
|
176
241
|
|
177
|
-
def get_label_idx(
|
242
|
+
def get_label_idx(
|
243
|
+
self,
|
244
|
+
storage: StorageType,
|
245
|
+
idx: int,
|
246
|
+
label_key: str,
|
247
|
+
categories: Optional[list] = None,
|
248
|
+
):
|
178
249
|
"""Get the index for the label by key."""
|
179
250
|
obs = storage["obs"] # type: ignore
|
180
251
|
# how backwards compatible do we want to be here actually?
|
@@ -186,9 +257,11 @@ class MappedCollection:
|
|
186
257
|
label = labels[idx]
|
187
258
|
else:
|
188
259
|
label = labels["codes"][idx]
|
189
|
-
|
190
|
-
|
191
|
-
|
260
|
+
if categories is not None:
|
261
|
+
cats = categories
|
262
|
+
else:
|
263
|
+
cats = self.get_categories(storage, label_key)
|
264
|
+
if cats is not None and len(cats) > 0:
|
192
265
|
label = cats[label]
|
193
266
|
if isinstance(label, bytes):
|
194
267
|
label = label.decode("utf-8")
|
@@ -215,11 +288,14 @@ class MappedCollection:
|
|
215
288
|
"""Get merged labels."""
|
216
289
|
labels_merge = []
|
217
290
|
decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
|
218
|
-
for storage in self.storages:
|
291
|
+
for i, storage in enumerate(self.storages):
|
219
292
|
with _Connect(storage) as store:
|
220
293
|
codes = self.get_codes(store, label_key)
|
221
294
|
labels = decode(codes) if isinstance(codes[0], bytes) else codes
|
222
|
-
|
295
|
+
if label_key in self._cache_cats:
|
296
|
+
cats = self._cache_cats[label_key][i]
|
297
|
+
else:
|
298
|
+
cats = self.get_categories(store, label_key)
|
223
299
|
if cats is not None:
|
224
300
|
cats = decode(cats) if isinstance(cats[0], bytes) else cats
|
225
301
|
labels = cats[labels]
|
@@ -230,9 +306,12 @@ class MappedCollection:
|
|
230
306
|
"""Get merged categories."""
|
231
307
|
cats_merge = set()
|
232
308
|
decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
|
233
|
-
for storage in self.storages:
|
309
|
+
for i, storage in enumerate(self.storages):
|
234
310
|
with _Connect(storage) as store:
|
235
|
-
|
311
|
+
if label_key in self._cache_cats:
|
312
|
+
cats = self._cache_cats[label_key][i]
|
313
|
+
else:
|
314
|
+
cats = self.get_categories(store, label_key)
|
236
315
|
if cats is not None:
|
237
316
|
cats = decode(cats) if isinstance(cats[0], bytes) else cats
|
238
317
|
cats_merge.update(cats)
|
lamindb/dev/_run_context.py
CHANGED
@@ -33,7 +33,9 @@ msg_manual_init = (
|
|
33
33
|
)
|
34
34
|
|
35
35
|
|
36
|
-
|
36
|
+
# we don't want a real error here, as this is so frequent
|
37
|
+
# in VSCode
|
38
|
+
class UpdateNbWithNonInteractiveEditor(SystemExit):
|
37
39
|
pass
|
38
40
|
|
39
41
|
|
@@ -230,7 +232,7 @@ class run_context:
|
|
230
232
|
"it looks like you are running ln.track() from a "
|
231
233
|
"notebook!\nplease install nbproject: pip install nbproject"
|
232
234
|
)
|
233
|
-
elif isinstance(e,
|
235
|
+
elif isinstance(e, UpdateNbWithNonInteractiveEditor):
|
234
236
|
raise e
|
235
237
|
elif isinstance(e, (NotebookNotSavedError, NoTitleError)):
|
236
238
|
raise e
|
@@ -435,7 +437,7 @@ class run_context:
|
|
435
437
|
cls._notebook_meta = metadata # type: ignore
|
436
438
|
else:
|
437
439
|
msg = msg_manual_init.format(notebook_path=notebook_path_str)
|
438
|
-
raise
|
440
|
+
raise UpdateNbWithNonInteractiveEditor(msg)
|
439
441
|
|
440
442
|
if _env in ("lab", "notebook"):
|
441
443
|
# save the notebook in case that title was updated
|
@@ -450,7 +452,7 @@ class run_context:
|
|
450
452
|
is_interactive = _seconds_modified(_filepath) < 1.5 # should be ~1 sec
|
451
453
|
if not is_interactive and needs_init:
|
452
454
|
msg = msg_manual_init.format(notebook_path=_filepath)
|
453
|
-
raise
|
455
|
+
raise UpdateNbWithNonInteractiveEditor(msg)
|
454
456
|
|
455
457
|
nbproject_id = metadata["id"]
|
456
458
|
nbproject_version = metadata["version"]
|
@@ -509,7 +511,7 @@ class run_context:
|
|
509
511
|
cls._notebook_meta = metadata # type: ignore
|
510
512
|
else:
|
511
513
|
msg = msg_manual_init.format(notebook_path=filepath)
|
512
|
-
raise
|
514
|
+
raise UpdateNbWithNonInteractiveEditor(msg)
|
513
515
|
else:
|
514
516
|
from lamin_cli._transform import update_transform_source_metadata
|
515
517
|
|
@@ -6,9 +6,13 @@ from lnschema_core.models import Run
|
|
6
6
|
|
7
7
|
|
8
8
|
def track_environment(run: Run) -> None:
|
9
|
-
filepath = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}"
|
9
|
+
filepath = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
|
10
10
|
# create a requirements.txt
|
11
11
|
# we don't create a conda environment.yml mostly for its slowness
|
12
|
-
|
13
|
-
|
12
|
+
try:
|
13
|
+
result = subprocess.run(f"pip freeze > {str(filepath)}", shell=True)
|
14
|
+
except OSError as e:
|
15
|
+
result = None
|
16
|
+
logger.warning(f"could not run pip freeze with error {e}")
|
17
|
+
if result is not None and result.returncode == 0:
|
14
18
|
logger.info(f"tracked pip freeze > {str(filepath)}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: lamindb
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.66.0
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.8
|
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
|
|
9
9
|
Classifier: Programming Language :: Python :: 3.9
|
10
10
|
Classifier: Programming Language :: Python :: 3.10
|
11
11
|
Classifier: Programming Language :: Python :: 3.11
|
12
|
-
Requires-Dist: lnschema_core==0.
|
13
|
-
Requires-Dist: lamindb_setup==0.
|
12
|
+
Requires-Dist: lnschema_core==0.61.0
|
13
|
+
Requires-Dist: lamindb_setup==0.64.0
|
14
14
|
Requires-Dist: lamin_utils==0.13.0
|
15
|
-
Requires-Dist: lamin_cli==0.
|
15
|
+
Requires-Dist: lamin_cli==0.6.0
|
16
16
|
Requires-Dist: rapidfuzz
|
17
17
|
Requires-Dist: pyarrow
|
18
18
|
Requires-Dist: typing_extensions!=4.6.0
|
@@ -62,14 +62,14 @@ Provides-Extra: zarr
|
|
62
62
|
|
63
63
|
# LaminDB - A data framework for biology
|
64
64
|
|
65
|
-
LaminDB is an open-source Python framework to manage biological data & analyses
|
65
|
+
LaminDB is an open-source Python framework to manage biological data & analyses:
|
66
66
|
|
67
|
-
- Access data & metadata across storage
|
68
|
-
- Track data
|
69
|
-
- Manage registries for experimental metadata & in-house ontologies
|
70
|
-
- Validate, standardize & annotate data
|
67
|
+
- Access data & metadata across storage & databases.
|
68
|
+
- Track data lineage across notebooks & pipelines.
|
69
|
+
- Manage registries for experimental metadata & in-house ontologies.
|
70
|
+
- Validate, standardize & annotate data.
|
71
71
|
- Organize and share data across a mesh of LaminDB instances.
|
72
|
-
- Manage data access
|
72
|
+
- Manage data access, leverage an auditable system of record.
|
73
73
|
|
74
74
|
## Documentation
|
75
75
|
|
@@ -1,15 +1,15 @@
|
|
1
|
-
lamindb/__init__.py,sha256
|
1
|
+
lamindb/__init__.py,sha256=rgCY0tETrHKyB7V5f2Y3BhY4BAJicGUUYzSRwIdRlmI,2691
|
2
2
|
lamindb/_artifact.py,sha256=eWsLj8x6Cqy8MR7LxKyScxozM52MaqOTCK8gplloP2c,38087
|
3
|
-
lamindb/_collection.py,sha256=
|
3
|
+
lamindb/_collection.py,sha256=gVcs3A200JZilfdYd0zrX29UrAmhP9Eovu6r_SIxXQ4,17634
|
4
4
|
lamindb/_delete.py,sha256=jO6kcIoxY6EFgqiVF2vlbXaCaqlI25AvBo7nre3JXkQ,1968
|
5
|
-
lamindb/_feature.py,sha256=
|
5
|
+
lamindb/_feature.py,sha256=tEcqFoEj5yp4LSJfMGyiVvxDUuLoZaik6lo05ZKcCtE,6036
|
6
6
|
lamindb/_feature_set.py,sha256=KYgdmMdXb21pfpir1J1O21in3nJvUeznECOB38qfTvk,8654
|
7
7
|
lamindb/_filter.py,sha256=YwWqviJ34kHTMJ8NYlrEw-vsrXkKrVIPsEZSBVvMcrI,1163
|
8
8
|
lamindb/_from_values.py,sha256=dKz4cTUBRkXOOzFX2Ix2cKhK2Lw9PyTgi7d0PI-kh3c,11869
|
9
|
-
lamindb/_parents.py,sha256=
|
9
|
+
lamindb/_parents.py,sha256=hyoN92YnfJFmRWmQMLLUjTKKwnIOJci5z6csMjsdYDE,14165
|
10
10
|
lamindb/_query_manager.py,sha256=m4WUScviuNlMHeNEPZ8H8y0YsMXSbwWyfIgS4L00wBY,4332
|
11
|
-
lamindb/_query_set.py,sha256=
|
12
|
-
lamindb/_registry.py,sha256=
|
11
|
+
lamindb/_query_set.py,sha256=tItL2YNdycpbXklYd8aW4jJX6Z-kGcNclscg0v3l8t4,10495
|
12
|
+
lamindb/_registry.py,sha256=MxYpJUKD6Qu5eO2jO6JOcQBBGxfQpiEGPJrFaXau_jw,17421
|
13
13
|
lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
|
14
14
|
lamindb/_save.py,sha256=UlRHJGUiHGOXv90wmawZVsOqhJIqk8f1wj8MW3Rlq_c,10535
|
15
15
|
lamindb/_storage.py,sha256=mz2Cy0CTaeJGA03A1FPQmmH0Vt2ib_KlXklaLqtN1mU,394
|
@@ -18,14 +18,14 @@ lamindb/_ulabel.py,sha256=HALoy6HerRnehR-u8zPH-qmiFQHWxeAwkZ31jxjrfgI,1893
|
|
18
18
|
lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
|
19
19
|
lamindb/_validate.py,sha256=fS2685MYX9h6iAWymEorJJmDYA2CGNOSmJpesbG6faU,14400
|
20
20
|
lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
|
21
|
-
lamindb/dev/__init__.py,sha256=
|
22
|
-
lamindb/dev/_data.py,sha256=
|
21
|
+
lamindb/dev/__init__.py,sha256=LLqivujL8c-oKWC15SJepAYyrTlLNvql5Vdwunc0qvE,1174
|
22
|
+
lamindb/dev/_data.py,sha256=YPZ664qGKMl34LbZCMCEFIxQ-E81iAt_b3lvMiTe-oc,17066
|
23
23
|
lamindb/dev/_feature_manager.py,sha256=jn8x_JbrtLFelmaFh4noOXqGSCfqVuVX0quoa7gTJtM,9366
|
24
|
-
lamindb/dev/_label_manager.py,sha256=
|
25
|
-
lamindb/dev/_mapped_collection.py,sha256=
|
26
|
-
lamindb/dev/_run_context.py,sha256=
|
24
|
+
lamindb/dev/_label_manager.py,sha256=6E_pSQicqfTWDGEGe4WPn_3GZl_CCIMTZ6xJDh4EkC0,8740
|
25
|
+
lamindb/dev/_mapped_collection.py,sha256=NRjOYnC1d3IcVyqhT_Yp0xycepmeytlngYnw-5Xcnw4,14445
|
26
|
+
lamindb/dev/_run_context.py,sha256=4eBZsbfcFpW5nqmRLbRZxuA5oeRW17XVHMzVtMH0bKA,22965
|
27
27
|
lamindb/dev/_settings.py,sha256=nixk8lVijCbq_fRlUpkX5gvO9AdgUFjbXzFThAJhGBA,3824
|
28
|
-
lamindb/dev/_track_environment.py,sha256=
|
28
|
+
lamindb/dev/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
|
29
29
|
lamindb/dev/_view_tree.py,sha256=K-C1BsOiEupwgkhyrsGxLFxHU45SAkiKsQbeOV9PbaY,3421
|
30
30
|
lamindb/dev/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
|
31
31
|
lamindb/dev/fields.py,sha256=0f0wai2aCjQYAQgI04UlCOAHo2MQknp4AsOKFDmE9iU,163
|
@@ -43,7 +43,7 @@ lamindb/dev/storage/file.py,sha256=jalzFQ8q110UUu_GGQBkU-g3M04h5g4LJ3nLjCzJ4pU,5
|
|
43
43
|
lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
|
44
44
|
lamindb/setup/__init__.py,sha256=WaWKO-2XT67S65lSbS80hUojL-Mr_Wms9UxH6U54TsY,289
|
45
45
|
lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
|
46
|
-
lamindb-0.
|
47
|
-
lamindb-0.
|
48
|
-
lamindb-0.
|
49
|
-
lamindb-0.
|
46
|
+
lamindb-0.66.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
47
|
+
lamindb-0.66.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
48
|
+
lamindb-0.66.0.dist-info/METADATA,sha256=d9S5mPiFAzV1EhN7KB_VnugNCy7vdeivGqtxZsZPD60,3076
|
49
|
+
lamindb-0.66.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|