lamindb 0.65.0__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -54,7 +54,7 @@ Modules & settings:
54
54
 
55
55
  """
56
56
 
57
- __version__ = "0.65.0" # denote a release candidate for 0.1.0 with 0.1rc1
57
+ __version__ = "0.66.0" # denote a release candidate for 0.1.0 with 0.1rc1
58
58
 
59
59
  import os as _os
60
60
 
lamindb/_collection.py CHANGED
@@ -5,7 +5,7 @@ import anndata as ad
5
5
  import pandas as pd
6
6
  from lamin_utils import logger
7
7
  from lamindb_setup.dev._docs import doc_args
8
- from lnschema_core.models import Collection, Feature, FeatureSet
8
+ from lnschema_core.models import Collection, CollectionArtifact, Feature, FeatureSet
9
9
  from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, VisibilityChoice
10
10
 
11
11
  from lamindb._utils import attach_func_to_class_method
@@ -15,6 +15,7 @@ from lamindb.dev.versioning import get_uid_from_old_version, init_uid
15
15
 
16
16
  from . import _TESTING, Artifact, Run
17
17
  from ._artifact import parse_feature_sets_from_anndata
18
+ from ._query_set import QuerySet
18
19
  from ._registry import init_self_from_db
19
20
  from .dev._data import (
20
21
  add_transform_to_kwargs,
@@ -312,9 +313,11 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> Tuple[str, Dict[str, str]]:
312
313
  def mapped(
313
314
  self,
314
315
  label_keys: Optional[Union[str, List[str]]] = None,
315
- join_vars: Optional[Literal["auto", "inner"]] = "auto",
316
+ join: Optional[Literal["inner", "outer"]] = "inner",
316
317
  encode_labels: bool = True,
318
+ cache_categories: bool = True,
317
319
  parallel: bool = False,
320
+ dtype: Optional[str] = None,
318
321
  stream: bool = False,
319
322
  is_run_input: Optional[bool] = None,
320
323
  ) -> "MappedCollection":
@@ -328,7 +331,15 @@ def mapped(
328
331
  path_list.append(artifact.stage())
329
332
  else:
330
333
  path_list.append(artifact.path)
331
- return MappedCollection(path_list, label_keys, join_vars, encode_labels, parallel)
334
+ return MappedCollection(
335
+ path_list,
336
+ label_keys,
337
+ join,
338
+ encode_labels,
339
+ cache_categories,
340
+ parallel,
341
+ dtype,
342
+ )
332
343
 
333
344
 
334
345
  # docstring handled through attach_func_to_class_method
@@ -416,7 +427,14 @@ def save(self, *args, **kwargs) -> None:
416
427
  super(Collection, self).save()
417
428
  if hasattr(self, "_artifacts"):
418
429
  if self._artifacts is not None and len(self._artifacts) > 0:
419
- self.artifacts.set(self._artifacts)
430
+ links = [
431
+ CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
432
+ for artifact in self._artifacts
433
+ ]
434
+ # the below seems to preserve the order of the list in the
435
+ # auto-incrementing integer primary
436
+ # merely using .unordered_artifacts.set(*...) doesn't achieve this
437
+ CollectionArtifact.objects.bulk_create(links)
420
438
  save_feature_set_links(self)
421
439
 
422
440
 
@@ -429,6 +447,14 @@ def restore(self) -> None:
429
447
  self.artifact.save()
430
448
 
431
449
 
450
+ @property # type: ignore
451
+ @doc_args(Collection.artifacts.__doc__)
452
+ def artifacts(self) -> QuerySet:
453
+ """{}."""
454
+ _track_run_input(self)
455
+ return self.unordered_artifacts.order_by("collectionartifact__id")
456
+
457
+
432
458
  METHOD_NAMES = [
433
459
  "__init__",
434
460
  "from_anndata",
@@ -455,3 +481,4 @@ for name in METHOD_NAMES:
455
481
 
456
482
  # this seems a Django-generated function
457
483
  delattr(Collection, "get_visibility_display")
484
+ Collection.artifacts = artifacts
lamindb/_feature.py CHANGED
@@ -9,6 +9,7 @@ from lamindb._utils import attach_func_to_class_method
9
9
  from lamindb.dev._settings import settings
10
10
 
11
11
  from . import _TESTING
12
+ from ._query_set import RecordsList
12
13
 
13
14
  FEATURE_TYPES = {
14
15
  "int": "number",
@@ -86,7 +87,7 @@ def categoricals_from_df(df: "pd.DataFrame") -> Dict:
86
87
 
87
88
  @classmethod # type:ignore
88
89
  @doc_args(Feature.from_df.__doc__)
89
- def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
90
+ def from_df(cls, df: "pd.DataFrame") -> "RecordsList":
90
91
  """{}."""
91
92
  categoricals = categoricals_from_df(df)
92
93
 
@@ -141,7 +142,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
141
142
  # f" {colors.yellow('unmapped categories')}:\n "
142
143
  # f" {categoricals_with_unmapped_categories_formatted}"
143
144
  # )
144
- return features
145
+ return RecordsList(features)
145
146
 
146
147
 
147
148
  @doc_args(Feature.save.__doc__)
lamindb/_parents.py CHANGED
@@ -275,10 +275,15 @@ def _record_label(record: Registry, field: Optional[str] = None):
275
275
  )
276
276
  elif isinstance(record, Run):
277
277
  name = f'{record.transform.name.replace("&", "&amp;")}'
278
+ user_display = (
279
+ record.created_by.handle
280
+ if record.created_by.name is None
281
+ else record.created_by.name
282
+ )
278
283
  return (
279
284
  rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
280
285
  rf' FACE="Monospace">uid={record.transform.uid}<BR/>type={record.transform.type},'
281
- rf" user={record.created_by.name}<BR/>run_at={format_field_value(record.run_at)}</FONT>>"
286
+ rf" user={user_display}<BR/>run={format_field_value(record.run_at)}</FONT>>"
282
287
  )
283
288
  elif isinstance(record, Transform):
284
289
  name = f'{record.name.replace("&", "&amp;")}'
@@ -317,13 +322,13 @@ def _get_all_parent_runs(data: Union[Artifact, Collection]) -> List:
317
322
  inputs_run = (
318
323
  r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
319
324
  )
320
- if name == "file":
325
+ if name == "artifact":
321
326
  inputs_run += r.input_collections.all().filter(visibility=1).list()
322
327
  run_inputs_outputs += [(inputs_run, r)]
323
328
  outputs_run = (
324
329
  r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
325
330
  )
326
- if name == "file":
331
+ if name == "artifact":
327
332
  outputs_run += r.output_collections.all().filter(visibility=1).list()
328
333
  run_inputs_outputs += [(r, outputs_run)]
329
334
  inputs += inputs_run
@@ -337,8 +342,11 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
337
342
  all_runs: Set[Run] = set()
338
343
  run_inputs_outputs = []
339
344
 
340
- runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
341
- if name == "file":
345
+ if data.run is not None:
346
+ runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
347
+ else:
348
+ runs = set()
349
+ if name == "artifact" and data.run is not None:
342
350
  runs.update(
343
351
  {
344
352
  f.run
@@ -352,13 +360,13 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
352
360
  inputs_run = (
353
361
  r.__getattribute__(f"input_{name}s").all().filter(visibility=1).list()
354
362
  )
355
- if name == "file":
363
+ if name == "artifact":
356
364
  inputs_run += r.input_collections.all().filter(visibility=1).list()
357
365
  run_inputs_outputs += [(inputs_run, r)]
358
366
  outputs_run = (
359
367
  r.__getattribute__(f"output_{name}s").all().filter(visibility=1).list()
360
368
  )
361
- if name == "file":
369
+ if name == "artifact":
362
370
  outputs_run += r.output_collections.all().filter(visibility=1).list()
363
371
  run_inputs_outputs += [(r, outputs_run)]
364
372
  child_runs.update(
@@ -366,7 +374,7 @@ def _get_all_child_runs(data: Union[Artifact, Collection]) -> List:
366
374
  **{f"input_{name}s__id__in": [i.id for i in outputs_run]}
367
375
  ).list()
368
376
  )
369
- if name == "file":
377
+ if name == "artifact":
370
378
  child_runs.update(
371
379
  Run.filter(
372
380
  input_collections__id__in=[i.id for i in outputs_run]
lamindb/_query_set.py CHANGED
@@ -1,4 +1,5 @@
1
- from typing import Iterable, List, NamedTuple, Optional, Union
1
+ from collections import UserList
2
+ from typing import Dict, Iterable, List, NamedTuple, Optional, Union
2
3
 
3
4
  import pandas as pd
4
5
  from django.db import models
@@ -21,6 +22,40 @@ class MultipleResultsFound(Exception):
21
22
  # return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
22
23
 
23
24
 
25
+ def get_keys_from_df(data: List, registry: Registry) -> List[str]:
26
+ if len(data) > 0:
27
+ if isinstance(data[0], dict):
28
+ keys = list(data[0].keys())
29
+ else:
30
+ keys = list(data[0].__dict__.keys())
31
+ if "_state" in keys:
32
+ keys.remove("_state")
33
+ else:
34
+ keys = [
35
+ field.name
36
+ for field in registry._meta.fields
37
+ if not isinstance(field, models.ForeignKey)
38
+ ]
39
+ keys += [
40
+ f"{field.name}_id"
41
+ for field in registry._meta.fields
42
+ if isinstance(field, models.ForeignKey)
43
+ ]
44
+ return keys
45
+
46
+
47
+ class RecordsList(UserList):
48
+ """Is ordered, can't be queried, but has `.df()`."""
49
+
50
+ def __init__(self, records: List[Registry]):
51
+ super().__init__(record for record in records)
52
+
53
+ def df(self) -> pd.DataFrame:
54
+ keys = get_keys_from_df(self.data, self.data[0].__class__)
55
+ values = [record.__dict__ for record in self.data]
56
+ return pd.DataFrame(values, columns=keys)
57
+
58
+
24
59
  class QuerySet(models.QuerySet, CanValidate, IsTree):
25
60
  """Lazily loaded queried records returned by queries.
26
61
 
@@ -59,24 +94,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
59
94
  >>> ln.ULabel.filter().df(include=["labels__name", "labels__created_by_id"])
60
95
  """
61
96
  data = self.values()
62
- if len(data) > 0:
63
- keys = list(data[0].keys())
64
- if "created_at" in keys:
65
- keys.remove("created_at")
66
- else:
67
- keys = [
68
- field.name
69
- for field in self.model._meta.fields
70
- if (
71
- not isinstance(field, models.ForeignKey)
72
- and field.name != "created_at"
73
- )
74
- ]
75
- keys += [
76
- f"{field.name}_id"
77
- for field in self.model._meta.fields
78
- if isinstance(field, models.ForeignKey)
79
- ]
97
+ keys = get_keys_from_df(data, self.model)
80
98
  df = pd.DataFrame(self.values(), columns=keys)
81
99
  # if len(df) > 0 and "updated_at" in df:
82
100
  # df.updated_at = format_and_convert_to_local_time(df.updated_at)
lamindb/_registry.py CHANGED
@@ -469,7 +469,11 @@ def save(self, *args, **kwargs) -> None:
469
469
  if result is not None:
470
470
  init_self_from_db(self, result)
471
471
  else:
472
- super(Registry, self).save(*args, **kwargs)
472
+ # here, we can't use the parents argument
473
+ save_kwargs = kwargs.copy()
474
+ if "parents" in save_kwargs:
475
+ save_kwargs.pop("parents")
476
+ super(Registry, self).save(*args, **save_kwargs)
473
477
  if db is not None and db != "default":
474
478
  if hasattr(self, "labels"):
475
479
  from copy import copy
lamindb/dev/__init__.py CHANGED
@@ -14,6 +14,7 @@ Queries of registries:
14
14
 
15
15
  QuerySet
16
16
  QueryManager
17
+ RecordsList
17
18
 
18
19
  Functionality of data registries:
19
20
 
@@ -24,6 +25,7 @@ Functionality of data registries:
24
25
  FeatureManager
25
26
  LabelManager
26
27
  IsTree
28
+ IsVersioned
27
29
 
28
30
  Functionality of metadata registries:
29
31
 
@@ -51,10 +53,17 @@ Auxiliary tools:
51
53
  """
52
54
 
53
55
  from lamin_utils._inspect import InspectResult
54
- from lnschema_core.models import CanValidate, Data, HasParents, IsTree, Registry
56
+ from lnschema_core.models import (
57
+ CanValidate,
58
+ Data,
59
+ HasParents,
60
+ IsTree,
61
+ IsVersioned,
62
+ Registry,
63
+ )
55
64
 
56
65
  from lamindb._query_manager import QueryManager
57
- from lamindb._query_set import QuerySet
66
+ from lamindb._query_set import QuerySet, RecordsList
58
67
  from lamindb.dev._feature_manager import FeatureManager
59
68
  from lamindb.dev._label_manager import LabelManager
60
69
 
lamindb/dev/_data.py CHANGED
@@ -113,8 +113,7 @@ def describe(self: Data):
113
113
  "created_by": "👤",
114
114
  "transform": _transform_emoji(self.transform),
115
115
  "run": "👣",
116
- "initial_version": "🔖",
117
- "file": "📄",
116
+ "artifact": "📄",
118
117
  }
119
118
  if len(foreign_key_fields) > 0: # always True for Artifact and Collection
120
119
  record_msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}"
@@ -209,7 +208,7 @@ def add_labels(
209
208
  ) -> None:
210
209
  """{}."""
211
210
  if self._state.adding:
212
- raise ValueError("Please save the file/collection before adding a label!")
211
+ raise ValueError("Please save the artifact/collection before adding a label!")
213
212
 
214
213
  if isinstance(records, (QuerySet, QuerySet.__base__)): # need to have both
215
214
  records = records.list()
@@ -331,7 +330,7 @@ def add_labels(
331
330
  id=old_feature_set_link.feature_set_id
332
331
  ).one()
333
332
  logger.info(
334
- "no file links to it anymore, deleting feature set"
333
+ "nothing links to it anymore, deleting feature set"
335
334
  f" {old_feature_set}"
336
335
  )
337
336
  old_feature_set.delete()
@@ -368,7 +367,7 @@ def _track_run_input(
368
367
  if run is None:
369
368
  if settings.track_run_inputs:
370
369
  logger.hint(
371
- "you can auto-track this file as a run input by calling"
370
+ "you can auto-track these data as a run input by calling"
372
371
  " `ln.track()`"
373
372
  )
374
373
  # assume we have a run record
@@ -390,7 +389,7 @@ def _track_run_input(
390
389
  track_run_input = True
391
390
  else:
392
391
  logger.hint(
393
- "track this file as a run input by passing `is_run_input=True`"
392
+ "track these data as a run input by passing `is_run_input=True`"
394
393
  )
395
394
  else:
396
395
  track_run_input = is_run_input
@@ -23,11 +23,12 @@ def get_labels_as_dict(self: Data):
23
23
  ).items():
24
24
  if related_name in {
25
25
  "feature_sets",
26
- "files",
26
+ "artifacts",
27
27
  "input_of",
28
28
  "collections",
29
29
  "source_of",
30
30
  "report_of",
31
+ "environment_of",
31
32
  }:
32
33
  continue
33
34
  if self.id is not None:
@@ -5,6 +5,7 @@ from typing import List, Literal, Optional, Union
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
+ from lamin_utils import logger
8
9
  from lamindb_setup.dev.upath import UPath
9
10
 
10
11
  from .storage._backed_access import (
@@ -57,10 +58,14 @@ class MappedCollection:
57
58
  self,
58
59
  path_list: List[Union[str, PathLike]],
59
60
  label_keys: Optional[Union[str, List[str]]] = None,
60
- join_vars: Optional[Literal["auto", "inner"]] = "auto",
61
+ join: Optional[Literal["inner", "outer"]] = "outer",
61
62
  encode_labels: bool = True,
63
+ cache_categories: bool = True,
62
64
  parallel: bool = False,
65
+ dtype: Optional[str] = None,
63
66
  ):
67
+ assert join in {None, "inner", "outer"}
68
+
64
69
  self.storages = [] # type: ignore
65
70
  self.conns = [] # type: ignore
66
71
  self.parallel = parallel
@@ -79,16 +84,22 @@ class MappedCollection:
79
84
  self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
80
85
  self.storage_idx = np.repeat(np.arange(len(self.storages)), self.n_obs_list)
81
86
 
82
- self.join_vars = join_vars if len(path_list) > 1 else None
87
+ self.join_vars = join if len(path_list) > 1 else None
83
88
  self.var_indices = None
84
89
  if self.join_vars is not None:
85
90
  self._make_join_vars()
86
91
 
87
92
  self.encode_labels = encode_labels
88
93
  self.label_keys = [label_keys] if isinstance(label_keys, str) else label_keys
89
- if self.label_keys is not None and self.encode_labels:
90
- self._make_encoders(self.label_keys)
94
+ if self.label_keys is not None:
95
+ if cache_categories:
96
+ self._cache_categories(self.label_keys)
97
+ else:
98
+ self._cache_cats: dict = {}
99
+ if self.encode_labels:
100
+ self._make_encoders(self.label_keys)
91
101
 
102
+ self._dtype = dtype
92
103
  self._closed = False
93
104
 
94
105
  def _make_connections(self, path_list: list, parallel: bool):
@@ -104,6 +115,18 @@ class MappedCollection:
104
115
  self.conns.append(conn)
105
116
  self.storages.append(storage)
106
117
 
118
+ def _cache_categories(self, label_keys: list):
119
+ self._cache_cats = {}
120
+ decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
121
+ for label in label_keys:
122
+ self._cache_cats[label] = []
123
+ for storage in self.storages:
124
+ with _Connect(storage) as store:
125
+ cats = self.get_categories(store, label)
126
+ if cats is not None:
127
+ cats = decode(cats) if isinstance(cats[0], bytes) else cats[...]
128
+ self._cache_cats[label].append(cats)
129
+
107
130
  def _make_encoders(self, label_keys: list):
108
131
  self.encoders = []
109
132
  for label in label_keys:
@@ -115,20 +138,38 @@ class MappedCollection:
115
138
  for storage in self.storages:
116
139
  with _Connect(storage) as store:
117
140
  var_list.append(_safer_read_index(store["var"]))
141
+
142
+ self.var_joint = None
118
143
  if self.join_vars == "auto":
119
144
  vars_eq = all(var_list[0].equals(vrs) for vrs in var_list[1:])
120
145
  if vars_eq:
121
146
  self.join_vars = None
147
+ logger.info("The variables are same, no virtual join is performed.")
122
148
  return
123
149
  else:
124
- self.join_vars = "inner"
150
+ self.var_joint = reduce(pd.Index.intersection, var_list)
151
+ if len(self.var_joint) > 0:
152
+ self.join_vars = "inner"
153
+ logger.info(
154
+ "The intersection of variables is not empty, using virtual inner join."
155
+ )
156
+ else:
157
+ self.join_vars = "outer"
158
+ logger.info(
159
+ "The intersection of variables is empty, using virtual outer join."
160
+ )
161
+
125
162
  if self.join_vars == "inner":
126
- self.var_joint = reduce(pd.Index.intersection, var_list)
127
- if len(self.var_joint) == 0:
128
- raise ValueError(
129
- "The provided AnnData objects don't have shared varibales."
130
- )
163
+ if self.var_joint is None:
164
+ self.var_joint = reduce(pd.Index.intersection, var_list)
165
+ if len(self.var_joint) == 0:
166
+ raise ValueError(
167
+ "The provided AnnData objects don't have shared varibales."
168
+ )
131
169
  self.var_indices = [vrs.get_indexer(self.var_joint) for vrs in var_list]
170
+ elif self.join_vars == "outer":
171
+ self.var_joint = reduce(pd.Index.union, var_list)
172
+ self.var_indices = [self.var_joint.get_indexer(vrs) for vrs in var_list]
132
173
 
133
174
  def __len__(self):
134
175
  return self.n_obs
@@ -137,15 +178,21 @@ class MappedCollection:
137
178
  obs_idx = self.indices[idx]
138
179
  storage_idx = self.storage_idx[idx]
139
180
  if self.var_indices is not None:
140
- var_idxs = self.var_indices[storage_idx]
181
+ var_idxs_join = self.var_indices[storage_idx]
141
182
  else:
142
- var_idxs = None
183
+ var_idxs_join = None
143
184
 
144
185
  with _Connect(self.storages[storage_idx]) as store:
145
- out = [self.get_data_idx(store, obs_idx, var_idxs)]
186
+ out = [self.get_data_idx(store, obs_idx, var_idxs_join)]
146
187
  if self.label_keys is not None:
147
188
  for i, label in enumerate(self.label_keys):
148
- label_idx = self.get_label_idx(store, obs_idx, label)
189
+ if label in self._cache_cats:
190
+ cats = self._cache_cats[label][storage_idx]
191
+ if cats is None:
192
+ cats = []
193
+ else:
194
+ cats = None
195
+ label_idx = self.get_label_idx(store, obs_idx, label, cats)
149
196
  if self.encode_labels:
150
197
  label_idx = self.encoders[i][label_idx]
151
198
  out.append(label_idx)
@@ -155,26 +202,50 @@ class MappedCollection:
155
202
  self,
156
203
  storage: StorageType, # type: ignore
157
204
  idx: int,
158
- var_idxs: Optional[list] = None,
205
+ var_idxs_join: Optional[list] = None,
159
206
  layer_key: Optional[str] = None,
160
207
  ):
161
208
  """Get the index for the data."""
162
209
  layer = storage["X"] if layer_key is None else storage["layers"][layer_key] # type: ignore
163
210
  if isinstance(layer, ArrayTypes): # type: ignore
164
- # todo: better way to select variables
165
- return layer[idx] if var_idxs is None else layer[idx][var_idxs]
211
+ layer_idx = layer[idx]
212
+ if self.join_vars is None:
213
+ result = layer_idx
214
+ if self._dtype is not None:
215
+ result = result.astype(self._dtype, copy=False)
216
+ elif self.join_vars == "outer":
217
+ dtype = layer_idx.dtype if self._dtype is None else self._dtype
218
+ result = np.zeros(len(self.var_joint), dtype=dtype)
219
+ result[var_idxs_join] = layer_idx
220
+ else: # inner join
221
+ result = layer_idx[var_idxs_join]
222
+ if self._dtype is not None:
223
+ result = result.astype(self._dtype, copy=False)
224
+ return result
166
225
  else: # assume csr_matrix here
167
226
  data = layer["data"]
168
227
  indices = layer["indices"]
169
228
  indptr = layer["indptr"]
170
229
  s = slice(*(indptr[idx : idx + 2]))
171
- # this requires more memory than csr_matrix when var_idxs is not None
172
- # but it is faster
173
- layer_idx = np.zeros(layer.attrs["shape"][1])
174
- layer_idx[indices[s]] = data[s]
175
- return layer_idx if var_idxs is None else layer_idx[var_idxs]
230
+ data_s = data[s]
231
+ dtype = data_s.dtype if self._dtype is None else self._dtype
232
+ if self.join_vars == "outer":
233
+ layer_idx = np.zeros(len(self.var_joint), dtype=dtype)
234
+ layer_idx[var_idxs_join[indices[s]]] = data_s
235
+ else:
236
+ layer_idx = np.zeros(layer.attrs["shape"][1], dtype=dtype)
237
+ layer_idx[indices[s]] = data_s
238
+ if self.join_vars == "inner":
239
+ layer_idx = layer_idx[var_idxs_join]
240
+ return layer_idx
176
241
 
177
- def get_label_idx(self, storage: StorageType, idx: int, label_key: str): # type: ignore
242
+ def get_label_idx(
243
+ self,
244
+ storage: StorageType,
245
+ idx: int,
246
+ label_key: str,
247
+ categories: Optional[list] = None,
248
+ ):
178
249
  """Get the index for the label by key."""
179
250
  obs = storage["obs"] # type: ignore
180
251
  # how backwards compatible do we want to be here actually?
@@ -186,9 +257,11 @@ class MappedCollection:
186
257
  label = labels[idx]
187
258
  else:
188
259
  label = labels["codes"][idx]
189
-
190
- cats = self.get_categories(storage, label_key)
191
- if cats is not None:
260
+ if categories is not None:
261
+ cats = categories
262
+ else:
263
+ cats = self.get_categories(storage, label_key)
264
+ if cats is not None and len(cats) > 0:
192
265
  label = cats[label]
193
266
  if isinstance(label, bytes):
194
267
  label = label.decode("utf-8")
@@ -215,11 +288,14 @@ class MappedCollection:
215
288
  """Get merged labels."""
216
289
  labels_merge = []
217
290
  decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
218
- for storage in self.storages:
291
+ for i, storage in enumerate(self.storages):
219
292
  with _Connect(storage) as store:
220
293
  codes = self.get_codes(store, label_key)
221
294
  labels = decode(codes) if isinstance(codes[0], bytes) else codes
222
- cats = self.get_categories(store, label_key)
295
+ if label_key in self._cache_cats:
296
+ cats = self._cache_cats[label_key][i]
297
+ else:
298
+ cats = self.get_categories(store, label_key)
223
299
  if cats is not None:
224
300
  cats = decode(cats) if isinstance(cats[0], bytes) else cats
225
301
  labels = cats[labels]
@@ -230,9 +306,12 @@ class MappedCollection:
230
306
  """Get merged categories."""
231
307
  cats_merge = set()
232
308
  decode = np.frompyfunc(lambda x: x.decode("utf-8"), 1, 1)
233
- for storage in self.storages:
309
+ for i, storage in enumerate(self.storages):
234
310
  with _Connect(storage) as store:
235
- cats = self.get_categories(store, label_key)
311
+ if label_key in self._cache_cats:
312
+ cats = self._cache_cats[label_key][i]
313
+ else:
314
+ cats = self.get_categories(store, label_key)
236
315
  if cats is not None:
237
316
  cats = decode(cats) if isinstance(cats[0], bytes) else cats
238
317
  cats_merge.update(cats)
@@ -33,7 +33,9 @@ msg_manual_init = (
33
33
  )
34
34
 
35
35
 
36
- class UpdateNbWithNonInteractiveEditorError(Exception):
36
+ # we don't want a real error here, as this is so frequent
37
+ # in VSCode
38
+ class UpdateNbWithNonInteractiveEditor(SystemExit):
37
39
  pass
38
40
 
39
41
 
@@ -230,7 +232,7 @@ class run_context:
230
232
  "it looks like you are running ln.track() from a "
231
233
  "notebook!\nplease install nbproject: pip install nbproject"
232
234
  )
233
- elif isinstance(e, UpdateNbWithNonInteractiveEditorError):
235
+ elif isinstance(e, UpdateNbWithNonInteractiveEditor):
234
236
  raise e
235
237
  elif isinstance(e, (NotebookNotSavedError, NoTitleError)):
236
238
  raise e
@@ -435,7 +437,7 @@ class run_context:
435
437
  cls._notebook_meta = metadata # type: ignore
436
438
  else:
437
439
  msg = msg_manual_init.format(notebook_path=notebook_path_str)
438
- raise UpdateNbWithNonInteractiveEditorError(msg)
440
+ raise UpdateNbWithNonInteractiveEditor(msg)
439
441
 
440
442
  if _env in ("lab", "notebook"):
441
443
  # save the notebook in case that title was updated
@@ -450,7 +452,7 @@ class run_context:
450
452
  is_interactive = _seconds_modified(_filepath) < 1.5 # should be ~1 sec
451
453
  if not is_interactive and needs_init:
452
454
  msg = msg_manual_init.format(notebook_path=_filepath)
453
- raise UpdateNbWithNonInteractiveEditorError(msg)
455
+ raise UpdateNbWithNonInteractiveEditor(msg)
454
456
 
455
457
  nbproject_id = metadata["id"]
456
458
  nbproject_version = metadata["version"]
@@ -509,7 +511,7 @@ class run_context:
509
511
  cls._notebook_meta = metadata # type: ignore
510
512
  else:
511
513
  msg = msg_manual_init.format(notebook_path=filepath)
512
- raise UpdateNbWithNonInteractiveEditorError(msg)
514
+ raise UpdateNbWithNonInteractiveEditor(msg)
513
515
  else:
514
516
  from lamin_cli._transform import update_transform_source_metadata
515
517
 
@@ -6,9 +6,13 @@ from lnschema_core.models import Run
6
6
 
7
7
 
8
8
  def track_environment(run: Run) -> None:
9
- filepath = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}"
9
+ filepath = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
10
10
  # create a requirements.txt
11
11
  # we don't create a conda environment.yml mostly for its slowness
12
- result = subprocess.run(f"pip freeze > {str(filepath)}", shell=True)
13
- if result.returncode == 0:
12
+ try:
13
+ result = subprocess.run(f"pip freeze > {str(filepath)}", shell=True)
14
+ except OSError as e:
15
+ result = None
16
+ logger.warning(f"could not run pip freeze with error {e}")
17
+ if result is not None and result.returncode == 0:
14
18
  logger.info(f"tracked pip freeze > {str(filepath)}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.65.0
3
+ Version: 0.66.0
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: lnschema_core==0.60.0
13
- Requires-Dist: lamindb_setup==0.63.0
12
+ Requires-Dist: lnschema_core==0.61.0
13
+ Requires-Dist: lamindb_setup==0.64.0
14
14
  Requires-Dist: lamin_utils==0.13.0
15
- Requires-Dist: lamin_cli==0.5.0
15
+ Requires-Dist: lamin_cli==0.6.0
16
16
  Requires-Dist: rapidfuzz
17
17
  Requires-Dist: pyarrow
18
18
  Requires-Dist: typing_extensions!=4.6.0
@@ -62,14 +62,14 @@ Provides-Extra: zarr
62
62
 
63
63
  # LaminDB - A data framework for biology
64
64
 
65
- LaminDB is an open-source Python framework to manage biological data & analyses in generic backends:
65
+ LaminDB is an open-source Python framework to manage biological data & analyses:
66
66
 
67
- - Access data & metadata across storage (files, arrays) & database (SQL) backends.
68
- - Track data flow across notebooks, pipelines & UI.
69
- - Manage registries for experimental metadata & in-house ontologies, import public ontologies.
70
- - Validate, standardize & annotate data using registries.
67
+ - Access data & metadata across storage & databases.
68
+ - Track data lineage across notebooks & pipelines.
69
+ - Manage registries for experimental metadata & in-house ontologies.
70
+ - Validate, standardize & annotate data.
71
71
  - Organize and share data across a mesh of LaminDB instances.
72
- - Manage data access with an auditable system of record.
72
+ - Manage data access, leverage an auditable system of record.
73
73
 
74
74
  ## Documentation
75
75
 
@@ -1,15 +1,15 @@
1
- lamindb/__init__.py,sha256=-_IG5yZQ4fWBdgSG1qMl8oCQJf5WgAol6keJ2PuFh5I,2691
1
+ lamindb/__init__.py,sha256=rgCY0tETrHKyB7V5f2Y3BhY4BAJicGUUYzSRwIdRlmI,2691
2
2
  lamindb/_artifact.py,sha256=eWsLj8x6Cqy8MR7LxKyScxozM52MaqOTCK8gplloP2c,38087
3
- lamindb/_collection.py,sha256=ZGzx58Tm76wGxMBU9nzKRkme9IduLmiuL0H-8byPkdY,16812
3
+ lamindb/_collection.py,sha256=gVcs3A200JZilfdYd0zrX29UrAmhP9Eovu6r_SIxXQ4,17634
4
4
  lamindb/_delete.py,sha256=jO6kcIoxY6EFgqiVF2vlbXaCaqlI25AvBo7nre3JXkQ,1968
5
- lamindb/_feature.py,sha256=AqQZTOL38aElT3-e7WCj8Fm2Xcso0uJO0oE72fQCScU,5989
5
+ lamindb/_feature.py,sha256=tEcqFoEj5yp4LSJfMGyiVvxDUuLoZaik6lo05ZKcCtE,6036
6
6
  lamindb/_feature_set.py,sha256=KYgdmMdXb21pfpir1J1O21in3nJvUeznECOB38qfTvk,8654
7
7
  lamindb/_filter.py,sha256=YwWqviJ34kHTMJ8NYlrEw-vsrXkKrVIPsEZSBVvMcrI,1163
8
8
  lamindb/_from_values.py,sha256=dKz4cTUBRkXOOzFX2Ix2cKhK2Lw9PyTgi7d0PI-kh3c,11869
9
- lamindb/_parents.py,sha256=lDNuOys4OW5wSHzH6fxEcHPsOjwVgXFq0q-I-noPO5A,13907
9
+ lamindb/_parents.py,sha256=hyoN92YnfJFmRWmQMLLUjTKKwnIOJci5z6csMjsdYDE,14165
10
10
  lamindb/_query_manager.py,sha256=m4WUScviuNlMHeNEPZ8H8y0YsMXSbwWyfIgS4L00wBY,4332
11
- lamindb/_query_set.py,sha256=nacnkFaVYDmuFkpXr0fb3uNcWP6XahbMeIvJic0YCSk,9967
12
- lamindb/_registry.py,sha256=UX4O3Ne9QajcfG2FGXyVkyF6b-McPPxJmRQ2MwXZy3w,17254
11
+ lamindb/_query_set.py,sha256=tItL2YNdycpbXklYd8aW4jJX6Z-kGcNclscg0v3l8t4,10495
12
+ lamindb/_registry.py,sha256=MxYpJUKD6Qu5eO2jO6JOcQBBGxfQpiEGPJrFaXau_jw,17421
13
13
  lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
14
14
  lamindb/_save.py,sha256=UlRHJGUiHGOXv90wmawZVsOqhJIqk8f1wj8MW3Rlq_c,10535
15
15
  lamindb/_storage.py,sha256=mz2Cy0CTaeJGA03A1FPQmmH0Vt2ib_KlXklaLqtN1mU,394
@@ -18,14 +18,14 @@ lamindb/_ulabel.py,sha256=HALoy6HerRnehR-u8zPH-qmiFQHWxeAwkZ31jxjrfgI,1893
18
18
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
19
19
  lamindb/_validate.py,sha256=fS2685MYX9h6iAWymEorJJmDYA2CGNOSmJpesbG6faU,14400
20
20
  lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
21
- lamindb/dev/__init__.py,sha256=Sm1-zkgy_7MKwFheXDrUKiY7ZBKX_VUQVfbr_hEPVqE,1089
22
- lamindb/dev/_data.py,sha256=C7Z3mygwx4IGoFOtjvnmA_-O7VXZqNvJJh6QAgN2MBM,17091
21
+ lamindb/dev/__init__.py,sha256=LLqivujL8c-oKWC15SJepAYyrTlLNvql5Vdwunc0qvE,1174
22
+ lamindb/dev/_data.py,sha256=YPZ664qGKMl34LbZCMCEFIxQ-E81iAt_b3lvMiTe-oc,17066
23
23
  lamindb/dev/_feature_manager.py,sha256=jn8x_JbrtLFelmaFh4noOXqGSCfqVuVX0quoa7gTJtM,9366
24
- lamindb/dev/_label_manager.py,sha256=q8rlFA_KgyVL_rE7h52dA6whCxGu72YTj62cilKWXGM,8706
25
- lamindb/dev/_mapped_collection.py,sha256=Woz5iUnCzQGraF-pjzZF0fQHEJlXnL6lpkIXq_k_d64,11129
26
- lamindb/dev/_run_context.py,sha256=Hgmq0yYQsLHK3cUVKR3V2bFUSllaIWO5S7a8GQcjEl0,22919
24
+ lamindb/dev/_label_manager.py,sha256=6E_pSQicqfTWDGEGe4WPn_3GZl_CCIMTZ6xJDh4EkC0,8740
25
+ lamindb/dev/_mapped_collection.py,sha256=NRjOYnC1d3IcVyqhT_Yp0xycepmeytlngYnw-5Xcnw4,14445
26
+ lamindb/dev/_run_context.py,sha256=4eBZsbfcFpW5nqmRLbRZxuA5oeRW17XVHMzVtMH0bKA,22965
27
27
  lamindb/dev/_settings.py,sha256=nixk8lVijCbq_fRlUpkX5gvO9AdgUFjbXzFThAJhGBA,3824
28
- lamindb/dev/_track_environment.py,sha256=GelTuDF_k9dXTLV5AcibfzXllmTXXorBy2RqJyb6GuI,508
28
+ lamindb/dev/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
29
29
  lamindb/dev/_view_tree.py,sha256=K-C1BsOiEupwgkhyrsGxLFxHU45SAkiKsQbeOV9PbaY,3421
30
30
  lamindb/dev/exceptions.py,sha256=PHk5lyBdJPrrEQcid3ItfdNzz3fgiQsUmsEDdz063F0,197
31
31
  lamindb/dev/fields.py,sha256=0f0wai2aCjQYAQgI04UlCOAHo2MQknp4AsOKFDmE9iU,163
@@ -43,7 +43,7 @@ lamindb/dev/storage/file.py,sha256=jalzFQ8q110UUu_GGQBkU-g3M04h5g4LJ3nLjCzJ4pU,5
43
43
  lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
44
44
  lamindb/setup/__init__.py,sha256=WaWKO-2XT67S65lSbS80hUojL-Mr_Wms9UxH6U54TsY,289
45
45
  lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
46
- lamindb-0.65.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
47
- lamindb-0.65.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
48
- lamindb-0.65.0.dist-info/METADATA,sha256=OPOAcrY3znOQE4q664MuFvFEN30kOwunGfrup364TUE,3165
49
- lamindb-0.65.0.dist-info/RECORD,,
46
+ lamindb-0.66.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
47
+ lamindb-0.66.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
48
+ lamindb-0.66.0.dist-info/METADATA,sha256=d9S5mPiFAzV1EhN7KB_VnugNCy7vdeivGqtxZsZPD60,3076
49
+ lamindb-0.66.0.dist-info/RECORD,,