lamindb 0.65.1__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -54,7 +54,7 @@ Modules & settings:
54
54
 
55
55
  """
56
56
 
57
- __version__ = "0.65.1" # denote a release candidate for 0.1.0 with 0.1rc1
57
+ __version__ = "0.66.0" # denote a release candidate for 0.1.0 with 0.1rc1
58
58
 
59
59
  import os as _os
60
60
 
lamindb/_collection.py CHANGED
@@ -5,7 +5,7 @@ import anndata as ad
5
5
  import pandas as pd
6
6
  from lamin_utils import logger
7
7
  from lamindb_setup.dev._docs import doc_args
8
- from lnschema_core.models import Collection, Feature, FeatureSet
8
+ from lnschema_core.models import Collection, CollectionArtifact, Feature, FeatureSet
9
9
  from lnschema_core.types import AnnDataLike, DataLike, FieldAttr, VisibilityChoice
10
10
 
11
11
  from lamindb._utils import attach_func_to_class_method
@@ -15,6 +15,7 @@ from lamindb.dev.versioning import get_uid_from_old_version, init_uid
15
15
 
16
16
  from . import _TESTING, Artifact, Run
17
17
  from ._artifact import parse_feature_sets_from_anndata
18
+ from ._query_set import QuerySet
18
19
  from ._registry import init_self_from_db
19
20
  from .dev._data import (
20
21
  add_transform_to_kwargs,
@@ -312,7 +313,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> Tuple[str, Dict[str, str]]:
312
313
  def mapped(
313
314
  self,
314
315
  label_keys: Optional[Union[str, List[str]]] = None,
315
- join_vars: Optional[Literal["auto", "inner"]] = "auto",
316
+ join: Optional[Literal["inner", "outer"]] = "inner",
316
317
  encode_labels: bool = True,
317
318
  cache_categories: bool = True,
318
319
  parallel: bool = False,
@@ -333,7 +334,7 @@ def mapped(
333
334
  return MappedCollection(
334
335
  path_list,
335
336
  label_keys,
336
- join_vars,
337
+ join,
337
338
  encode_labels,
338
339
  cache_categories,
339
340
  parallel,
@@ -426,7 +427,14 @@ def save(self, *args, **kwargs) -> None:
426
427
  super(Collection, self).save()
427
428
  if hasattr(self, "_artifacts"):
428
429
  if self._artifacts is not None and len(self._artifacts) > 0:
429
- self.artifacts.set(self._artifacts)
430
+ links = [
431
+ CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
432
+ for artifact in self._artifacts
433
+ ]
434
+ # the below seems to preserve the order of the list in the
435
+ # auto-incrementing integer primary
436
+ # merely using .unordered_artifacts.set(*...) doesn't achieve this
437
+ CollectionArtifact.objects.bulk_create(links)
430
438
  save_feature_set_links(self)
431
439
 
432
440
 
@@ -439,6 +447,14 @@ def restore(self) -> None:
439
447
  self.artifact.save()
440
448
 
441
449
 
450
+ @property # type: ignore
451
+ @doc_args(Collection.artifacts.__doc__)
452
+ def artifacts(self) -> QuerySet:
453
+ """{}."""
454
+ _track_run_input(self)
455
+ return self.unordered_artifacts.order_by("collectionartifact__id")
456
+
457
+
442
458
  METHOD_NAMES = [
443
459
  "__init__",
444
460
  "from_anndata",
@@ -465,3 +481,4 @@ for name in METHOD_NAMES:
465
481
 
466
482
  # this seems a Django-generated function
467
483
  delattr(Collection, "get_visibility_display")
484
+ Collection.artifacts = artifacts
lamindb/_feature.py CHANGED
@@ -9,6 +9,7 @@ from lamindb._utils import attach_func_to_class_method
9
9
  from lamindb.dev._settings import settings
10
10
 
11
11
  from . import _TESTING
12
+ from ._query_set import RecordsList
12
13
 
13
14
  FEATURE_TYPES = {
14
15
  "int": "number",
@@ -86,7 +87,7 @@ def categoricals_from_df(df: "pd.DataFrame") -> Dict:
86
87
 
87
88
  @classmethod # type:ignore
88
89
  @doc_args(Feature.from_df.__doc__)
89
- def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
90
+ def from_df(cls, df: "pd.DataFrame") -> "RecordsList":
90
91
  """{}."""
91
92
  categoricals = categoricals_from_df(df)
92
93
 
@@ -141,7 +142,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
141
142
  # f" {colors.yellow('unmapped categories')}:\n "
142
143
  # f" {categoricals_with_unmapped_categories_formatted}"
143
144
  # )
144
- return features
145
+ return RecordsList(features)
145
146
 
146
147
 
147
148
  @doc_args(Feature.save.__doc__)
lamindb/_parents.py CHANGED
@@ -275,10 +275,15 @@ def _record_label(record: Registry, field: Optional[str] = None):
275
275
  )
276
276
  elif isinstance(record, Run):
277
277
  name = f'{record.transform.name.replace("&", "&amp;")}'
278
+ user_display = (
279
+ record.created_by.handle
280
+ if record.created_by.name is None
281
+ else record.created_by.name
282
+ )
278
283
  return (
279
284
  rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
280
285
  rf' FACE="Monospace">uid={record.transform.uid}<BR/>type={record.transform.type},'
281
- rf" user={record.created_by.name}<BR/>run_at={format_field_value(record.run_at)}</FONT>>"
286
+ rf" user={user_display}<BR/>run={format_field_value(record.run_at)}</FONT>>"
282
287
  )
283
288
  elif isinstance(record, Transform):
284
289
  name = f'{record.name.replace("&", "&amp;")}'
lamindb/_query_set.py CHANGED
@@ -1,4 +1,5 @@
1
- from typing import Iterable, List, NamedTuple, Optional, Union
1
+ from collections import UserList
2
+ from typing import Dict, Iterable, List, NamedTuple, Optional, Union
2
3
 
3
4
  import pandas as pd
4
5
  from django.db import models
@@ -21,6 +22,40 @@ class MultipleResultsFound(Exception):
21
22
  # return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
22
23
 
23
24
 
25
+ def get_keys_from_df(data: List, registry: Registry) -> List[str]:
26
+ if len(data) > 0:
27
+ if isinstance(data[0], dict):
28
+ keys = list(data[0].keys())
29
+ else:
30
+ keys = list(data[0].__dict__.keys())
31
+ if "_state" in keys:
32
+ keys.remove("_state")
33
+ else:
34
+ keys = [
35
+ field.name
36
+ for field in registry._meta.fields
37
+ if not isinstance(field, models.ForeignKey)
38
+ ]
39
+ keys += [
40
+ f"{field.name}_id"
41
+ for field in registry._meta.fields
42
+ if isinstance(field, models.ForeignKey)
43
+ ]
44
+ return keys
45
+
46
+
47
+ class RecordsList(UserList):
48
+ """Is ordered, can't be queried, but has `.df()`."""
49
+
50
+ def __init__(self, records: List[Registry]):
51
+ super().__init__(record for record in records)
52
+
53
+ def df(self) -> pd.DataFrame:
54
+ keys = get_keys_from_df(self.data, self.data[0].__class__)
55
+ values = [record.__dict__ for record in self.data]
56
+ return pd.DataFrame(values, columns=keys)
57
+
58
+
24
59
  class QuerySet(models.QuerySet, CanValidate, IsTree):
25
60
  """Lazily loaded queried records returned by queries.
26
61
 
@@ -59,24 +94,7 @@ class QuerySet(models.QuerySet, CanValidate, IsTree):
59
94
  >>> ln.ULabel.filter().df(include=["labels__name", "labels__created_by_id"])
60
95
  """
61
96
  data = self.values()
62
- if len(data) > 0:
63
- keys = list(data[0].keys())
64
- if "created_at" in keys:
65
- keys.remove("created_at")
66
- else:
67
- keys = [
68
- field.name
69
- for field in self.model._meta.fields
70
- if (
71
- not isinstance(field, models.ForeignKey)
72
- and field.name != "created_at"
73
- )
74
- ]
75
- keys += [
76
- f"{field.name}_id"
77
- for field in self.model._meta.fields
78
- if isinstance(field, models.ForeignKey)
79
- ]
97
+ keys = get_keys_from_df(data, self.model)
80
98
  df = pd.DataFrame(self.values(), columns=keys)
81
99
  # if len(df) > 0 and "updated_at" in df:
82
100
  # df.updated_at = format_and_convert_to_local_time(df.updated_at)
lamindb/dev/__init__.py CHANGED
@@ -14,6 +14,7 @@ Queries of registries:
14
14
 
15
15
  QuerySet
16
16
  QueryManager
17
+ RecordsList
17
18
 
18
19
  Functionality of data registries:
19
20
 
@@ -62,7 +63,7 @@ from lnschema_core.models import (
62
63
  )
63
64
 
64
65
  from lamindb._query_manager import QueryManager
65
- from lamindb._query_set import QuerySet
66
+ from lamindb._query_set import QuerySet, RecordsList
66
67
  from lamindb.dev._feature_manager import FeatureManager
67
68
  from lamindb.dev._label_manager import LabelManager
68
69
 
lamindb/dev/_mapped_collection.py CHANGED
@@ -5,6 +5,7 @@ from typing import List, Literal, Optional, Union
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
+ from lamin_utils import logger
8
9
  from lamindb_setup.dev.upath import UPath
9
10
 
10
11
  from .storage._backed_access import (
@@ -57,13 +58,13 @@ class MappedCollection:
57
58
  self,
58
59
  path_list: List[Union[str, PathLike]],
59
60
  label_keys: Optional[Union[str, List[str]]] = None,
60
- join_vars: Optional[Literal["auto", "inner", "outer"]] = "auto",
61
+ join: Optional[Literal["inner", "outer"]] = "outer",
61
62
  encode_labels: bool = True,
62
63
  cache_categories: bool = True,
63
64
  parallel: bool = False,
64
65
  dtype: Optional[str] = None,
65
66
  ):
66
- assert join_vars in {None, "auto", "inner", "outer"}
67
+ assert join in {None, "inner", "outer"}
67
68
 
68
69
  self.storages = [] # type: ignore
69
70
  self.conns = [] # type: ignore
@@ -83,7 +84,7 @@ class MappedCollection:
83
84
  self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
84
85
  self.storage_idx = np.repeat(np.arange(len(self.storages)), self.n_obs_list)
85
86
 
86
- self.join_vars = join_vars if len(path_list) > 1 else None
87
+ self.join_vars = join if len(path_list) > 1 else None
87
88
  self.var_indices = None
88
89
  if self.join_vars is not None:
89
90
  self._make_join_vars()
@@ -99,7 +100,6 @@ class MappedCollection:
99
100
  self._make_encoders(self.label_keys)
100
101
 
101
102
  self._dtype = dtype
102
-
103
103
  self._closed = False
104
104
 
105
105
  def _make_connections(self, path_list: list, parallel: bool):
@@ -144,13 +144,20 @@ class MappedCollection:
144
144
  vars_eq = all(var_list[0].equals(vrs) for vrs in var_list[1:])
145
145
  if vars_eq:
146
146
  self.join_vars = None
147
+ logger.info("The variables are same, no virtual join is performed.")
147
148
  return
148
149
  else:
149
150
  self.var_joint = reduce(pd.Index.intersection, var_list)
150
151
  if len(self.var_joint) > 0:
151
152
  self.join_vars = "inner"
153
+ logger.info(
154
+ "The intersection of variables is not empty, using virtual inner join."
155
+ )
152
156
  else:
153
157
  self.join_vars = "outer"
158
+ logger.info(
159
+ "The intersection of variables is empty, using virtual outer join."
160
+ )
154
161
 
155
162
  if self.join_vars == "inner":
156
163
  if self.var_joint is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.65.1
3
+ Version: 0.66.0
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: lnschema_core==0.60.0
13
- Requires-Dist: lamindb_setup==0.63.1
12
+ Requires-Dist: lnschema_core==0.61.0
13
+ Requires-Dist: lamindb_setup==0.64.0
14
14
  Requires-Dist: lamin_utils==0.13.0
15
- Requires-Dist: lamin_cli==0.5.0
15
+ Requires-Dist: lamin_cli==0.6.0
16
16
  Requires-Dist: rapidfuzz
17
17
  Requires-Dist: pyarrow
18
18
  Requires-Dist: typing_extensions!=4.6.0
@@ -62,14 +62,14 @@ Provides-Extra: zarr
62
62
 
63
63
  # LaminDB - A data framework for biology
64
64
 
65
- LaminDB is an open-source Python framework to manage biological data & analyses in generic backends:
65
+ LaminDB is an open-source Python framework to manage biological data & analyses:
66
66
 
67
- - Access data & metadata across storage (files, arrays) & database (SQL) backends.
68
- - Track data flow across notebooks, pipelines & UI.
69
- - Manage registries for experimental metadata & in-house ontologies, import public ontologies.
70
- - Validate, standardize & annotate data using registries.
67
+ - Access data & metadata across storage & databases.
68
+ - Track data lineage across notebooks & pipelines.
69
+ - Manage registries for experimental metadata & in-house ontologies.
70
+ - Validate, standardize & annotate data.
71
71
  - Organize and share data across a mesh of LaminDB instances.
72
- - Manage data access with an auditable system of record.
72
+ - Manage data access, leverage an auditable system of record.
73
73
 
74
74
  ## Documentation
75
75
 
@@ -1,14 +1,14 @@
1
- lamindb/__init__.py,sha256=wu6XUzgFXCaSVeiVPAHKArxslewM8S2OY7-Lwk2w8EM,2691
1
+ lamindb/__init__.py,sha256=rgCY0tETrHKyB7V5f2Y3BhY4BAJicGUUYzSRwIdRlmI,2691
2
2
  lamindb/_artifact.py,sha256=eWsLj8x6Cqy8MR7LxKyScxozM52MaqOTCK8gplloP2c,38087
3
- lamindb/_collection.py,sha256=0gitrRx4K1p9dWp3VSPnEczeuZcRUJPeu9_crTCthZQ,16968
3
+ lamindb/_collection.py,sha256=gVcs3A200JZilfdYd0zrX29UrAmhP9Eovu6r_SIxXQ4,17634
4
4
  lamindb/_delete.py,sha256=jO6kcIoxY6EFgqiVF2vlbXaCaqlI25AvBo7nre3JXkQ,1968
5
- lamindb/_feature.py,sha256=AqQZTOL38aElT3-e7WCj8Fm2Xcso0uJO0oE72fQCScU,5989
5
+ lamindb/_feature.py,sha256=tEcqFoEj5yp4LSJfMGyiVvxDUuLoZaik6lo05ZKcCtE,6036
6
6
  lamindb/_feature_set.py,sha256=KYgdmMdXb21pfpir1J1O21in3nJvUeznECOB38qfTvk,8654
7
7
  lamindb/_filter.py,sha256=YwWqviJ34kHTMJ8NYlrEw-vsrXkKrVIPsEZSBVvMcrI,1163
8
8
  lamindb/_from_values.py,sha256=dKz4cTUBRkXOOzFX2Ix2cKhK2Lw9PyTgi7d0PI-kh3c,11869
9
- lamindb/_parents.py,sha256=KU_xH5mWBEQzBi7VSemDOOfB7H1PpReh0U6268v-8Q0,14020
9
+ lamindb/_parents.py,sha256=hyoN92YnfJFmRWmQMLLUjTKKwnIOJci5z6csMjsdYDE,14165
10
10
  lamindb/_query_manager.py,sha256=m4WUScviuNlMHeNEPZ8H8y0YsMXSbwWyfIgS4L00wBY,4332
11
- lamindb/_query_set.py,sha256=nacnkFaVYDmuFkpXr0fb3uNcWP6XahbMeIvJic0YCSk,9967
11
+ lamindb/_query_set.py,sha256=tItL2YNdycpbXklYd8aW4jJX6Z-kGcNclscg0v3l8t4,10495
12
12
  lamindb/_registry.py,sha256=MxYpJUKD6Qu5eO2jO6JOcQBBGxfQpiEGPJrFaXau_jw,17421
13
13
  lamindb/_run.py,sha256=659lqY32GW7F41rFUUo37OftUa38-p8yaV9Z0oF32CE,1120
14
14
  lamindb/_save.py,sha256=UlRHJGUiHGOXv90wmawZVsOqhJIqk8f1wj8MW3Rlq_c,10535
@@ -18,11 +18,11 @@ lamindb/_ulabel.py,sha256=HALoy6HerRnehR-u8zPH-qmiFQHWxeAwkZ31jxjrfgI,1893
18
18
  lamindb/_utils.py,sha256=LGdiW4k3GClLz65vKAVRkL6Tw-Gkx9DWAdez1jyA5bE,428
19
19
  lamindb/_validate.py,sha256=fS2685MYX9h6iAWymEorJJmDYA2CGNOSmJpesbG6faU,14400
20
20
  lamindb/_view.py,sha256=yFMu4vnt0YqvN1q11boAkwigxCH1gdliDUSbzh3IuDw,2175
21
- lamindb/dev/__init__.py,sha256=8EwMNFJ45Ws5ApoYjaXOuca4uvyR6WtjAvnfbq7M11g,1146
21
+ lamindb/dev/__init__.py,sha256=LLqivujL8c-oKWC15SJepAYyrTlLNvql5Vdwunc0qvE,1174
22
22
  lamindb/dev/_data.py,sha256=YPZ664qGKMl34LbZCMCEFIxQ-E81iAt_b3lvMiTe-oc,17066
23
23
  lamindb/dev/_feature_manager.py,sha256=jn8x_JbrtLFelmaFh4noOXqGSCfqVuVX0quoa7gTJtM,9366
24
24
  lamindb/dev/_label_manager.py,sha256=6E_pSQicqfTWDGEGe4WPn_3GZl_CCIMTZ6xJDh4EkC0,8740
25
- lamindb/dev/_mapped_collection.py,sha256=BiiVeFPs0g7TavYHtPh098cCN_8oFCo1nWguASKAAsI,14062
25
+ lamindb/dev/_mapped_collection.py,sha256=NRjOYnC1d3IcVyqhT_Yp0xycepmeytlngYnw-5Xcnw4,14445
26
26
  lamindb/dev/_run_context.py,sha256=4eBZsbfcFpW5nqmRLbRZxuA5oeRW17XVHMzVtMH0bKA,22965
27
27
  lamindb/dev/_settings.py,sha256=nixk8lVijCbq_fRlUpkX5gvO9AdgUFjbXzFThAJhGBA,3824
28
28
  lamindb/dev/_track_environment.py,sha256=QjHWbyl2u8J4hbJG8Q_ToFaZIgS-H15Ej6syJgk-dvY,662
@@ -43,7 +43,7 @@ lamindb/dev/storage/file.py,sha256=jalzFQ8q110UUu_GGQBkU-g3M04h5g4LJ3nLjCzJ4pU,5
43
43
  lamindb/dev/storage/object.py,sha256=KGuOwwYuN2yCJxTXn9v0LanC0fjKwy_62P-WksHcf40,1140
44
44
  lamindb/setup/__init__.py,sha256=WaWKO-2XT67S65lSbS80hUojL-Mr_Wms9UxH6U54TsY,289
45
45
  lamindb/setup/dev/__init__.py,sha256=tBty426VGF2PGqqt2XuNU-WgvOrbOp1aZBDowjLuzgA,242
46
- lamindb-0.65.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
47
- lamindb-0.65.1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
48
- lamindb-0.65.1.dist-info/METADATA,sha256=yGClOnYmbvX5RHc6bWW3Jldrji95UB-TLcoaXR98BMo,3165
49
- lamindb-0.65.1.dist-info/RECORD,,
46
+ lamindb-0.66.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
47
+ lamindb-0.66.0.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
48
+ lamindb-0.66.0.dist-info/METADATA,sha256=d9S5mPiFAzV1EhN7KB_VnugNCy7vdeivGqtxZsZPD60,3076
49
+ lamindb-0.66.0.dist-info/RECORD,,