lamindb 0.45.0__py3-none-any.whl → 0.46a1__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
lamindb/__init__.py CHANGED
@@ -13,12 +13,14 @@ The core schema entities are central to lamindb's API:
 .. autosummary::
    :toctree: .
 
+   Dataset
    File
    Transform
    Run
+   Feature
+   FeatureSet
    User
    Storage
-   FeatureSet
    Tag
    Project
 
@@ -28,7 +30,6 @@ Functional tools:
    :toctree: .
 
    track
-   parse
    view
    select
    save
@@ -54,23 +55,43 @@ Developer API:
 
 """
 
-__version__ = "0.45.0"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.46a1"  # denote a release candidate for 0.1.0 with 0.1rc1
+
+import os as _os
 
 import lamindb_setup as _lamindb_setup
 
 # prints warning of python versions
 from lamin_logger import py_version_warning as _py_version_warning
 from lamindb_setup import _check_instance_setup
+from lamindb_setup._check_instance_setup import _INSTANCE_NOT_SETUP_WARNING
 
-_py_version_warning("3.8", "3.10")
+_py_version_warning("3.8", "3.11")
 
+_TESTING = _lamindb_setup._TESTING
 _INSTANCE_SETUP = _check_instance_setup(from_lamindb=True)
 # allow the user to call setup
 from . import setup  # noqa
 
+
+class InstanceNotSetupError(Exception):
+    pass
+
+
+def __getattr__(name):
+    raise InstanceNotSetupError(
+        f"{_INSTANCE_NOT_SETUP_WARNING}If you used the CLI to init or load an instance,"
+        " please RESTART the python session (in a notebook, restart kernel)"
+    )
+
+
 # only import all other functionality if setup was successful
 if _INSTANCE_SETUP:
+    del InstanceNotSetupError
+    del __getattr__  # delete so that imports work out
     from lnschema_core import (  # noqa
+        Dataset,
+        Feature,
         FeatureSet,
         File,
         Project,
@@ -95,12 +116,12 @@ if _INSTANCE_SETUP:
         f" {__version__})"
     )
 
-    from . import _baseorm_methods  # noqa
-    from . import _featureset_methods  # noqa
-    from . import _file_methods  # noqa
-    from . import _transform_methods  # noqa
+    from . import _dataset  # noqa
+    from . import _feature_set  # noqa
+    from . import _file  # noqa
+    from . import _orm  # noqa
+    from . import _transform  # noqa
     from ._delete import delete  # noqa
-    from ._from_values import parse  # noqa
     from ._save import save  # noqa
     from ._select import select  # noqa
     from ._view import view  # noqa
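The new module-level `__getattr__` uses PEP 562: Python calls it only for names not found in the module namespace, which is exactly the set of symbols the guarded imports skipped when no instance is set up; after a successful setup, the hook is deleted so real attribute lookup resolves normally. A minimal self-contained sketch of the pattern (all names here are hypothetical, not lamindb's):

# module_getattr_sketch.py -- illustration only
_SETUP_OK = False  # stands in for a flag like _INSTANCE_SETUP


class InstanceNotSetupError(Exception):
    pass


def __getattr__(name):
    # PEP 562: invoked only for attributes *missing* from the module
    # namespace, i.e., exactly the symbols whose import was skipped
    raise InstanceNotSetupError(
        f"cannot access {name!r}: set up an instance and restart the session"
    )


if _SETUP_OK:
    # the real imports would go here; then delete the fallback so that
    # ordinary attribute lookup (and tooling) works out
    del __getattr__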
lamindb/_context.py CHANGED
@@ -20,10 +20,9 @@ msg_path_failed = (
     " `notebook_path` to ln.track()."
 )
 
-msg_init_noninteractive = (
-    "Please attach an ID to the notebook by running the CLI: lamin track"
-    " my-notebook.ipynb"
-)
+
+class NonInteractiveEditorError(Exception):
+    pass
 
 
 def _write_notebook_meta(metadata):
@@ -178,10 +177,11 @@ class context:
                     " notebook!\nConsider installing nbproject for automatic"
                     " name, title & id tracking."
                 )
-            elif str(e) == msg_init_noninteractive:
+            elif isinstance(e, NonInteractiveEditorError):
                 raise e
             else:
                 logger.warning(f"Automatic tracking of notebook failed: {e}")
+                raise e
             is_tracked_notebook = False
 
         if not is_tracked_notebook:
@@ -292,7 +292,7 @@ class context:
         except Exception as e:
             nbproject_failed_msg = (
                 "Auto-retrieval of notebook name & title failed.\n\nFixes: Either"
-                " init on the CLI `lamin track my-notebook.ipynb` or pass"
+                f" init on the CLI `lamin track {notebook_path}` or pass"
                 " transform manually `ln.track(ln.Transform(name='My"
                 " notebook'))`\n\nPlease consider pasting error at:"
                 f" https://github.com/laminlabs/nbproject/issues/new\n\n{e}"
@@ -317,12 +317,11 @@ class context:
         if _env in ("lab", "notebook"):
             cls._notebook_meta = metadata  # type: ignore
         else:
-            # nb = nbproject.dev.read_notebook(_filepath)
-            # nb.metadata["nbproject"] = metadata
-            # nbproject.dev.write_notebook(nb, _filepath)
-            # raise SystemExit(msg_init_complete)
-            # the following is safer
-            raise RuntimeError(msg_init_noninteractive)
+            msg_init_noninteractive = (
+                "Please attach metadata to the notebook by running the CLI: "
+                f"lamin track {notebook_path}"
+            )
+            raise NonInteractiveEditorError(msg_init_noninteractive)
 
         if _env in ("lab", "notebook"):
             # save the notebook in case that title was updated
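Replacing `str(e) == msg_init_noninteractive` with `isinstance(e, NonInteractiveEditorError)` is necessary here because the message is now built dynamically from `notebook_path`; matching on the exception type keeps the control flow correct no matter how the wording changes. A minimal sketch of the pattern (hypothetical names):

class NonInteractiveEditorError(Exception):
    pass


def attach_metadata(interactive: bool) -> None:
    if not interactive:
        # the message may change freely; callers match on the type instead
        raise NonInteractiveEditorError(
            "Please attach metadata by running the CLI: lamin track <notebook>"
        )


try:
    attach_metadata(interactive=False)
except Exception as e:
    if isinstance(e, NonInteractiveEditorError):
        print("user must act: re-raise instead of swallowing")
    else:
        print(f"automatic tracking failed, continuing without it: {e}")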
lamindb/_dataset.py ADDED
@@ -0,0 +1,142 @@
+from typing import Iterable, List, Optional, Union
+
+import anndata as ad
+import pandas as pd
+from lnschema_core import ids
+from lnschema_core.models import Dataset
+
+from . import Feature, FeatureSet, File, Run
+from .dev.hashing import hash_set
+
+
+def __init__(
+    dataset: Dataset,
+    *args,
+    **kwargs,
+):
+    if len(args) == len(dataset._meta.concrete_fields):
+        super(Dataset, dataset).__init__(*args, **kwargs)
+        return None
+    # now we proceed with the user-facing constructor
+    if len(args) > 1:
+        raise ValueError("Only one non-keyword arg allowed: data")
+    data: Optional[Union[pd.DataFrame, ad.AnnData]] = None
+    if "data" in kwargs or len(args) == 1:
+        data = kwargs.pop("data") if len(args) == 0 else args[0]
+    name: Optional[str] = kwargs.pop("name") if "name" in kwargs else None
+    run: Optional[Run] = kwargs.pop("run") if "run" in kwargs else None
+    files: List[File] = kwargs.pop("files") if "files" in kwargs else []
+    file: Optional[File] = kwargs.pop("file") if "file" in kwargs else None
+    hash: Optional[str] = kwargs.pop("hash") if "hash" in kwargs else None
+    feature_sets: List[FeatureSet] = (
+        kwargs.pop("feature_sets") if "feature_sets" in kwargs else []
+    )
+    assert len(kwargs) == 0
+    if data is not None:
+        if isinstance(data, pd.DataFrame):
+            feature_set = FeatureSet.from_values(data.columns, Feature.name)
+            dataset._feature_sets = [feature_set]
+        elif isinstance(data, ad.AnnData):
+            if len(feature_sets) != 2:
+                raise ValueError(
+                    "Please provide a feature set describing each `.var.index` &"
+                    " `.obs.columns`"
+                )
+            dataset._feature_sets = feature_sets
+        file = File(data=data, run=run, feature_sets=dataset._feature_sets)
+        hash = file.hash
+        id = file.id
+    else:
+        id = ids.base62_20()
+        dataset._feature_sets = feature_sets
+    super(Dataset, dataset).__init__(id=id, name=name, file=file, hash=hash)
+    dataset._files = files
+
+
+@classmethod  # type: ignore
+def from_files(dataset: Dataset, *, name: str, files: Iterable[File]) -> Dataset:
+    # assert all files are already saved
+    # saved = not any([file._state._adding for file in files])
+    # if not saved:
+    #     raise ValueError("Not all files are yet saved, please save them")
+    # query all feature sets of files
+    file_ids = [file.id for file in files]
+    # query all feature sets at once rather than with one query per file
+    feature_set_file_links = File.feature_sets.through.objects.filter(
+        file_id__in=file_ids
+    )
+    feature_set_ids = [link.featureset_id for link in feature_set_file_links]
+    feature_sets = FeatureSet.select(id__in=feature_set_ids)
+    # validate consistency of feature_sets
+    # we only allow one feature set per type
+    feature_set_types = [feature_set.type for feature_set in feature_sets]
+    feature_set_ids_types = [
+        (feature_set.id, feature_set.type) for feature_set in feature_sets
+    ]
+    if len(set(feature_set_ids_types)) != len(set(feature_set_types)):
+        # we can do below in the future!
+        # logger.warning(
+        #     "feature sets are inconsistent across files"
+        #     "computing union! files will be outer-joined"
+        # )
+        raise ValueError(
+            "Currently only supporting datasets from files with same feature sets"
+        )
+    # validate consistency of hashes
+    # we do not allow duplicate hashes
+    file_hashes = [file.hash for file in files]
+    file_hashes_set = set(file_hashes)
+    assert len(file_hashes) == len(file_hashes_set)
+    hash = hash_set(file_hashes_set)
+    # create the dataset
+    dataset = Dataset(name=name, hash=hash, feature_sets=feature_sets, files=files)
+    return dataset
+
+
+def backed(dataset: Dataset):
+    if dataset.file is None:
+        raise RuntimeError("Can only call backed() for datasets with a single file")
+    return dataset.file.backed()
+
+
+def load(dataset: Dataset):
+    """Load the combined dataset."""
+    if dataset.file is not None:
+        return dataset.file.load()
+    else:
+        suffixes = [file.suffix for file in dataset.files.all()]
+        if len(set(suffixes)) != 1:
+            raise RuntimeError(
+                "Can only load datasets where all files have the same suffix"
+            )
+        objects = [file.load() for file in dataset.files.all()]
+        if isinstance(objects[0], pd.DataFrame):
+            return pd.concat(objects)
+        elif isinstance(objects[0], ad.AnnData):
+            return ad.concat(objects)
+
+
+def delete(dataset: Dataset, storage: bool = False):
+    super(Dataset, dataset).delete()
+    if dataset.file is not None:
+        dataset.file.delete(storage=storage)
+
+
+def save(dataset: Dataset):
+    if dataset.file is not None:
+        dataset.file.save()
+    for feature_set in dataset._feature_sets:
+        feature_set.save()
+    super(Dataset, dataset).save()
+    if len(dataset._files) > 0:
+        dataset.files.set(dataset._files)
+    if len(dataset._feature_sets) > 0:
+        dataset.feature_sets.set(dataset._feature_sets)
+
+
+Dataset.__init__ = __init__
+Dataset.from_files = from_files
+Dataset.backed = backed
+Dataset.load = load
+Dataset.delete = delete
+Dataset.save = save
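The new `Dataset` thus supports two construction paths: wrapping a single in-memory object, which also creates a `File` and parses a `FeatureSet` from the columns, or bundling already-saved files via `from_files`, where the dataset hash is an order-independent hash of the member file hashes. A hypothetical usage sketch, assuming a set-up lamindb instance and previously saved `File` records `file1` and `file2`:

import lamindb as ln
import pandas as pd

# path 1: a dataset from one in-memory object; the DataFrame columns are
# parsed into a FeatureSet and a File is created under the hood
df = pd.DataFrame({"cell_type": ["T cell", "B cell"], "count": [10, 20]})
dataset = ln.Dataset(df, name="immune-cells")
dataset.save()

# path 2: a dataset sharded across saved files with matching feature sets;
# file1 and file2 stand for previously saved ln.File records
sharded = ln.Dataset.from_files(name="sharded", files=[file1, file2])
sharded.save()
combined = sharded.load()  # concatenates shards sharing the same suffix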
lamindb/_delete.py CHANGED
@@ -1,31 +1,31 @@
 from typing import List, Optional, Union, overload  # noqa
 
-from lnschema_core import BaseORM
+from lnschema_core import ORM
 
 from ._logger import colors, logger
 
 
 @overload
 def delete(
-    record: BaseORM,
+    record: ORM,
 ) -> None:
     ...
 
 
 @overload
 def delete(
-    records: List[BaseORM],
+    records: List[ORM],
 ) -> None:  # type: ignore
     ...
 
 
 def delete(  # type: ignore
-    records: Union[BaseORM, List[BaseORM]],
+    records: Union[ORM, List[ORM]],
 ) -> None:
     """Delete metadata records & files.
 
     Args:
-        records: `Union[BaseORM, List[BaseORM]]` One or multiple records.
+        records: `Union[ORM, List[ORM]]` One or multiple records.
 
     Returns:
         `None`
@@ -49,7 +49,7 @@ def delete(  # type: ignore
     """
     if isinstance(records, list):
         records = records
-    elif isinstance(records, BaseORM):
+    elif isinstance(records, ORM):
        records = [records]
    for record in records:
        record.delete()
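The `@overload` stubs above exist only for type checkers: they declare that `delete` accepts either a single record or a list, while one runtime implementation normalizes both call styles. A generic sketch of the same pattern:

from typing import List, Union, overload


class Record:
    def delete(self) -> None:
        print(f"deleted {self!r}")


@overload
def delete(records: Record) -> None:
    ...


@overload
def delete(records: List[Record]) -> None:
    ...


def delete(records: Union[Record, List[Record]]) -> None:
    # normalize to a list, then dispatch record by record
    records = records if isinstance(records, list) else [records]
    for record in records:
        record.delete()


delete(Record())              # single record
delete([Record(), Record()])  # list of records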
lamindb/_feature_set.py ADDED
@@ -0,0 +1,138 @@
+from typing import List, Optional
+
+from django.db.models.query_utils import DeferredAttribute as Field
+from lamin_logger import logger
+from lamindb_setup.dev._docs import doc_args
+from lnschema_core import ORM, Feature, FeatureSet
+from lnschema_core.types import ListLike
+
+from lamindb.dev.hashing import hash_set
+from lamindb.dev.utils import attach_func_to_class_method
+
+from . import _TESTING
+from ._from_values import get_or_create_records, index_iterable
+from ._orm import init_self_from_db
+
+
+def get_related_name(features_type: ORM):
+    candidates = [
+        field.related_name
+        for field in FeatureSet._meta.related_objects
+        if field.related_model == features_type
+    ]
+    if not candidates:
+        raise ValueError(
+            f"Can't create feature sets from {features_type.__name__} because it's not"
+            " related to it!\nYou need to create a link model between FeatureSet and"
+            " your ORM in your custom schema.\nTo do so, add a line:\nfeature_sets ="
+            " models.ManyToMany(FeatureSet, related_name='mythings')\n"
+        )
+    return candidates[0]
+
+
+def validate_features(features: List[ORM]) -> ORM:
+    """Validate and return feature type."""
+    if len(features) == 0:
+        raise ValueError("provide list of features with at least one element")
+    if not hasattr(features, "__getitem__"):
+        raise TypeError("features has to be list-like")
+    if not isinstance(features[0], ORM):
+        raise TypeError(
+            "features has to store feature records! use .from_values() otherwise"
+        )
+    feature_types = set([feature.__class__ for feature in features])
+    if len(feature_types) > 1:
+        raise ValueError("feature_set can only contain a single type")
+    return next(iter(feature_types))  # return value in set of cardinality 1
+
+
+def __init__(self, *args, **kwargs):
+    if len(args) == len(self._meta.concrete_fields):
+        super(FeatureSet, self).__init__(*args, **kwargs)
+        return None
+    # now we proceed with the user-facing constructor
+    if len(args) > 1:
+        raise ValueError("Only one non-keyword arg allowed: features")
+    features: List[ORM] = kwargs.pop("features") if len(args) == 0 else args[0]
+    field: Optional[str] = kwargs.pop("field") if "field" in kwargs else None
+    id: Optional[str] = kwargs.pop("id") if "id" in kwargs else None
+    features_type = validate_features(features)
+    related_name = get_related_name(features_type)
+    if id is None:
+        features_hash = hash_set({feature.id for feature in features})
+        feature_set = FeatureSet.select(id=features_hash).one_or_none()
+        if feature_set is not None:
+            logger.info("Returning an existing feature_set")
+            init_self_from_db(self, feature_set)
+            return None
+        else:
+            id = features_hash
+    self._features = (related_name, features)
+    if field is None:
+        field = "id"
+    super(FeatureSet, self).__init__(
+        id=id, type=features_type.__name_with_type__(), field=field
+    )
+
+
+@doc_args(FeatureSet.save.__doc__)
+def save(self, *args, **kwargs) -> None:
+    """{}"""
+    super(FeatureSet, self).save(*args, **kwargs)
+    if hasattr(self, "_features"):
+        related_name, records = self._features
+        [record.save() for record in records]
+        getattr(self, related_name).set(records)
+
+
+@classmethod  # type:ignore
+@doc_args(FeatureSet.from_values.__doc__)
+def from_values(
+    cls, values: ListLike, field: Field = Feature.name, **kwargs
+) -> "FeatureSet":
+    """{}"""
+    if not isinstance(field, Field):
+        raise TypeError("Argument `field` must be an ORM field, e.g., `Feature.name`")
+    if len(values) == 0:
+        raise ValueError("Provide a list of at least one value")
+    if not isinstance(values[0], (str, int)):
+        raise TypeError("values should be list-like of str or int")
+    orm = field.field.model
+    iterable_idx = index_iterable(values)
+    features_hash = hash_set(set(iterable_idx))
+    feature_set = FeatureSet.select(id=features_hash).one_or_none()
+    if feature_set is not None:
+        logger.info("Returning an existing feature_set")
+    else:
+        from_bionty = orm.__module__.startswith("lnschema_bionty")
+        records = get_or_create_records(
+            iterable=iterable_idx,
+            field=field,
+            from_bionty=from_bionty,
+            **kwargs,
+        )
+        feature_set = FeatureSet(
+            id=features_hash,
+            field=field.field.name,
+            features=records,
+        )
+    return feature_set
+
+
+METHOD_NAMES = [
+    "__init__",
+    "from_values",
+    "save",
+]
+
+if _TESTING:
+    from inspect import signature
+
+    SIGS = {
+        name: signature(getattr(FeatureSet, name))
+        for name in METHOD_NAMES
+        if name != "__init__"
+    }
+
+for name in METHOD_NAMES:
+    attach_func_to_class_method(name, FeatureSet, globals())
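Both the constructor and `from_values` derive the `FeatureSet` id from a hash of the member identifiers, so constructing the same set twice returns the existing record instead of creating a duplicate. A sketch of how such an order-independent set hash can be built (an illustration only, not necessarily the exact implementation of `lamindb.dev.hashing.hash_set`):

import hashlib


def hash_set(values: set) -> str:
    # sort to make the digest independent of set iteration order,
    # then hash the joined string and truncate to a compact id
    joined = ",".join(sorted(values))
    return hashlib.md5(joined.encode()).hexdigest()[:20]


assert hash_set({"a", "b"}) == hash_set({"b", "a"})  # order never matters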