lamindb 0.48a2__py3-none-any.whl → 0.48.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -4,11 +4,12 @@ Import the package::

    import lamindb as ln

-`File` tracks data artifacts in form of files, on-disk (`zarr`, etc.) and
-in-memory data objects (`DataFrame`, `AnnData`, etc.) and allows to link them
-against entities of core schema & custom schemas.
+.. note::

-The core schema entities are central to lamindb's API:
+   `File` abstracts over objects in storage from blob-like files (pdf, txt, etc.)
+   to streamable storage backends (HDF5, DuckDB, zarr, TileDB, etc.).
+
+   `Dataset` abstracts over `File` and tables in classical warehouses (BigQuery, Snowflake).

 .. autosummary::
    :toctree: .
@@ -16,20 +17,13 @@ The core schema entities are central to lamindb's API:
    File
    Dataset
    Transform
-   Run
+   Label
    Feature
+   FeatureSet
+   Modality
    User
    Storage
-   Tag
-   Project
-
-More control over feature management:
-
-.. autosummary::
-   :toctree: .
-
-   FeatureSet
-   Category
+   Run

 Functional tools:

@@ -39,7 +33,6 @@ Functional tools:
    track
    view
    save
-   delete

 Static classes & modules:

@@ -47,7 +40,6 @@ Static classes & modules:
    :toctree: .

    settings
-   context
    types
    setup
    schema
@@ -55,14 +47,14 @@ Static classes & modules:

 """

-__version__ = "0.48a2"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.48.1"  # denote a release candidate for 0.1.0 with 0.1rc1

 import os as _os

 import lamindb_setup as _lamindb_setup

 # prints warning of python versions
-from lamin_logger import py_version_warning as _py_version_warning
+from lamin_utils import py_version_warning as _py_version_warning
 from lamindb_setup import _check_instance_setup
 from lamindb_setup._check_instance_setup import _INSTANCE_NOT_SETUP_WARNING

@@ -90,15 +82,14 @@ if _INSTANCE_SETUP:
     del InstanceNotSetupError
     del __getattr__  # delete so that imports work out
     from lnschema_core import (  # noqa
-        Category,
         Dataset,
         Feature,
         FeatureSet,
         File,
-        Project,
+        Label,
+        Modality,
         Run,
         Storage,
-        Tag,
         Transform,
         User,
     )
@@ -109,13 +100,13 @@ if _INSTANCE_SETUP:
     from ._context import context  # noqa

     track = context._track  # noqa
-    from lamin_logger import logger as _logger
+    from lamin_utils import logger as _logger

-    from . import _category  # noqa
     from . import _dataset  # noqa
     from . import _feature  # noqa
     from . import _feature_set  # noqa
     from . import _file  # noqa
+    from . import _label  # noqa
     from . import _orm  # noqa
     from . import _transform  # noqa
     from ._delete import delete  # noqa
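The upshot of the registry reshuffle: `Label` absorbs the removed `Tag`, `Project`, and `Category` registries, and `FeatureSet` and `Modality` join the top-level namespace. A minimal sketch of the resulting API, assuming a configured instance; the label name is an illustrative value, not taken from this diff:

    import lamindb as ln

    # Label replaces the removed Tag/Project/Category registries
    label = ln.Label(name="experiment-1")  # illustrative name
    label.save()

    # FeatureSet and Modality are now part of the top-level API
    print(ln.FeatureSet, ln.Modality)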
lamindb/_context.py CHANGED
@@ -6,7 +6,7 @@ from pathlib import Path, PurePath
 from typing import Dict, List, Optional, Tuple, Union

 import lnschema_core
-from lamin_logger import logger
+from lamin_utils import logger
 from lamindb_setup import settings
 from lamindb_setup.dev import InstanceSettings
 from lnschema_core import Run, Transform
@@ -116,7 +116,7 @@ def get_notebook_name_colab() -> str:
     return name.rstrip(".ipynb")


-class context:
+class run_context:
     """Global run context."""

     instance: Optional[InstanceSettings] = None
@@ -434,3 +434,6 @@ class context:
         logger.success(f"Updated: {transform}")

         cls.transform = transform
+
+
+context = run_context
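The class rename is backward-compatible: the `context = run_context` assignment at the bottom of the module keeps the old name importable. A quick check of the aliasing, assuming lamindb is installed:

    from lamindb._context import context, run_context

    # both names resolve to the same class object
    assert context is run_context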
lamindb/_dataset.py CHANGED
@@ -67,7 +67,7 @@ def from_files(dataset: Dataset, *, name: str, files: Iterable[File]) -> Dataset
     feature_set_file_links = File.feature_sets.through.objects.filter(
         file_id__in=file_ids
     )
-    feature_set_ids = [link.featureset_id for link in feature_set_file_links]
+    feature_set_ids = [link.feature_set_id for link in feature_set_file_links]
     feature_sets = FeatureSet.select(id__in=feature_set_ids)
     # validate consistency of feature_sets
     # we only allow one feature set per type
@@ -128,13 +128,16 @@ def delete(dataset: Dataset, storage: bool = False):
 def save(dataset: Dataset):
     if dataset.file is not None:
         dataset.file.save()
-    for feature_set in dataset._feature_sets:
+    feature_sets = dataset._feature_sets
+    if isinstance(feature_sets, dict):
+        feature_sets = feature_sets.values()
+    for feature_set in feature_sets:
         feature_set.save()
     super(Dataset, dataset).save()
     if len(dataset._files) > 0:
         dataset.files.set(dataset._files)
     if len(dataset._feature_sets) > 0:
-        dataset.feature_sets.set(dataset._feature_sets)
+        dataset.feature_sets.set(feature_sets)


 Dataset.__init__ = __init__
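Besides the `featureset_id` → `feature_set_id` attribute fix, `save` now accepts `_feature_sets` as either an iterable or a dict (presumably keyed by slot) by reducing a dict to its values before iterating. A standalone sketch of that normalization; `normalize_feature_sets` is a hypothetical name for illustration:

    def normalize_feature_sets(feature_sets):
        # mirrors the branch added in Dataset.save: a dict
        # (e.g., slot -> FeatureSet) is reduced to its values
        if isinstance(feature_sets, dict):
            return list(feature_sets.values())
        return list(feature_sets)

    assert normalize_feature_sets({"columns": "fs"}) == ["fs"]
    assert normalize_feature_sets(["fs"]) == ["fs"]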
lamindb/_delete.py CHANGED
@@ -1,6 +1,6 @@
 from typing import List, Union, overload  # noqa

-from lamin_logger import colors, logger
+from lamin_utils import colors, logger
 from lnschema_core import ORM


@@ -47,12 +47,12 @@ def delete(  # type: ignore

     Bulk delete via QuerySet:

-    >>> ln.save(ln.Tag.from_values(["Tag1", "Tag2", "Tag3"], field="name"))
-    >>> queryset = ln.Tag.select(name__icontains = "tag")
+    >>> ln.save(ln.Label.from_values(["Label1", "Label2", "Label3"], field="name"))
+    >>> queryset = ln.Label.select(name__icontains = "label")
     >>> queryset.list()
-    [Tag(id=o3FY3c5n, name=Tag2, updated_at=2023-07-19 18:28:16, created_by_id=kmvZDIX9), # noqa
-    Tag(id=Qi3c4utq, name=Tag3, updated_at=2023-07-19 18:28:16, created_by_id=kmvZDIX9), # noqa
-    Tag(id=CcFPLmpq, name=Tag1, updated_at=2023-07-19 18:28:16, created_by_id=kmvZDIX9)] # noqa
+    [Label(id=o3FY3c5n, name=Label2, updated_at=2023-07-19 18:28:16, created_by_id=kmvZDIX9), # noqa
+    Label(id=Qi3c4utq, name=Label3, updated_at=2023-07-19 18:28:16, created_by_id=kmvZDIX9), # noqa
+    Label(id=CcFPLmpq, name=Label1, updated_at=2023-07-19 18:28:16, created_by_id=kmvZDIX9)] # noqa
     >>> queryset.delete()
     """
     logger.warning("For efficient bulk delete, use `queryset.delete` instead")
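Apart from the `lamin_utils` import swap, this change is docstring-only: the bulk-delete example now uses `Label`. Condensed from the updated doctest, assuming a configured instance:

    import lamindb as ln

    # create a few labels, then bulk-delete them via a QuerySet
    ln.save(ln.Label.from_values(["Label1", "Label2", "Label3"], field="name"))
    ln.Label.select(name__icontains="label").delete()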
lamindb/_feature.py CHANGED
@@ -1,8 +1,10 @@
-from typing import List
+from itertools import islice
+from typing import List, Optional, Union

 import pandas as pd
+from lamin_utils import logger
 from lamindb_setup.dev._docs import doc_args
-from lnschema_core import Category, Feature
+from lnschema_core import Feature, Label
 from pandas.api.types import is_categorical_dtype, is_string_dtype

 from lamindb.dev.utils import attach_func_to_class_method
@@ -11,6 +13,18 @@ from . import _TESTING
 from ._save import bulk_create


+def convert_numpy_dtype_to_lamin_feature_type(dtype) -> str:
+    orig_type = dtype.name
+    # strip precision qualifiers
+    type = "".join(i for i in orig_type if not i.isdigit())
+    return type
+
+
+def take(n, iterable):
+    """Return the first n items of the iterable as a list."""
+    return list(islice(iterable, n))
+
+
 def __init__(self, *args, **kwargs):
     if len(args) == len(self._meta.concrete_fields):
         super(Feature, self).__init__(*args, **kwargs)
@@ -18,43 +32,64 @@ def __init__(self, *args, **kwargs):
     # now we proceed with the user-facing constructor
     if len(args) != 0:
         raise ValueError("Only non-keyword args allowed")
+    type: Optional[Union[type, str]] = kwargs.pop("type") if "type" in kwargs else None
+    if type is not None:
+        type_str = type.__name__ if not isinstance(type, str) else type
+    else:
+        type_str = None
+    kwargs["type"] = type_str
     super(Feature, self).__init__(*args, **kwargs)


 @classmethod  # type:ignore
 @doc_args(Feature.from_df.__doc__)
-def from_df(cls, df) -> List["Feature"]:
+def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
     """{}"""
-    records = Feature.from_values(df.columns, field=Feature.name)
-    assert len(records) == len(df.columns)
-
     string_cols = [col for col in df.columns if is_string_dtype(df[col])]
     categoricals = {col: df[col] for col in df.columns if is_categorical_dtype(df[col])}
     for key in string_cols:
         c = pd.Categorical(df[key])
-        # TODO: We should only check if non-null values are unique, but
-        # this would break cases where string columns with nulls could
-        # be written as categorical, but not as string.
-        # Possible solution: https://github.com/scverse/anndata/issues/504
         if len(c.categories) < len(c):
             categoricals[key] = c

-    for record in records:
-        if record.name in categoricals:
-            record.type = "Category"
-            feature = Feature.select(name=record.name).one_or_none()
-            categories = categoricals[record.name].categories
-            if feature is not None:
-                record._categories_records = Category.from_values(
-                    categories, feature=feature
-                )
-            else:
-                record._categories_raw = categories
+    types = {}
+    categoricals_with_unmapped_categories = {}
+    for name, col in df.items():
+        if name in categoricals:
+            types[name] = "category"
+            categorical = categoricals[name]
+            if hasattr(
+                categorical, "cat"
+            ):  # because .categories > pd2.0, .cat.categories < pd2.0
+                categorical = categorical.cat
+            categories = categorical.categories
+            categoricals_with_unmapped_categories[name] = Label.select(
+                feature=name
+            ).inspect(categories, "name", logging=False)["not_mapped"]
         else:
-            orig_type = df[record.name].dtype.name
-            # strip precision qualifiers
-            record.type = "".join(i for i in orig_type if not i.isdigit())
-    return records
+            types[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
+
+    features = Feature.from_values(df.columns, field=Feature.name, types=types)
+    assert len(features) == len(df.columns)
+
+    if len(categoricals_with_unmapped_categories) > 0:
+        n_max = 20
+        categoricals_with_unmapped_categories_formatted = "\n    ".join(
+            [
+                f"{key}: {', '.join(value)}"
+                for key, value in take(
+                    n_max, categoricals_with_unmapped_categories.items()
+                )
+            ]
+        )
+        if len(categoricals_with_unmapped_categories) > n_max:
+            categoricals_with_unmapped_categories_formatted += "\n    ..."
+        categoricals_with_unmapped_categories_formatted
+        logger.info(
+            "There are unmapped categories:\n   "
+            f" {categoricals_with_unmapped_categories_formatted}"
+        )
+    return features


 @doc_args(Feature.save.__doc__)
@@ -65,7 +100,7 @@ def save(self, *args, **kwargs) -> None:
     if hasattr(self, "_categories_records"):
         records = self._categories_records
     if hasattr(self, "_categories_raw"):
-        records = Category.from_values(self._categories_raw, feature=self)
+        records = Label.from_values(self._categories_raw, feature=self)
     if records is not None:
         bulk_create(records)

lamindb/_feature_manager.py ADDED
@@ -0,0 +1,176 @@
+from collections import defaultdict
+from typing import Iterable, List, Optional, Union
+
+import pandas as pd
+from lamin_utils import logger
+from lnschema_core.models import ORM, Dataset, Feature, FeatureSet, File
+
+from ._queryset import QuerySet
+from ._save import save
+
+
+def validate_and_cast_feature(feature) -> Feature:
+    if isinstance(feature, str):
+        feature_name = feature
+        feature = Feature.select(name=feature_name).one_or_none()
+        if feature is None:
+            raise ValueError(
+                f"Please create feature: ln.Feature(name='{feature_name}',"
+                " type='category').save()"
+            )
+    return feature
+
+
+def create_features_df(
+    file: File, feature_sets: List[FeatureSet], exclude: bool = True
+):
+    features = []
+    for feature_set in feature_sets:
+        if exclude:
+            features_df = feature_set.features.exclude(labels_orm__isnull=True).df()
+        else:
+            features_df = feature_set.features.df()
+        slots = file.feature_sets.through.objects.filter(
+            file=file, feature_set=feature_set
+        ).list("slot")
+        for slot in slots:
+            features_df["slot"] = slot
+            features.append(features_df)
+    features_df = pd.concat(features)
+    return features_df.sort_values(["labels_schema", "labels_orm"])
+
+
+class FeatureManager:
+    """Feature manager."""
+
+    def __init__(self, host: Union[File, Dataset]):
+        self._host = host
+        slot_feature_sets = (
+            self._feature_set_df_with_slots().reset_index().set_index("slot")["id"]
+        )
+        self._slots = {
+            slot: self._host.feature_sets.get(id=i)
+            for slot, i in slot_feature_sets.items()
+        }
+
+    def __repr__(self) -> str:
+        if len(self._slots) > 0:
+            msg = "slots:\n"
+            for slot, feature_set in self._slots.items():
+                msg += f" {slot}: {feature_set}\n"
+            return msg
+        else:
+            return "No linked features."
+
+    def __getitem__(self, slot) -> QuerySet:
+        id = (
+            self._host.feature_sets.through.objects.filter(
+                file_id=self._host.id, slot=slot
+            )
+            .one()
+            .feature_set_id
+        )
+        accessor_by_orm = {
+            field.related_model.__name__: field.name
+            for field in self._host._meta.related_objects
+        }
+        accessor_by_orm["Feature"] = "features"
+        feature_set = self._host.feature_sets.filter(id=id).one()
+        return getattr(feature_set, accessor_by_orm[feature_set.ref_orm]).all()
+
+    def _feature_set_df_with_slots(self) -> pd.DataFrame:
+        """Return DataFrame."""
+        df = self._host.feature_sets.df()
+        df.insert(
+            0,
+            "slot",
+            self._host.feature_sets.through.objects.filter(file_id=self._host.id)
+            .df()
+            .set_index("feature_set_id")
+            .slot,
+        )
+        return df
+
+    def add_labels(
+        self, records: Union[ORM, List[ORM]], feature: Optional[Union[str, ORM]] = None
+    ):
+        """Add one or several labels and associate them with a feature."""
+        if isinstance(records, str) or not isinstance(records, Iterable):
+            records = [records]
+        if isinstance(records[0], str):  # type: ignore
+            raise ValueError(
+                "Please pass a record (an ORM object), not a string, e.g., via: label"
+                f" = ln.Label(name='{records[0]}')"  # type: ignore
+            )
+        if self._host._state.adding:
+            raise ValueError("Please save the file or dataset before adding a label!")
+        feature = validate_and_cast_feature(feature)
+        records_by_orm = defaultdict(list)
+        records_by_feature_orm = defaultdict(list)
+        for record in records:
+            records_by_orm[record.__class__.__name__].append(record)
+            if feature is None:
+                try:
+                    record_feature = (
+                        record._feature
+                        if hasattr(record, "_feature")
+                        else record.feature
+                    )
+                except ValueError:
+                    raise ValueError("Pass feature argument")
+            else:
+                record_feature = feature
+            records_by_feature_orm[(record_feature, record.__class__.__name__)].append(
+                record
+            )
+        schema_and_accessor_by_orm = {
+            field.related_model.__name__: (
+                field.related_model.__get_schema_name__(),
+                field.name,
+            )
+            for field in self._host._meta.related_objects
+        }
+        schema_and_accessor_by_orm["Label"] = ("core", "labels")
+        for orm_name, records in records_by_orm.items():
+            save(records)
+            getattr(self._host, schema_and_accessor_by_orm[orm_name][1]).set(records)
+        accessor_by_orm = {
+            field.related_model.__name__: field.name
+            for field in self._host._meta.related_objects
+        }
+        accessor_by_orm["Feature"] = "features"
+        feature_sets = self._host.feature_sets.all()
+        feature_sets_by_orm = {
+            feature_set.ref_orm: feature_set for feature_set in feature_sets
+        }
+        for (feature, orm_name), records in records_by_feature_orm.items():
+            feature = validate_and_cast_feature(feature)
+            logger.info(f"Linking feature {feature.name} to {orm_name}")
+            feature.labels_orm = orm_name
+            feature.labels_schema = schema_and_accessor_by_orm[orm_name][0]
+            feature.save()
+            # check whether we have to update the feature set that manages labels
+            # (Feature) to account for a new feature
+            feature_set = feature_sets_by_orm["Feature"]
+            accessor = "features"
+            linked_features = getattr(feature_set, accessor)
+            if feature not in linked_features.all():
+                logger.info(
+                    f"Linking feature {feature.name} to feature set {feature_set}"
+                )
+                linked_features.add(feature)
+                feature_set.n += 1
+                feature_set.save()
+
+    def add_feature_set(self, feature_set: FeatureSet, slot: str):
+        if self._host._state.adding:
+            raise ValueError(
+                "Please save the file or dataset before adding a feature set!"
+            )
+        feature_set.save()
+        self._host.feature_sets.add(feature_set)
+        link_record = self._host.feature_sets.through.objects.filter(
+            file=self._host, feature_set=feature_set
+        ).one()
+        link_record.slot = slot
+        link_record.save()
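A hedged sketch of how the new `FeatureManager` is meant to be used; that `File` exposes it as `file.features` is an assumption (the wiring is not part of this diff), and the names are illustrative:

    import lamindb as ln

    file = ln.File.select().first()  # any saved file

    # link labels to the file under a feature; a categorical
    # Feature named "cell_type" must already exist
    labels = [ln.Label(name="T cell"), ln.Label(name="B cell")]
    file.features.add_labels(labels, feature="cell_type")

    # inspect slots and pull the features linked in one slot
    print(file.features)                # repr lists slot -> feature set
    columns = file.features["columns"]  # QuerySet of that slot's features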
lamindb/_feature_set.py CHANGED
@@ -1,10 +1,10 @@
-from typing import List, Optional
+from typing import Iterable, List, Optional, Type, Union

 import pandas as pd
 from django.db.models.query_utils import DeferredAttribute as Field
-from lamin_logger import logger
+from lamin_utils import logger
 from lamindb_setup.dev._docs import doc_args
-from lnschema_core import ORM, Feature, FeatureSet
+from lnschema_core import ORM, Feature, FeatureSet, ids
 from lnschema_core.types import ListLike

 from lamindb.dev.hashing import hash_set
@@ -55,28 +55,51 @@ def __init__(self, *args, **kwargs):
     # now we proceed with the user-facing constructor
     if len(args) > 1:
         raise ValueError("Only one non-keyword arg allowed: features")
-    features: List[ORM] = kwargs.pop("features") if len(args) == 0 else args[0]
-    field: Optional[str] = kwargs.pop("field") if "field" in kwargs else None
-    id: Optional[str] = kwargs.pop("id") if "id" in kwargs else None
-    features_type = validate_features(features)
-    related_name = get_related_name(features_type)
-    if id is None:
+    features: Iterable[ORM] = kwargs.pop("features") if len(args) == 0 else args[0]
+    ref_field: Optional[str] = (
+        kwargs.pop("ref_field") if "ref_field" in kwargs else "id"
+    )
+    type: Optional[Union[type, str]] = kwargs.pop("type") if "type" in kwargs else None
+    modality: Optional[str] = kwargs.pop("modality") if "modality" in kwargs else None
+    name: Optional[str] = kwargs.pop("name") if "name" in kwargs else None
+    # hash is only internally used
+    hash: Optional[str] = kwargs.pop("hash") if "hash" in kwargs else None
+    if len(kwargs) > 0:
+        raise ValueError(
+            "Only features, ref_field, type, modality, name are valid keyword arguments"
+        )
+
+    # now code
+    features_orm = validate_features(features)
+    if features_orm == Feature:
+        type = None
+    else:
+        type = float
+    n_features = len(features)
+    if hash is None:
         features_hash = hash_set({feature.id for feature in features})
-        feature_set = FeatureSet.select(id=features_hash).one_or_none()
+        feature_set = FeatureSet.select(hash=features_hash).one_or_none()
         if feature_set is not None:
-            logger.info("Returning an existing feature_set")
+            logger.info(f"Loaded {feature_set}")
             init_self_from_db(self, feature_set)
             return None
         else:
-            id = features_hash
-    self._features = (related_name, features)
-    if field is None:
-        field = "id"
+            hash = features_hash
+    self._features = (get_related_name(features_orm), features)
+    if type is not None:
+        type_str = type.__name__ if not isinstance(type, str) else type
+    else:
+        type_str = None
     super(FeatureSet, self).__init__(
-        id=id,
-        type=features_type.__name__,
-        schema=features_type.__get_schema_name__(),
-        field=field,
+        id=ids.base62_20(),
+        name=name,
+        type=type_str,
+        n=n_features,
+        modality=modality,
+        ref_orm=features_orm.__name__,
+        ref_schema=features_orm.__get_schema_name__(),
+        ref_field=ref_field,
+        hash=hash,
     )


@@ -99,33 +122,45 @@ def save(self, *args, **kwargs) -> None:
 @classmethod  # type:ignore
 @doc_args(FeatureSet.from_values.__doc__)
 def from_values(
-    cls, values: ListLike, field: Field = Feature.name, **kwargs
+    cls,
+    values: ListLike,
+    field: Field = Feature.name,
+    type: Optional[Union[Type, str]] = None,
+    name: Optional[str] = None,
+    modality: Optional[str] = None,
+    **kwargs,
 ) -> "FeatureSet":
     """{}"""
     if not isinstance(field, Field):
         raise TypeError("Argument `field` must be an ORM field, e.g., `Feature.name`")
     if len(values) == 0:
         raise ValueError("Provide a list of at least one value")
-    orm = field.field.model
+    ORM = field.field.model
+    if isinstance(ORM, Feature):
+        raise ValueError("Please use from_df() instead of from_values()")
     iterable_idx = index_iterable(values)
     if not isinstance(iterable_idx[0], (str, int)):
         raise TypeError("values should be list-like of str or int")
     features_hash = hash_set(set(iterable_idx))
-    feature_set = FeatureSet.select(id=features_hash).one_or_none()
+    feature_set = FeatureSet.select(hash=features_hash).one_or_none()
     if feature_set is not None:
-        logger.info("Returning an existing feature_set")
+        logger.info(f"Loaded {feature_set}")
     else:
-        from_bionty = orm.__module__.startswith("lnschema_bionty")
+        from_bionty = ORM.__module__.startswith("lnschema_bionty")
         records = get_or_create_records(
             iterable=iterable_idx,
             field=field,
             from_bionty=from_bionty,
             **kwargs,
         )
+        # type_str = type.__name__ if not isinstance(type, str) else type
         feature_set = FeatureSet(
-            id=features_hash,
-            field=field.field.name,
             features=records,
+            hash=features_hash,
+            name=name,
+            modality=modality,
+            type=type,
+            ref_field=field.field.name,
         )
     return feature_set

@@ -135,10 +170,11 @@ from_values(
 def from_df(
     cls,
     df: "pd.DataFrame",
+    name: Optional[str] = None,
 ) -> "FeatureSet":
     """{}"""
     features = Feature.from_df(df)
-    feature_set = FeatureSet(features)
+    feature_set = FeatureSet(features, name=name)
     return feature_set

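The identity change is the key behavior here: a feature set's `id` is now a random base62 string while deduplication moves to the `hash` field, so constructing the same set of values twice loads the existing record ("Loaded ...") rather than creating a duplicate. A sketch, assuming an instance where features named "a" and "b" can be created:

    import lamindb as ln

    fs1 = ln.FeatureSet.from_values(["a", "b"], field=ln.Feature.name)
    fs1.save()

    # same value set -> same hash -> the existing record is loaded
    fs2 = ln.FeatureSet.from_values(["a", "b"], field=ln.Feature.name)
    assert fs1.id == fs2.id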