lamindb 0.71.2__py3-none-any.whl → 0.72.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules & settings:
41
41
  """
42
42
 
43
43
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
44
- __version__ = "0.71.2"
44
+ __version__ = "0.72.0"
45
45
 
46
46
  import os as _os
47
47
 
@@ -72,6 +72,7 @@ if _check_instance_setup(from_lamindb=True):
72
72
  User,
73
73
  )
74
74
 
75
+ from . import core # isort: split
75
76
  from . import (
76
77
  _annotate,
77
78
  _artifact,
@@ -86,7 +87,6 @@ if _check_instance_setup(from_lamindb=True):
86
87
  _storage,
87
88
  _transform,
88
89
  _ulabel,
89
- core,
90
90
  )
91
91
 
92
92
  dev = core # backward compat
lamindb/_annotate.py CHANGED
@@ -9,18 +9,14 @@ from lamin_utils import colors, logger
9
9
  from lamindb_setup.core._docs import doc_args
10
10
  from lnschema_core import Artifact, Collection, Feature, Registry, Run, ULabel
11
11
 
12
+ from .core.exceptions import ValidationError
13
+
12
14
  if TYPE_CHECKING:
13
15
  from lamindb_setup.core.types import UPathStr
14
16
  from lnschema_core.types import FieldAttr
15
17
  from mudata import MuData
16
18
 
17
19
 
18
- class ValidationError(ValueError):
19
- """Validation error."""
20
-
21
- pass
22
-
23
-
24
20
  class AnnotateLookup:
25
21
  """Lookup categories from the reference instance."""
26
22
 
@@ -566,7 +562,7 @@ class MuDataAnnotator:
566
562
  save_function="add_new_from_var_index",
567
563
  using=self._using,
568
564
  validated_only=validated_only,
569
- type="number",
565
+ dtype="number",
570
566
  **kwargs,
571
567
  )
572
568
 
@@ -1034,7 +1030,7 @@ def update_registry(
1034
1030
  validated_only: bool = True,
1035
1031
  df: pd.DataFrame | None = None,
1036
1032
  organism: str | None = None,
1037
- type: str | None = None,
1033
+ dtype: str | None = None,
1038
1034
  **kwargs,
1039
1035
  ) -> None:
1040
1036
  """Save features or labels records in the default instance from the using instance.
@@ -1048,7 +1044,7 @@ def update_registry(
1048
1044
  validated_only: If True, only save validated labels.
1049
1045
  df: A DataFrame to save labels from.
1050
1046
  organism: The organism name.
1051
- type: The type of the feature.
1047
+ dtype: The type of the feature.
1052
1048
  kwargs: Additional keyword arguments to pass to the registry model to create new records.
1053
1049
  """
1054
1050
  from lamindb._save import save as ln_save
@@ -1102,7 +1098,7 @@ def update_registry(
1102
1098
  for value in labels_saved["without reference"]:
1103
1099
  filter_kwargs[field.field.name] = value
1104
1100
  if registry == Feature:
1105
- filter_kwargs["type"] = "category" if type is None else type
1101
+ filter_kwargs["dtype"] = "cat" if dtype is None else dtype
1106
1102
  non_validated_records.append(registry(**filter_kwargs, **kwargs))
1107
1103
  ln_save(non_validated_records)
1108
1104
 
lamindb/_artifact.py CHANGED
@@ -1,12 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
3
4
  import shutil
5
+ from concurrent.futures import ThreadPoolExecutor
4
6
  from pathlib import Path, PurePath, PurePosixPath
5
7
  from typing import TYPE_CHECKING, Any, Mapping
6
8
 
7
9
  import fsspec
8
10
  import lamindb_setup as ln_setup
9
11
  import pandas as pd
12
+ import psutil
10
13
  from anndata import AnnData
11
14
  from lamin_utils import colors, logger
12
15
  from lamindb_setup import settings as setup_settings
@@ -204,15 +207,26 @@ def get_stat_or_artifact(
204
207
  return size, hash, hash_type, n_objects
205
208
  else:
206
209
  if path.is_dir():
207
- md5s = []
208
- size = 0
209
- for subpath in path.rglob("*"):
210
- if not subpath.is_file():
211
- continue
212
- size += subpath.stat().st_size
213
- md5s.append(hash_file(subpath)[0])
214
- hash, hash_type = hash_md5s_from_dir(md5s)
215
- n_objects = len(md5s)
210
+ files = (subpath for subpath in path.rglob("*") if subpath.is_file())
211
+
212
+ def hash_size(file):
213
+ file_size = file.stat().st_size
214
+ return hash_file(file, file_size)[0], file_size
215
+
216
+ try:
217
+ n_workers = len(psutil.Process().cpu_affinity())
218
+ except AttributeError:
219
+ n_workers = psutil.cpu_count()
220
+ if n_workers > 1:
221
+ with ThreadPoolExecutor(n_workers) as pool:
222
+ hashes_sizes = pool.map(hash_size, files)
223
+ else:
224
+ hashes_sizes = map(hash_size, files)
225
+ hashes, sizes = zip(*hashes_sizes)
226
+
227
+ hash, hash_type = hash_md5s_from_dir(hashes)
228
+ n_objects = len(hashes)
229
+ size = sum(sizes)
216
230
  else:
217
231
  hash, hash_type = hash_file(path)
218
232
  size = stat.st_size
@@ -335,7 +349,7 @@ def get_artifact_kwargs_from_data(
335
349
  # save the information that this artifact was previously
336
350
  # produced by another run
337
351
  if artifact.run is not None:
338
- artifact.run.replicated_output_artifacts.add(artifact)
352
+ artifact.run.output_artifacts_with_later_updates.add(artifact)
339
353
  # update the run of the artifact with the latest run
340
354
  stat_or_artifact.run = run
341
355
  stat_or_artifact.transform = run.transform
lamindb/_can_validate.py CHANGED
@@ -80,7 +80,9 @@ def _inspect(
80
80
 
81
81
  # inspect in the DB
82
82
  result_db = inspect(
83
- df=_filter_query_based_on_organism(queryset=queryset, organism=organism),
83
+ df=_filter_query_based_on_organism(
84
+ queryset=queryset, field=field, organism=organism
85
+ ),
84
86
  identifiers=values,
85
87
  field=field,
86
88
  mute=mute,
@@ -161,6 +163,7 @@ def _validate(
161
163
  field_values = pd.Series(
162
164
  _filter_query_based_on_organism(
163
165
  queryset=queryset,
166
+ field=field,
164
167
  organism=organism,
165
168
  values_list_field=field,
166
169
  ),
@@ -284,7 +287,9 @@ def _standardize(
284
287
 
285
288
  try:
286
289
  orm._meta.get_field(synonyms_field)
287
- df = _filter_query_based_on_organism(queryset=queryset, organism=organism)
290
+ df = _filter_query_based_on_organism(
291
+ queryset=queryset, field=field, organism=organism
292
+ )
288
293
  except FieldDoesNotExist:
289
294
  df = pd.DataFrame()
290
295
 
@@ -439,6 +444,7 @@ def _check_synonyms_field_exist(record: Registry):
439
444
 
440
445
  def _filter_query_based_on_organism(
441
446
  queryset: QuerySet,
447
+ field: str,
442
448
  organism: str | Registry | None = None,
443
449
  values_list_field: str | None = None,
444
450
  ):
@@ -447,7 +453,7 @@ def _filter_query_based_on_organism(
447
453
 
448
454
  orm = queryset.model
449
455
 
450
- if _has_organism_field(orm):
456
+ if _has_organism_field(orm) and not field.endswith("id"):
451
457
  # here, we can safely import lnschema_bionty
452
458
  from lnschema_bionty._bionty import create_or_get_organism_record
453
459
 
lamindb/_collection.py CHANGED
@@ -103,9 +103,9 @@ def __init__(
103
103
  if meta._state.adding:
104
104
  raise ValueError("Save meta artifact before creating collection!")
105
105
  if not feature_sets:
106
- feature_sets = meta.features._feature_set_by_slot
106
+ feature_sets = meta.features.feature_set_by_slot
107
107
  else:
108
- if len(meta.features._feature_set_by_slot) > 0:
108
+ if len(meta.features.feature_set_by_slot) > 0:
109
109
  logger.info("overwriting feature sets linked to artifact")
110
110
  # we ignore collections in trash containing the same hash
111
111
  if hash is not None:
@@ -121,7 +121,7 @@ def __init__(
121
121
  # save the information that this artifact was previously
122
122
  # produced by another run
123
123
  if existing_collection.run is not None:
124
- existing_collection.run.replicated_output_collections.add(
124
+ existing_collection.run.output_collections_with_later_updates.add(
125
125
  existing_collection
126
126
  )
127
127
  # update the run of the artifact with the latest run
@@ -129,7 +129,7 @@ def __init__(
129
129
  existing_collection.transform = run.transform
130
130
  init_self_from_db(collection, existing_collection)
131
131
  update_attributes(collection, {"description": description, "name": name})
132
- for slot, feature_set in collection.features._feature_set_by_slot.items():
132
+ for slot, feature_set in collection.features.feature_set_by_slot.items():
133
133
  if slot in feature_sets:
134
134
  if not feature_sets[slot] == feature_set:
135
135
  collection.feature_sets.remove(feature_set)
@@ -177,7 +177,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
177
177
  feature_sets_by_slots = defaultdict(list)
178
178
  logger.debug("slots")
179
179
  for link in feature_set_artifact_links:
180
- feature_sets_by_slots[link.slot].append(link.feature_set_id)
180
+ feature_sets_by_slots[link.slot].append(link.featureset_id)
181
181
  feature_sets_union = {}
182
182
  logger.debug("union")
183
183
  for slot, feature_set_ids_slot in feature_sets_by_slots.items():
@@ -197,7 +197,7 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
197
197
  )
198
198
  start_time = logger.debug("done, start evaluate", time=start_time)
199
199
  features = features_registry.filter(id__in=feature_ids)
200
- feature_sets_union[slot] = FeatureSet(features, type=feature_set_1.type)
200
+ feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
201
201
  start_time = logger.debug("done", time=start_time)
202
202
  # validate consistency of hashes
203
203
  # we do not allow duplicate hashes
@@ -361,7 +361,7 @@ def restore(self) -> None:
361
361
  @doc_args(Collection.artifacts.__doc__)
362
362
  def artifacts(self) -> QuerySet:
363
363
  """{}."""
364
- return self.unordered_artifacts.order_by("collectionartifact__id")
364
+ return self.unordered_artifacts.order_by("collection_links__id")
365
365
 
366
366
 
367
367
  METHOD_NAMES = [
lamindb/_feature.py CHANGED
@@ -1,26 +1,29 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, List
3
+ from typing import TYPE_CHECKING
4
4
 
5
5
  import lamindb_setup as ln_setup
6
6
  import pandas as pd
7
7
  from lamindb_setup.core._docs import doc_args
8
- from lnschema_core.models import Feature, Registry
8
+ from lnschema_core.models import Artifact, Feature
9
9
  from pandas.api.types import CategoricalDtype, is_string_dtype
10
10
 
11
11
  from lamindb._utils import attach_func_to_class_method
12
12
  from lamindb.core._settings import settings
13
13
 
14
14
  from ._query_set import RecordsList
15
+ from .core.schema import dict_schema_name_to_model_name
15
16
 
16
17
  if TYPE_CHECKING:
17
18
  from lnschema_core.types import FieldAttr
18
19
 
19
20
  FEATURE_TYPES = {
20
- "int": "number",
21
- "float": "number",
22
- "str": "category",
23
- "object": "category",
21
+ "number": "number",
22
+ "int": "int",
23
+ "float": "float",
24
+ "bool": "bool",
25
+ "str": "cat",
26
+ "object": "cat",
24
27
  }
25
28
 
26
29
 
@@ -28,10 +31,8 @@ def convert_numpy_dtype_to_lamin_feature_type(dtype) -> str:
28
31
  orig_type = dtype.name
29
32
  # strip precision qualifiers
30
33
  type = "".join(i for i in orig_type if not i.isdigit())
31
- if type == "int" or type == "float":
32
- type = "number"
33
- elif type == "object" or type == "str":
34
- type = "category"
34
+ if type == "object" or type == "str":
35
+ type = "cat"
35
36
  return type
36
37
 
37
38
 
@@ -42,38 +43,44 @@ def __init__(self, *args, **kwargs):
42
43
  # now we proceed with the user-facing constructor
43
44
  if len(args) != 0:
44
45
  raise ValueError("Only non-keyword args allowed")
45
- type: Optional[Union[type, str]] = ( # noqa
46
- kwargs.pop("type") if "type" in kwargs else None
47
- )
48
- registries: list[Registry] | None = (
49
- kwargs.pop("registries") if "registries" in kwargs else None
50
- )
46
+ dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
51
47
  # cast type
52
- type_str = None
53
- if type is not None:
54
- type_str = type.__name__ if not isinstance(type, str) else type
55
- if type_str is None:
56
- raise ValueError("Please specify a type!")
57
- type_str = FEATURE_TYPES.get(type_str, type_str)
58
- if type_str not in {"number", "category", "bool"}:
59
- raise ValueError("type has to be one of 'number', 'category', 'bool'!")
60
- kwargs["type"] = type_str
61
- # cast registries
62
- registries_str: str | None = None
63
- if registries is not None:
64
- if isinstance(registries, str):
65
- # TODO: add more validation
66
- registries_str = registries
48
+ if dtype is None:
49
+ raise ValueError("Please pass a type!")
50
+ elif dtype is not None:
51
+ if not isinstance(dtype, str):
52
+ if not isinstance(dtype, list) and dtype.__name__ in FEATURE_TYPES:
53
+ dtype_str = FEATURE_TYPES[dtype.__name__]
54
+ else:
55
+ if not isinstance(dtype, list):
56
+ raise ValueError("dtype has to be a list of Registry types")
57
+ registries_str = ""
58
+ for cls in dtype:
59
+ if not hasattr(cls, "__get_name_with_schema__"):
60
+ raise ValueError(
61
+ "each element of the list has to be a Registry"
62
+ )
63
+ registries_str += cls.__get_name_with_schema__() + "|"
64
+ dtype_str = f'cat[{registries_str.rstrip("|")}]'
67
65
  else:
68
- if not isinstance(registries, List):
69
- raise ValueError("registries has to be a list of Registry types")
70
- registries_str = ""
71
- for cls in registries:
72
- if not hasattr(cls, "__get_name_with_schema__"):
73
- raise ValueError("each element of the list has to be a Registry")
74
- registries_str += cls.__get_name_with_schema__() + "|"
75
- registries_str = registries_str.rstrip("|")
76
- kwargs["registries"] = registries_str
66
+ dtype_str = dtype
67
+ # add validation that a registry actually exists
68
+ if dtype_str not in FEATURE_TYPES.values() and not dtype_str.startswith(
69
+ "cat"
70
+ ):
71
+ raise ValueError(
72
+ f"dtype is {dtype_str} but has to be one of 'number', 'int', 'float', 'cat', 'bool', 'cat[...]'!"
73
+ )
74
+ if dtype_str != "cat" and dtype_str.startswith("cat"):
75
+ registries_str = dtype_str.replace("cat[", "").rstrip("]")
76
+ if registries_str != "":
77
+ registry_str_list = registries_str.split("|")
78
+ for registry_str in registry_str_list:
79
+ if registry_str not in dict_schema_name_to_model_name(Artifact):
80
+ raise ValueError(
81
+ f"'{registry_str}' is an invalid dtype, pass, e.g. `[ln.ULabel, bt.CellType]` or similar"
82
+ )
83
+ kwargs["dtype"] = dtype_str
77
84
  super(Feature, self).__init__(*args, **kwargs)
78
85
 
79
86
 
@@ -99,11 +106,11 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
99
106
  field = Feature.name if field is None else field
100
107
  categoricals = categoricals_from_df(df)
101
108
 
102
- types = {}
109
+ dtypes = {}
103
110
  # categoricals_with_unmapped_categories = {} # type: ignore
104
111
  for name, col in df.items():
105
112
  if name in categoricals:
106
- types[name] = "category"
113
+ dtypes[name] = "cat"
107
114
  # below is a harder feature to write, now, because it requires to
108
115
  # query the link tables between the label Registry and file or collection
109
116
  # the original implementation fell short
@@ -117,7 +124,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
117
124
  # feature=name
118
125
  # ).inspect(categories, "name", logging=False)["not_mapped"]
119
126
  else:
120
- types[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
127
+ dtypes[name] = convert_numpy_dtype_to_lamin_feature_type(col.dtype)
121
128
 
122
129
  # silence the warning "loaded record with exact same name "
123
130
  verbosity = settings.verbosity
@@ -128,7 +135,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
128
135
  if registry != Feature:
129
136
  raise ValueError("field must be a Feature FieldAttr!")
130
137
  # create records for all features including non-validated
131
- features = [Feature(name=name, type=type) for name, type in types.items()]
138
+ features = [Feature(name=name, dtype=dtype) for name, dtype in dtypes.items()]
132
139
  finally:
133
140
  settings.verbosity = verbosity
134
141
 
@@ -174,9 +181,10 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
174
181
 
175
182
 
176
183
  @doc_args(Feature.save.__doc__)
177
- def save(self, *args, **kwargs) -> None:
184
+ def save(self, *args, **kwargs) -> Feature:
178
185
  """{}."""
179
186
  super(Feature, self).save(*args, **kwargs)
187
+ return self
180
188
 
181
189
 
182
190
  METHOD_NAMES = [
lamindb/_feature_set.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Iterable
3
+ from typing import TYPE_CHECKING, Iterable, Type
4
4
 
5
5
  import lamindb_setup as ln_setup
6
6
  import numpy as np
@@ -14,6 +14,11 @@ from lamindb._utils import attach_func_to_class_method
14
14
 
15
15
  from ._feature import convert_numpy_dtype_to_lamin_feature_type
16
16
  from ._registry import init_self_from_db
17
+ from .core.exceptions import ValidationError
18
+ from .core.schema import (
19
+ dict_related_model_to_related_name,
20
+ get_related_name,
21
+ )
17
22
 
18
23
  if TYPE_CHECKING:
19
24
  import pandas as pd
@@ -21,57 +26,7 @@ if TYPE_CHECKING:
21
26
  from ._query_set import QuerySet
22
27
 
23
28
  NUMBER_TYPE = "number"
24
-
25
-
26
- def dict_related_model_to_related_name(orm):
27
- d: dict = {
28
- i.related_model.__get_name_with_schema__(): i.related_name
29
- for i in orm._meta.related_objects
30
- if i.related_name is not None
31
- }
32
- d.update(
33
- {
34
- i.related_model.__get_name_with_schema__(): i.name
35
- for i in orm._meta.many_to_many
36
- if i.name is not None
37
- }
38
- )
39
-
40
- return d
41
-
42
-
43
- def dict_schema_name_to_model_name(orm):
44
- d: dict = {
45
- i.related_model.__get_name_with_schema__(): i.related_model
46
- for i in orm._meta.related_objects
47
- if i.related_name is not None
48
- }
49
- d.update(
50
- {
51
- i.related_model.__get_name_with_schema__(): i.related_model
52
- for i in orm._meta.many_to_many
53
- if i.name is not None
54
- }
55
- )
56
-
57
- return d
58
-
59
-
60
- def get_related_name(features_type: Registry):
61
- candidates = [
62
- field.related_name
63
- for field in FeatureSet._meta.related_objects
64
- if field.related_model == features_type
65
- ]
66
- if not candidates:
67
- raise ValueError(
68
- f"Can't create feature sets from {features_type.__name__} because it's not"
69
- " related to it!\nYou need to create a link model between FeatureSet and"
70
- " your Registry in your custom schema.\nTo do so, add a"
71
- " line:\nfeature_sets = models.ManyToMany(FeatureSet,"
72
- " related_name='mythings')\n"
73
- )
74
- return candidates[0]
29
+ DICT_KEYS_TYPE = type({}.keys()) # type: ignore
75
30
 
76
31
 
77
32
  def validate_features(features: list[Registry]) -> Registry:
@@ -106,14 +61,14 @@ def __init__(self, *args, **kwargs):
106
61
  if len(args) > 1:
107
62
  raise ValueError("Only one non-keyword arg allowed: features")
108
63
  features: Iterable[Registry] = kwargs.pop("features") if len(args) == 0 else args[0]
109
- type: str | None = kwargs.pop("type") if "type" in kwargs else None
64
+ dtype: str | None = kwargs.pop("dtype") if "dtype" in kwargs else None
110
65
  name: str | None = kwargs.pop("name") if "name" in kwargs else None
111
66
  if len(kwargs) > 0:
112
67
  raise ValueError("Only features, type, name are valid keyword arguments")
113
68
  # now code
114
69
  features_registry = validate_features(features)
115
- if type is None:
116
- type = None if features_registry == Feature else NUMBER_TYPE
70
+ if dtype is None:
71
+ dtype = None if features_registry == Feature else NUMBER_TYPE
117
72
  n_features = len(features)
118
73
  features_hash = hash_set({feature.uid for feature in features})
119
74
  feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
@@ -128,7 +83,7 @@ def __init__(self, *args, **kwargs):
128
83
  super(FeatureSet, self).__init__(
129
84
  uid=ids.base62_20(),
130
85
  name=name,
131
- type=get_type_str(type),
86
+ dtype=get_type_str(dtype),
132
87
  n=n_features,
133
88
  registry=features_registry.__get_name_with_schema__(),
134
89
  hash=hash,
@@ -144,13 +99,11 @@ def save(self, *args, **kwargs) -> None:
144
99
  getattr(self, related_name).set(records)
145
100
 
146
101
 
147
- def get_type_str(type: str | None) -> str | None:
148
- if type is not None:
149
- type_str = type.__name__ if not isinstance(type, str) else type # type: ignore
102
+ def get_type_str(dtype: str | None) -> str | None:
103
+ if dtype is not None:
104
+ type_str = dtype.__name__ if not isinstance(dtype, str) else dtype # type: ignore
150
105
  else:
151
106
  type_str = None
152
- if type == "int" or type == "float":
153
- type_str = NUMBER_TYPE
154
107
  return type_str
155
108
 
156
109
 
@@ -165,7 +118,8 @@ def from_values(
165
118
  mute: bool = False,
166
119
  organism: Registry | str | None = None,
167
120
  public_source: Registry | None = None,
168
- ) -> FeatureSet | None:
121
+ raise_validation_error: bool = True,
122
+ ) -> FeatureSet:
169
123
  """{}."""
170
124
  if not isinstance(field, FieldAttr):
171
125
  raise TypeError(
@@ -173,16 +127,25 @@ def from_values(
173
127
  )
174
128
  if len(values) == 0:
175
129
  raise ValueError("Provide a list of at least one value")
130
+ if isinstance(values, DICT_KEYS_TYPE):
131
+ values = list(values)
176
132
  registry = field.field.model
177
133
  if registry != Feature and type is None:
178
134
  type = NUMBER_TYPE
179
135
  logger.debug("setting feature set to 'number'")
180
136
  validated = registry.validate(values, field=field, mute=mute, organism=organism)
181
- if validated.sum() == 0:
182
- if mute is True:
183
- logger.warning("no validated features, skip creating feature set")
184
- return None
185
- validated_values = np.array(values)[validated]
137
+ values_array = np.array(values)
138
+ validated_values = values_array[validated]
139
+ if validated.sum() != len(values):
140
+ not_validated_values = values_array[~validated]
141
+ msg = (
142
+ f"These values could not be validated: {not_validated_values.tolist()}\n"
143
+ f"If there are no typos, add them to their registry: {registry}"
144
+ )
145
+ if raise_validation_error:
146
+ raise ValidationError(msg)
147
+ elif len(validated_values) == 0:
148
+ return None # temporarily return None here
186
149
  validated_features = registry.from_values(
187
150
  validated_values,
188
151
  field=field,
@@ -192,7 +155,7 @@ def from_values(
192
155
  feature_set = FeatureSet(
193
156
  features=validated_features,
194
157
  name=name,
195
- type=get_type_str(type),
158
+ dtype=get_type_str(type),
196
159
  )
197
160
  return feature_set
198
161
 
@@ -217,12 +180,12 @@ def from_df(
217
180
  return None
218
181
  if registry == Feature:
219
182
  validated_features = Feature.from_df(df.loc[:, validated])
220
- feature_set = FeatureSet(validated_features, name=name, type=None)
183
+ feature_set = FeatureSet(validated_features, name=name, dtype=None)
221
184
  else:
222
185
  dtypes = [col.dtype for (_, col) in df.loc[:, validated].items()]
223
186
  if len(set(dtypes)) != 1:
224
187
  raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
225
- type = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
188
+ dtype = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
226
189
  validated_features = registry.from_values(
227
190
  df.columns[validated],
228
191
  field=field,
@@ -232,7 +195,7 @@ def from_df(
232
195
  feature_set = FeatureSet(
233
196
  features=validated_features,
234
197
  name=name,
235
- type=get_type_str(type),
198
+ dtype=get_type_str(dtype),
236
199
  )
237
200
  return feature_set
238
201
 
@@ -246,14 +209,14 @@ def members(self) -> QuerySet:
246
209
  # need to fix this
247
210
  return self._features[1]
248
211
  related_name = self._get_related_name()
212
+ if related_name is None:
213
+ related_name = "features"
249
214
  return self.__getattribute__(related_name).all()
250
215
 
251
216
 
252
217
  def _get_related_name(self: FeatureSet) -> str:
253
- key_split = self.registry.split(".")
254
- orm_name_with_schema = f"{key_split[0]}.{key_split[1]}"
255
218
  feature_sets_related_models = dict_related_model_to_related_name(self)
256
- related_name = feature_sets_related_models.get(orm_name_with_schema)
219
+ related_name = feature_sets_related_models.get(self.registry)
257
220
  return related_name
258
221
 
259
222