lamindb 0.49.3__py3-none-any.whl → 0.50.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -1,29 +1,67 @@
-"""Open-source data lake & feature store for biology.
+"""Open-source data platform for biology.
 
-Import the package::
+LaminDB helps you manage data using registries.
+The two most central are:
 
-   import lamindb as ln
+.. autosummary::
+   :toctree: .
+
+   File
+   Dataset
+
+
+.. dropdown:: With more detail, what are files & datasets?
+
+   Both files & datasets
+
+   - track numerical & categorical data batches of arbitrary format & size
+
+   - can validate & link features (the measured dimensions in a data batch)
+
+   Roughly,
+
+   - a file stores a single immutable batch of data
+
+   - a dataset stores a mutable collection of data batches
+
+   Examples:
+
+   - Blob-like immutable files (pdf, txt, csv, jpg, ...) or arrays (h5, h5ad,
+     ...) → :class:`~lamindb.File`
+
+   - Mutable streamable backends (DuckDB, zarr, TileDB, ...) → :class:`~lamindb.Dataset` wrapping :class:`~lamindb.File`
+
+   - Collections of files → :class:`~lamindb.Dataset` wrapping :class:`~lamindb.File`
 
-.. note::
+   - Datasets in BigQuery, Snowflake, Postgres, ... → :class:`~lamindb.Dataset` (not yet implemented)
 
-   `File` abstracts over objects in storage from blob-like files (pdf, txt, etc.)
-   to streamable storage backends (HDF5, DuckDB, zarr, TileDB, etc.).
+   Hence, while
 
-   `Dataset` abstracts over `File` and tables in classical warehouses (BigQuery, Snowflake).
+   - files *always* have a one-to-one correspondence with a storage accessor
+
+   - datasets *can* reference a single file, multiple files or a dataset
+     in a warehouse like BigQuery or Snowflake
+
+
+There are four registries to track provenance of data batches:
 
 .. autosummary::
    :toctree: .
 
-   File
-   Dataset
+   User
+   Storage
    Transform
+   Run
+
+And four registries to validate & contextualize measurements in data batches:
+
+.. autosummary::
+   :toctree: .
+
    Label
    Feature
    FeatureSet
    Modality
-   User
-   Storage
-   Run
 
 Functional tools:
 
@@ -47,7 +85,7 @@ Static classes & modules:
 
 """
 
-__version__ = "0.49.3"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.50.1"  # denote a release candidate for 0.1.0 with 0.1rc1
 
 import os as _os
 
@@ -107,11 +145,13 @@ if _INSTANCE_SETUP:
     from . import _feature_set  # noqa
     from . import _file  # noqa
     from . import _label  # noqa
-    from . import _orm  # noqa
+    from . import _registry  # noqa
     from . import _storage  # noqa
+    from . import _synonym  # noqa
     from . import _transform  # noqa
+    from . import _validate  # noqa
     from ._delete import delete  # noqa
-    from ._orm import select_backward as select  # noqa
+    from ._registry import select_backward as select  # noqa
     from ._save import save  # noqa
     from ._view import view  # noqa
     from .dev._settings import settings
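
To make the file/dataset distinction in the new docstring concrete, here is a minimal sketch of the two central registries in use. The `ln.File` call matches the long-standing constructor; the `ln.Dataset` call wrapping a list of files is an assumption based on the "Dataset wrapping File" description above, not on code in this diff.

    import lamindb as ln

    # a File tracks one immutable data batch, one-to-one with a storage accessor
    # (the path and description below are hypothetical)
    file = ln.File("./sample_batch.h5ad", description="one immutable data batch")
    file.save()

    # a Dataset tracks a mutable collection of batches; here it wraps File records
    # (constructor shape is an assumption based on the docstring above)
    dataset = ln.Dataset([file], name="all-batches")
    dataset.save()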
lamindb/_context.py CHANGED
@@ -15,16 +15,16 @@ from lnschema_core.types import TransformType
 is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
 
 msg_path_failed = (
-    "Failed to infer notebook path.\nFix: Either track manually via"
+    "failed to infer notebook path.\nfix: either track manually via"
     " `ln.track(ln.Transform(name='My notebook'))` or pass"
-    " `notebook_path` to ln.track()."
+    " `notebook_path` to ln.track()"
 )
 
 msg_manual_init = (
-    "\n(1) Save your notebook!"
-    "\n(2) Attach metadata to the notebook by running the CLI:\n"
+    "\n(1) save your notebook!"
+    "\n(2) attach metadata to the notebook by running the CLI:\n"
     "lamin track {notebook_path}"
-    "\n(3) Reload or re-open your notebook"
+    "\n(3) reload or re-open your notebook"
 )
 
 
@@ -167,8 +167,8 @@ class run_context:
         install[jupyter]`, you can simply call:
 
         >>> ln.track()
-        Saved: Transform(id=1LCd8kco9lZUBg, name=Track data lineage / provenance, short_name=02-data-lineage, stem_id=1LCd8kco9lZU, version=0, type=notebook, updated_at=2023-07-10 18:37:19, created_by_id=DzTjkKse)  # noqa
-        Saved: Run(id=pHgVICV9DxBaV6BAuKJl, run_at=2023-07-10 18:37:19, transform_id=1LCd8kco9lZUBg, created_by_id=DzTjkKse)  # noqa
+        saved: Transform(id=1LCd8kco9lZUBg, name=Track data lineage / provenance, short_name=02-data-lineage, stem_id=1LCd8kco9lZU, version=0, type=notebook, updated_at=2023-07-10 18:37:19, created_by_id=DzTjkKse)  # noqa
+        saved: Run(id=pHgVICV9DxBaV6BAuKJl, run_at=2023-07-10 18:37:19, transform_id=1LCd8kco9lZUBg, created_by_id=DzTjkKse)  # noqa
         >>> ln.context.transform
         Transform(id=1LCd8kco9lZUBg, name=Track data lineage / provenance, short_name=02-data-lineage, stem_id=1LCd8kco9lZU, version=0, type=notebook, updated_at=2023-07-10 18:37:19, created_by_id=DzTjkKse)  # noqa
         >>> ln.context.run
@@ -180,8 +180,8 @@ class run_context:
         >>> ln.Transform(name="Cell Ranger", version="7.2.0", type="pipeline").save()
         >>> transform = ln.Transform.filter(name="Cell Ranger", version="7.2.0").one()
         >>> ln.track(transform)
-        💬 Loaded: Transform(id=ceHkZMaiHFdoB6, name=Cell Ranger, stem_id=ceHkZMaiHFdo, version=7.2.0, type=pipeline, updated_at=2023-07-10 18:37:19, created_by_id=DzTjkKse)  # noqa
-        Saved: Run(id=RcpWIKC8cF74Pn3RUJ1W, run_at=2023-07-10 18:37:19, transform_id=ceHkZMaiHFdoB6, created_by_id=DzTjkKse)  # noqa
+        💬 loaded: Transform(id=ceHkZMaiHFdoB6, name=Cell Ranger, stem_id=ceHkZMaiHFdo, version=7.2.0, type=pipeline, updated_at=2023-07-10 18:37:19, created_by_id=DzTjkKse)  # noqa
+        saved: Run(id=RcpWIKC8cF74Pn3RUJ1W, run_at=2023-07-10 18:37:19, transform_id=ceHkZMaiHFdoB6, created_by_id=DzTjkKse)  # noqa
         >>> ln.context.transform
         Transform(id=ceHkZMaiHFdoB6, name=Cell Ranger, stem_id=ceHkZMaiHFdo, version=7.2.0, type=pipeline, updated_at=2023-07-10 18:37:19, created_by_id=DzTjkKse)  # noqa
         >>> ln.context.run
@@ -204,20 +204,20 @@ class run_context:
             except Exception as e:
                 if isinstance(e, ImportError):
                     logger.info(
-                        "It looks like you are running ln.track() from a "
-                        "notebook!\nPlease install nbproject: pip install nbproject"
+                        "it looks like you are running ln.track() from a "
+                        "notebook!\nplease install nbproject: pip install nbproject"
                     )
                 elif isinstance(e, UpdateNbWithNonInteractiveEditorError):
                     raise e
                 elif isinstance(e, (NotebookNotSavedError, NoTitleError)):
                     raise e
                 else:
-                    logger.warning(f"Automatic tracking of notebook failed: {e}")
+                    logger.warning(f"automatic tracking of notebook failed: {e}")
                     is_tracked_notebook = False
 
             if not is_tracked_notebook:
                 logger.warning(
-                    "No automatic metadata detection, consider passing transform"
+                    "no automatic metadata detection, consider passing transform"
                 )
                 return None
         else:
@@ -227,10 +227,10 @@ class run_context:
             transform_exists = Transform.filter(id=transform.id).first()
             if transform_exists is None:
                 transform.save()
-                logger.success(f"Saved: {transform}")
+                logger.save(f"saved: {transform}")
                 transform_exists = transform
             else:
-                logger.info(f"Loaded: {transform_exists}")
+                logger.success(f"loaded: {transform_exists}")
             cls.transform = transform_exists
 
         if new_run is None:  # for notebooks, default to loading latest runs
@@ -248,22 +248,22 @@ class run_context:
         if run is not None:  # loaded latest run
             run.run_at = datetime.now(timezone.utc)  # update run time
             run.save()
-            logger.info(f"Loaded: {run}")
+            logger.success(f"loaded: {run}")
 
         if run is None:  # create new run
             run = ln.Run(transform=cls.transform)
             run.save()
-            logger.success(f"Saved: {run}")
+            logger.save(f"saved: {run}")
         cls.run = run
 
         # at this point, we have a transform and can display its parents if there are any
         parents = cls.transform.parents.all() if cls.transform is not None else []
         if len(parents) > 0:
             if len(parents) == 1:
-                logger.info(f"Parent transform: {parents[0]}")
+                logger.info(f" parent transform: {parents[0]}")
             else:
                 parents_formatted = "\n - ".join([f"{parent}" for parent in parents])
-                logger.info(f"Parent transforms:\n - {parents_formatted}")
+                logger.info(f" parent transforms:\n - {parents_formatted}")
 
         # only for newly initialized notebooks
         if hasattr(cls, "_notebook_meta"):
@@ -347,11 +347,11 @@ class run_context:
 
         dm = DisplayMeta(metadata)
         logger.info(
-            "Notebook imports:"
+            "notebook imports:"
             f" {' '.join(dm.pypackage(infer_pypackages(nb, pin_versions=True)))}"  # noqa
         )
     except Exception:
-        logger.debug("Inferring imported packages failed")
+        logger.debug("inferring imported packages failed")
         pass
 
     if needs_init:
@@ -405,9 +405,9 @@ class run_context:
                 type=TransformType.notebook,
             )
             transform.save()
-            logger.success(f"Saved: {transform}")
+            logger.save(f"saved: {transform}")
         else:
-            logger.info(f"Loaded: {transform}")
+            logger.success(f"loaded: {transform}")
             if transform.name != title or transform.short_name != filestem:
                 response = input(
                     "Updated notebook name and/or title: Do you want to assign a"
@@ -427,9 +427,9 @@ class run_context:
                     transform.short_name = filestem
                 transform.save()
                 if response == "y":
-                    logger.success(f"Saved: {transform}")
+                    logger.save(f"saved: {transform}")
                 else:
-                    logger.success(f"Updated: {transform}")
+                    logger.success(f"updated: {transform}")
 
         cls.transform = transform
lamindb/_delete.py CHANGED
@@ -1,30 +1,30 @@
 from typing import List, Union, overload  # noqa
 
 from lamin_utils import colors, logger
-from lnschema_core import ORM
+from lnschema_core import Registry
 
 
 @overload
 def delete(
-    record: ORM,
+    record: Registry,
 ) -> None:
     ...
 
 
 @overload
 def delete(
-    records: List[ORM],
+    records: List[Registry],
 ) -> None:  # type: ignore
     ...
 
 
 def delete(  # type: ignore
-    records: Union[ORM, List[ORM]],
+    records: Union[Registry, List[Registry]],
 ) -> None:
     """Delete metadata records & files.
 
     Args:
-        records: `Union[ORM, List[ORM]]` One or multiple records.
+        records: `Union[Registry, List[Registry]]` One or multiple records.
 
     Returns:
         `None`
@@ -55,11 +55,11 @@ def delete(  # type: ignore
         Label(id=CcFPLmpq, name=Label1, updated_at=2023-07-19 18:28:16, created_by_id=kmvZDIX9)]  # noqa
     >>> queryset.delete()
     """
-    logger.warning("For efficient bulk delete, use `queryset.delete` instead")
+    logger.warning("for efficient bulk delete, use `queryset.delete` instead")
     if isinstance(records, list):
         records = records
-    elif isinstance(records, ORM):
+    elif isinstance(records, Registry):
         records = [records]
     for record in records:
         record.delete()
-        logger.success(f"Deleted {colors.yellow(f'{record}')}")
+        logger.success(f"deleted {colors.yellow(f'{record}')}")
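
Following the docstring excerpt above, the intended split between single-record and bulk deletion is roughly this; the `Label1` record comes from the doctest, while the `name__icontains` filter is a hypothetical way to build a multi-record queryset:

    import lamindb as ln

    # single records go through ln.delete, which logs one message per record
    label = ln.Label.filter(name="Label1").one()
    ln.delete(label)

    # for many records, the warning added in this version points to the
    # queryset method instead, which deletes in bulk
    queryset = ln.Label.filter(name__icontains="label")
    queryset.delete()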
lamindb/_feature.py CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
 from lamin_utils import colors, logger
 from lamindb_setup.dev._docs import doc_args
 from lnschema_core import Feature, Label
-from lnschema_core.models import ORM
+from lnschema_core.models import Registry
 from pandas.api.types import is_categorical_dtype, is_string_dtype
 
 from lamindb.dev.utils import attach_func_to_class_method
@@ -34,7 +34,7 @@ def __init__(self, *args, **kwargs):
     if len(args) != 0:
         raise ValueError("Only keyword args allowed")
     type: Optional[Union[type, str]] = kwargs.pop("type") if "type" in kwargs else None
-    registries: Optional[List[ORM]] = (
+    registries: Optional[List[Registry]] = (
         kwargs.pop("registries") if "registries" in kwargs else None
     )
     # cast type
@@ -46,14 +46,18 @@ def __init__(self, *args, **kwargs):
     # cast registries
     registries_str: Optional[str] = None
     if registries is not None:
-        if not isinstance(registries, List):
-            raise ValueError("registries has to be a list of ORM types")
-        registries_str = ""
-        for cls in registries:
-            if not hasattr(cls, "__get_name_with_schema__"):
-                raise ValueError("each element of the list has to be an ORM type")
-            registries_str += cls.__get_name_with_schema__() + "|"
-        registries_str = registries_str.rstrip("|")
+        if isinstance(registries, str):
+            # TODO: add more validation
+            registries_str = registries
+        else:
+            if not isinstance(registries, List):
+                raise ValueError("registries has to be a list of Registry types")
+            registries_str = ""
+            for cls in registries:
+                if not hasattr(cls, "__get_name_with_schema__"):
+                    raise ValueError("each element of the list has to be a Registry")
+                registries_str += cls.__get_name_with_schema__() + "|"
+            registries_str = registries_str.rstrip("|")
     kwargs["registries"] = registries_str
     super(Feature, self).__init__(*args, **kwargs)
 
@@ -75,7 +79,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
         if name in categoricals:
             types[name] = "category"
             # below is a harder feature to write, now, because it requires to
-            # query the link tables between the label ORM and file or dataset
+            # query the link tables between the label Registry and file or dataset
             # the original implementation fell short
             # categorical = categoricals[name]
             # if hasattr(
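
The new branch means `registries` now accepts either a list of `Registry` types (serialized via `__get_name_with_schema__` and joined with `|`) or an already-serialized string, which is passed through with only light validation per the TODO. A minimal sketch; the `"core.Label"` string is an assumption about the serialized form, not verbatim from this diff:

    import lamindb as ln

    # a list of Registry types is serialized to "schema.Name|schema.Name|..."
    feature = ln.Feature(name="cell_type", type="category", registries=[ln.Label])

    # new in this version: a pre-serialized string is stored as-is
    feature = ln.Feature(name="cell_type", type="category", registries="core.Label")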
lamindb/_feature_set.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
 from django.db.models.query_utils import DeferredAttribute as Field
 from lamin_utils import logger
 from lamindb_setup.dev._docs import doc_args
-from lnschema_core import ORM, Feature, FeatureSet, Modality, ids
+from lnschema_core import Feature, FeatureSet, Modality, Registry, ids
 from lnschema_core.types import ListLike
 
 from lamindb.dev.hashing import hash_set
@@ -12,11 +12,11 @@ from lamindb.dev.utils import attach_func_to_class_method
 
 from . import _TESTING
 from ._from_values import get_or_create_records, index_iterable
-from ._orm import init_self_from_db
+from ._registry import init_self_from_db
 from ._save import bulk_create
 
 
-def get_related_name(features_type: ORM):
+def get_related_name(features_type: Registry):
     candidates = [
         field.related_name
         for field in FeatureSet._meta.related_objects
@@ -26,19 +26,20 @@ def get_related_name(features_type: ORM):
     raise ValueError(
         f"Can't create feature sets from {features_type.__name__} because it's not"
         " related to it!\nYou need to create a link model between FeatureSet and"
-        " your ORM in your custom schema.\nTo do so, add a line:\nfeature_sets ="
-        " models.ManyToMany(FeatureSet, related_name='mythings')\n"
+        " your Registry in your custom schema.\nTo do so, add a"
+        " line:\nfeature_sets = models.ManyToMany(FeatureSet,"
+        " related_name='mythings')\n"
     )
     return candidates[0]
 
 
-def validate_features(features: List[ORM]) -> ORM:
+def sanity_check_features(features: List[Registry]) -> Registry:
     """Validate and return feature type."""
     if len(features) == 0:
         raise ValueError("provide list of features with at least one element")
     if not hasattr(features, "__getitem__"):
         raise TypeError("features has to be list-like")
-    if not isinstance(features[0], ORM):
+    if not isinstance(features[0], Registry):
         raise TypeError(
             "features has to store feature records! use .from_values() otherwise"
         )
@@ -48,6 +49,24 @@ def validate_features(features: List[ORM]) -> ORM:
     return next(iter(feature_types))  # return value in set of cardinality 1
 
 
+def get_validated_features(features: List[Registry], field: Field) -> List[Registry]:
+    validated_features = []
+    non_validated_features = []
+    for feature in features:
+        if feature._state.adding and not (
+            hasattr(feature, "_from_bionty") and feature._from_bionty
+        ):
+            non_validated_features.append(getattr(feature, field.field.name))
+        else:
+            validated_features.append(feature)
+    if non_validated_features:
+        non_validated_features_display = ",".join(non_validated_features)
+        logger.warning(
+            f"ignoring non-validated features: {non_validated_features_display}"
+        )
+    return validated_features
+
+
 def __init__(self, *args, **kwargs):
     if len(args) == len(self._meta.concrete_fields):
         super(FeatureSet, self).__init__(*args, **kwargs)
@@ -55,10 +74,7 @@ def __init__(self, *args, **kwargs):
     # now we proceed with the user-facing constructor
     if len(args) > 1:
         raise ValueError("Only one non-keyword arg allowed: features")
-    features: Iterable[ORM] = kwargs.pop("features") if len(args) == 0 else args[0]
-    ref_field: Optional[str] = (
-        kwargs.pop("ref_field") if "ref_field" in kwargs else "id"
-    )
+    features: Iterable[Registry] = kwargs.pop("features") if len(args) == 0 else args[0]
     type: Optional[Union[type, str]] = kwargs.pop("type") if "type" in kwargs else None
     modality: Optional[str] = kwargs.pop("modality") if "modality" in kwargs else None
     name: Optional[str] = kwargs.pop("name") if "name" in kwargs else None
@@ -66,11 +82,11 @@ def __init__(self, *args, **kwargs):
     hash: Optional[str] = kwargs.pop("hash") if "hash" in kwargs else None
     if len(kwargs) > 0:
         raise ValueError(
-            "Only features, ref_field, type, modality, name are valid keyword arguments"
+            "Only features, type, modality, name are valid keyword arguments"
         )
 
     # now code
-    features_orm = validate_features(features)
+    features_orm = sanity_check_features(features)
     if features_orm == Feature:
         type = None
     else:
@@ -80,7 +96,7 @@ def __init__(self, *args, **kwargs):
     features_hash = hash_set({feature.id for feature in features})
     feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
     if feature_set is not None:
-        logger.info(f"Loaded {feature_set}")
+        logger.success(f"loaded: {feature_set}")
         init_self_from_db(self, feature_set)
         return None
     else:
@@ -108,7 +124,7 @@ def __init__(self, *args, **kwargs):
         type=type_str,
         n=n_features,
         modality=modality_record,
-        ref_field=f"{features_orm.__get_name_with_schema__()}.{ref_field}",
+        registry=features_orm.__get_name_with_schema__(),
         hash=hash,
     )
 
@@ -139,39 +155,48 @@ def from_values(
     name: Optional[str] = None,
     modality: Optional[str] = None,
     **kwargs,
-) -> "FeatureSet":
+) -> Optional["FeatureSet"]:
     """{}"""
     if not isinstance(field, Field):
-        raise TypeError("Argument `field` must be an ORM field, e.g., `Feature.name`")
+        raise TypeError(
+            "Argument `field` must be a Registry field, e.g., `Feature.name`"
+        )
     if len(values) == 0:
         raise ValueError("Provide a list of at least one value")
-    ORM = field.field.model
-    if isinstance(ORM, Feature):
+    registry = field.field.model
+    if isinstance(registry, Feature):
         raise ValueError("Please use from_df() instead of from_values()")
     iterable_idx = index_iterable(values)
     if not isinstance(iterable_idx[0], (str, int)):
         raise TypeError("values should be list-like of str or int")
-    features_hash = hash_set(set(iterable_idx))
+    from_bionty = registry.__module__.startswith("lnschema_bionty")
+    features = get_or_create_records(
+        iterable=iterable_idx,
+        field=field,
+        from_bionty=from_bionty,
+        **kwargs,
+    )
+    validated_features = get_validated_features(features, field)
+    validated_feature_ids = [feature.id for feature in validated_features]
+    features_hash = hash_set(set(validated_feature_ids))
     feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
     if feature_set is not None:
-        logger.info(f"Loaded {feature_set}")
+        logger.success(f"loaded {feature_set}")
     else:
-        from_bionty = ORM.__module__.startswith("lnschema_bionty")
-        records = get_or_create_records(
-            iterable=iterable_idx,
-            field=field,
-            from_bionty=from_bionty,
-            **kwargs,
-        )
-        # type_str = type.__name__ if not isinstance(type, str) else type
-        feature_set = FeatureSet(
-            features=records,
-            hash=features_hash,
-            name=name,
-            modality=modality,
-            type=type,
-            ref_field=field.field.name,
-        )
+        if type is not None:
+            type_str = type.__name__ if not isinstance(type, str) else type
+        else:
+            type_str = None
+        if validated_features:
+            feature_set = FeatureSet(
+                features=validated_features,
+                hash=features_hash,
+                name=name,
+                modality=modality,
+                type=type_str,
+            )
+        else:
+            feature_set = None
     return feature_set
 
 
@@ -181,10 +206,16 @@ def from_df(
     cls,
     df: "pd.DataFrame",
     name: Optional[str] = None,
-) -> "FeatureSet":
+) -> Optional["FeatureSet"]:
     """{}"""
     features = Feature.from_df(df)
-    feature_set = FeatureSet(features, name=name)
+    validated_features = get_validated_features(features, Feature.name)
+    if validated_features:
+        feature_set = FeatureSet(validated_features, name=name)
+    else:
+        logger.warning("no validated features, skip creating feature set")
+        feature_set = None
+        # raise ValidationError("Dataframe columns contain no validated feature names")
     return feature_set
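
Because `from_values` and `from_df` now return `Optional["FeatureSet"]` and silently drop non-validated features with a warning, callers should handle the `None` case. A minimal sketch with a hypothetical dataframe whose column names may or may not be validated Feature records:

    import pandas as pd
    import lamindb as ln

    # hypothetical data batch; columns become candidate features
    df = pd.DataFrame({"feat1": [1, 2], "feat2": ["a", "b"]})

    # returns None (with a warning) when no column name is a validated Feature
    feature_set = ln.FeatureSet.from_df(df, name="my-feature-set")
    if feature_set is not None:
        feature_set.save()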