lamindb 0.48a3__py3-none-any.whl → 0.48.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
lamindb/__init__.py CHANGED
@@ -4,11 +4,12 @@ Import the package::
 
    import lamindb as ln
 
-`File` tracks data artifacts in form of files, on-disk (`zarr`, etc.) and
-in-memory data objects (`DataFrame`, `AnnData`, etc.) and allows to link them
-against entities of core schema & custom schemas.
+.. note::
 
-The core schema entities are central to lamindb's API:
+   `File` abstracts over objects in storage from blob-like files (pdf, txt, etc.)
+   to streamable storage backends (HDF5, DuckDB, zarr, TileDB, etc.).
+
+   `Dataset` abstracts over `File` and tables in classical warehouses (BigQuery, Snowflake).
 
 .. autosummary::
    :toctree: .
@@ -16,18 +17,13 @@ The core schema entities are central to lamindb's API:
    File
    Dataset
    Transform
-   Run
-   Feature
    Label
+   Feature
+   FeatureSet
+   Modality
    User
    Storage
-
-More control over feature management:
-
-.. autosummary::
-   :toctree: .
-
-   FeatureSet
+   Run
 
 Functional tools:
 
@@ -37,7 +33,6 @@ Functional tools:
    track
    view
    save
-   delete
 
 Static classes & modules:
 
@@ -45,7 +40,6 @@ Static classes & modules:
    :toctree: .
 
    settings
-   context
    types
    setup
    schema
@@ -53,7 +47,7 @@ Static classes & modules:
 
 """
 
-__version__ = "0.48a3"  # denote a release candidate for 0.1.0 with 0.1rc1
+__version__ = "0.48.1"  # denote a release candidate for 0.1.0 with 0.1rc1
 
 import os as _os
 
@@ -93,6 +87,7 @@ if _INSTANCE_SETUP:
         FeatureSet,
         File,
         Label,
+        Modality,
         Run,
         Storage,
         Transform,
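
For orientation, a minimal sketch of the reorganized top-level API; the registry names follow the new autosummary above, while the exact call signatures are assumptions beyond what this hunk shows:

    import lamindb as ln

    # File abstracts over an object in storage
    file = ln.File(data="./measurements.parquet", description="raw measurements")
    file.save()

    # Dataset abstracts over File (and warehouse tables); cf. from_files() in _dataset.py below
    dataset = ln.Dataset.from_files(name="all-measurements", files=[file])
    dataset.save()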
lamindb/_context.py CHANGED
@@ -116,7 +116,7 @@ def get_notebook_name_colab() -> str:
     return name.rstrip(".ipynb")
 
 
-class context:
+class run_context:
     """Global run context."""
 
     instance: Optional[InstanceSettings] = None
@@ -434,3 +434,6 @@ class context:
         logger.success(f"Updated: {transform}")
 
         cls.transform = transform
+
+
+context = run_context
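
The class is renamed, and the module-level alias preserves the old name; a quick check of the preserved import path:

    from lamindb._context import context, run_context

    # old code that imported `context` keeps working
    assert context is run_context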
lamindb/_dataset.py CHANGED
@@ -67,7 +67,7 @@ def from_files(dataset: Dataset, *, name: str, files: Iterable[File]) -> Dataset
     feature_set_file_links = File.feature_sets.through.objects.filter(
         file_id__in=file_ids
     )
-    feature_set_ids = [link.featureset_id for link in feature_set_file_links]
+    feature_set_ids = [link.feature_set_id for link in feature_set_file_links]
     feature_sets = FeatureSet.select(id__in=feature_set_ids)
     # validate consistency of feature_sets
     # we only allow one feature set per type
@@ -128,13 +128,16 @@ def delete(dataset: Dataset, storage: bool = False):
 def save(dataset: Dataset):
     if dataset.file is not None:
         dataset.file.save()
-    for feature_set in dataset._feature_sets:
+    feature_sets = dataset._feature_sets
+    if isinstance(feature_sets, dict):
+        feature_sets = feature_sets.values()
+    for feature_set in feature_sets:
         feature_set.save()
     super(Dataset, dataset).save()
     if len(dataset._files) > 0:
         dataset.files.set(dataset._files)
     if len(dataset._feature_sets) > 0:
-        dataset.feature_sets.set(dataset._feature_sets)
+        dataset.feature_sets.set(feature_sets)
 
 
 Dataset.__init__ = __init__
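
Dataset.save() now tolerates both shapes of `_feature_sets`, matching the switch to slot-keyed dicts in _file.py below; a sketch of the normalization:

    # legacy shape
    dataset._feature_sets = [feature_set]
    # new shape, as File.from_df() sets it
    dataset._feature_sets = {"columns": feature_set}
    # save() reduces either to an iterable of records via .values()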
lamindb/_feature.py CHANGED
@@ -1,5 +1,5 @@
 from itertools import islice
-from typing import List
+from typing import List, Optional, Union
 
 import pandas as pd
 from lamin_utils import logger
@@ -32,6 +32,12 @@ def __init__(self, *args, **kwargs):
     # now we proceed with the user-facing constructor
     if len(args) != 0:
         raise ValueError("Only non-keyword args allowed")
+    type: Optional[Union[type, str]] = kwargs.pop("type") if "type" in kwargs else None
+    if type is not None:
+        type_str = type.__name__ if not isinstance(type, str) else type
+    else:
+        type_str = None
+    kwargs["type"] = type_str
     super(Feature, self).__init__(*args, **kwargs)
 
 
@@ -50,7 +56,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
     categoricals_with_unmapped_categories = {}
     for name, col in df.items():
         if name in categoricals:
-            types[name] = "categorical"
+            types[name] = "category"
             categorical = categoricals[name]
             if hasattr(
                 categorical, "cat"
@@ -66,7 +72,7 @@ def from_df(cls, df: "pd.DataFrame") -> List["Feature"]:
     features = Feature.from_values(df.columns, field=Feature.name, types=types)
     assert len(features) == len(df.columns)
 
-    if len(categoricals) > 0:
+    if len(categoricals_with_unmapped_categories) > 0:
         n_max = 20
         categoricals_with_unmapped_categories_formatted = "\n ".join(
             [
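
Feature.__init__ now coerces `type` to its string name before handing off to the ORM, so Python types and strings are interchangeable; a small sketch:

    import lamindb as ln

    # both calls store type as a string ("int" / "category")
    ln.Feature(name="n_genes", type=int)
    ln.Feature(name="cell_type", type="category")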
lamindb/_feature_manager.py ADDED
@@ -0,0 +1,176 @@
+from collections import defaultdict
+from typing import Iterable, List, Optional, Union
+
+import pandas as pd
+from lamin_utils import logger
+from lnschema_core.models import ORM, Dataset, Feature, FeatureSet, File
+
+from ._queryset import QuerySet
+from ._save import save
+
+
+def validate_and_cast_feature(feature) -> Feature:
+    if isinstance(feature, str):
+        feature_name = feature
+        feature = Feature.select(name=feature_name).one_or_none()
+        if feature is None:
+            raise ValueError(
+                f"Please create feature: ln.Feature(name='{feature_name}',"
+                " type='category').save()"
+            )
+    return feature
+
+
+def create_features_df(
+    file: File, feature_sets: List[FeatureSet], exclude: bool = True
+):
+    features = []
+    for feature_set in feature_sets:
+        if exclude:
+            features_df = feature_set.features.exclude(labels_orm__isnull=True).df()
+        else:
+            features_df = feature_set.features.df()
+        slots = file.feature_sets.through.objects.filter(
+            file=file, feature_set=feature_set
+        ).list("slot")
+        for slot in slots:
+            features_df["slot"] = slot
+            features.append(features_df)
+    features_df = pd.concat(features)
+    return features_df.sort_values(["labels_schema", "labels_orm"])
+
+
+class FeatureManager:
+    """Feature manager."""
+
+    def __init__(self, host: Union[File, Dataset]):
+        self._host = host
+        slot_feature_sets = (
+            self._feature_set_df_with_slots().reset_index().set_index("slot")["id"]
+        )
+        self._slots = {
+            slot: self._host.feature_sets.get(id=i)
+            for slot, i in slot_feature_sets.items()
+        }
+
+    def __repr__(self) -> str:
+        if len(self._slots) > 0:
+            msg = "slots:\n"
+            for slot, feature_set in self._slots.items():
+                msg += f"  {slot}: {feature_set}\n"
+            return msg
+        else:
+            return "No linked features."
+
+    def __getitem__(self, slot) -> QuerySet:
+        id = (
+            self._host.feature_sets.through.objects.filter(
+                file_id=self._host.id, slot=slot
+            )
+            .one()
+            .feature_set_id
+        )
+        accessor_by_orm = {
+            field.related_model.__name__: field.name
+            for field in self._host._meta.related_objects
+        }
+        accessor_by_orm["Feature"] = "features"
+        feature_set = self._host.feature_sets.filter(id=id).one()
+        return getattr(feature_set, accessor_by_orm[feature_set.ref_orm]).all()
+
+    def _feature_set_df_with_slots(self) -> pd.DataFrame:
+        """Return DataFrame."""
+        df = self._host.feature_sets.df()
+        df.insert(
+            0,
+            "slot",
+            self._host.feature_sets.through.objects.filter(file_id=self._host.id)
+            .df()
+            .set_index("feature_set_id")
+            .slot,
+        )
+        return df
+
+    def add_labels(
+        self, records: Union[ORM, List[ORM]], feature: Optional[Union[str, ORM]] = None
+    ):
+        """Add one or several labels and associate them with a feature."""
+        if isinstance(records, str) or not isinstance(records, Iterable):
+            records = [records]
+        if isinstance(records[0], str):  # type: ignore
+            raise ValueError(
+                "Please pass a record (an ORM object), not a string, e.g., via: label"
+                f" = ln.Label(name='{records[0]}')"  # type: ignore
+            )
+        if self._host._state.adding:
+            raise ValueError("Please save the file or dataset before adding a label!")
+        feature = validate_and_cast_feature(feature)
+        records_by_orm = defaultdict(list)
+        records_by_feature_orm = defaultdict(list)
+        for record in records:
+            records_by_orm[record.__class__.__name__].append(record)
+            if feature is None:
+                try:
+                    record_feature = (
+                        record._feature
+                        if hasattr(record, "_feature")
+                        else record.feature
+                    )
+                except ValueError:
+                    raise ValueError("Pass feature argument")
+            else:
+                record_feature = feature
+            records_by_feature_orm[(record_feature, record.__class__.__name__)].append(
+                record
+            )
+        schema_and_accessor_by_orm = {
+            field.related_model.__name__: (
+                field.related_model.__get_schema_name__(),
+                field.name,
+            )
+            for field in self._host._meta.related_objects
+        }
+        schema_and_accessor_by_orm["Label"] = ("core", "labels")
+        for orm_name, records in records_by_orm.items():
+            save(records)
+            getattr(self._host, schema_and_accessor_by_orm[orm_name][1]).set(records)
+        accessor_by_orm = {
+            field.related_model.__name__: field.name
+            for field in self._host._meta.related_objects
+        }
+        accessor_by_orm["Feature"] = "features"
+        feature_sets = self._host.feature_sets.all()
+        feature_sets_by_orm = {
+            feature_set.ref_orm: feature_set for feature_set in feature_sets
+        }
+        for (feature, orm_name), records in records_by_feature_orm.items():
+            feature = validate_and_cast_feature(feature)
+            logger.info(f"Linking feature {feature.name} to {orm_name}")
+            feature.labels_orm = orm_name
+            feature.labels_schema = schema_and_accessor_by_orm[orm_name][0]
+            feature.save()
+            # check whether we have to update the feature set that manages labels
+            # (Feature) to account for a new feature
+            feature_set = feature_sets_by_orm["Feature"]
+            accessor = "features"
+            linked_features = getattr(feature_set, accessor)
+            if feature not in linked_features.all():
+                logger.info(
+                    f"Linking feature {feature.name} to feature set {feature_set}"
+                )
+                linked_features.add(feature)
+                feature_set.n += 1
+                feature_set.save()
+
+    def add_feature_set(self, feature_set: FeatureSet, slot: str):
+        if self._host._state.adding:
+            raise ValueError(
+                "Please save the file or dataset before adding a feature set!"
+            )
+        feature_set.save()
+        self._host.feature_sets.add(feature_set)
+        link_record = self._host.feature_sets.through.objects.filter(
+            file=self._host, feature_set=feature_set
+        ).one()
+        link_record.slot = slot
+        link_record.save()
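
FeatureManager backs the `features` property that _file.py attaches below; a hedged usage sketch of the API defined above (`df` stands in for a real DataFrame):

    import lamindb as ln

    file = ln.File.from_df(df, description="my table")
    file.save()  # add_labels()/add_feature_set() require a saved host

    # labels must be ORM records, not strings, linked under a (category) feature
    ln.Feature(name="split", type="category").save()
    file.features.add_labels(ln.Label(name="train"), feature="split")

    # feature sets live under named slots; indexing by slot returns a QuerySet
    file.features["columns"]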
lamindb/_feature_set.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
 from django.db.models.query_utils import DeferredAttribute as Field
 from lamin_utils import logger
 from lamindb_setup.dev._docs import doc_args
-from lnschema_core import ORM, Feature, FeatureSet
+from lnschema_core import ORM, Feature, FeatureSet, ids
 from lnschema_core.types import ListLike
 
 from lamindb.dev.hashing import hash_set
@@ -60,9 +60,14 @@ def __init__(self, *args, **kwargs):
         kwargs.pop("ref_field") if "ref_field" in kwargs else "id"
     )
     type: Optional[Union[type, str]] = kwargs.pop("type") if "type" in kwargs else None
-    readout: Optional[str] = kwargs.pop("readout") if "readout" in kwargs else None
+    modality: Optional[str] = kwargs.pop("modality") if "modality" in kwargs else None
     name: Optional[str] = kwargs.pop("name") if "name" in kwargs else None
-    id: Optional[str] = kwargs.pop("id") if "id" in kwargs else None
+    # hash is only internally used
+    hash: Optional[str] = kwargs.pop("hash") if "hash" in kwargs else None
+    if len(kwargs) > 0:
+        raise ValueError(
+            "Only features, ref_field, type, modality, name are valid keyword arguments"
+        )
 
     # now code
     features_orm = validate_features(features)
@@ -71,29 +76,30 @@ def __init__(self, *args, **kwargs):
     else:
         type = float
     n_features = len(features)
-    features_hash = hash_set({feature.id for feature in features})
-    if id is None:
-        feature_set = FeatureSet.select(id=features_hash).one_or_none()
+    if hash is None:
+        features_hash = hash_set({feature.id for feature in features})
+        feature_set = FeatureSet.select(hash=features_hash).one_or_none()
         if feature_set is not None:
-            logger.info("Loaded an existing `feature_set`")
+            logger.info(f"Loaded {feature_set}")
             init_self_from_db(self, feature_set)
             return None
         else:
-            id = features_hash
+            hash = features_hash
     self._features = (get_related_name(features_orm), features)
     if type is not None:
         type_str = type.__name__ if not isinstance(type, str) else type
     else:
         type_str = None
     super(FeatureSet, self).__init__(
-        id=id,
+        id=ids.base62_20(),
         name=name,
         type=type_str,
         n=n_features,
-        readout=readout,
+        modality=modality,
         ref_orm=features_orm.__name__,
         ref_schema=features_orm.__get_schema_name__(),
         ref_field=ref_field,
+        hash=hash,
     )
 
 
@@ -121,7 +127,7 @@ from_values(
     field: Field = Feature.name,
     type: Optional[Union[Type, str]] = None,
     name: Optional[str] = None,
-    readout: Optional[str] = None,
+    modality: Optional[str] = None,
     **kwargs,
 ) -> "FeatureSet":
     """{}"""
@@ -135,11 +141,10 @@ from_values(
     iterable_idx = index_iterable(values)
     if not isinstance(iterable_idx[0], (str, int)):
         raise TypeError("values should be list-like of str or int")
-    n_features = len(iterable_idx)
     features_hash = hash_set(set(iterable_idx))
-    feature_set = FeatureSet.select(id=features_hash).one_or_none()
+    feature_set = FeatureSet.select(hash=features_hash).one_or_none()
     if feature_set is not None:
-        logger.info("Returning an existing feature_set")
+        logger.info(f"Loaded {feature_set}")
     else:
         from_bionty = ORM.__module__.startswith("lnschema_bionty")
         records = get_or_create_records(
@@ -150,13 +155,12 @@ from_values(
         )
         # type_str = type.__name__ if not isinstance(type, str) else type
         feature_set = FeatureSet(
-            id=features_hash,
+            features=records,
+            hash=features_hash,
             name=name,
-            n=n_features,
-            readout=readout,
+            modality=modality,
             type=type,
             ref_field=field.field.name,
-            features=records,
         )
     return feature_set
 
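FeatureSet identity is now decoupled from content: the primary key is a random base62(20) id and deduplication goes through the new `hash` field; `modality` supersedes `readout`. A sketch of the resulting behavior (the modality value is hypothetical):

    from lamindb import Feature, FeatureSet

    fs1 = FeatureSet.from_values(["gene_1", "gene_2"], field=Feature.name, modality="rna")
    fs1.save()

    # same values -> same hash -> the existing record is loaded, not re-created
    fs2 = FeatureSet.from_values(["gene_1", "gene_2"], field=Feature.name, modality="rna")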
lamindb/_file.py CHANGED
@@ -17,6 +17,7 @@ from lnschema_core import Feature, FeatureSet, File, Run, ids
 from lnschema_core.types import AnnDataLike, DataLike, PathLike
 
 from lamindb._context import context
+from lamindb.dev import FeatureManager
 from lamindb.dev._settings import settings
 from lamindb.dev.hashing import b16_to_b64, hash_file
 from lamindb.dev.storage import (
@@ -353,6 +354,19 @@ def data_is_anndata(data: DataLike):
     return False
 
 
+def data_is_mudata(data: DataLike):
+    try:
+        from mudata import MuData
+    except ModuleNotFoundError:
+        return False
+
+    if isinstance(data, MuData):
+        return True
+    if isinstance(data, (str, Path, UPath)):
+        return Path(data).suffix in {".h5mu"}
+    return False
+
+
 def __init__(file: File, *args, **kwargs):
     # Below checks for the Django-internal call in from_db()
     # it'd be better if we could avoid this, but not being able to create a File
@@ -382,9 +396,7 @@ def __init__(file: File, *args, **kwargs):
         )
 
     if not len(kwargs) == 0:
-        raise ValueError(
-            "Only data, key, run, description & feature_sets can be passed."
-        )
+        raise ValueError("Only data, key, run, description can be passed.")
 
     if name is not None and description is not None:
         raise ValueError("Only pass description, do not pass a name")
@@ -392,19 +404,8 @@ def __init__(file: File, *args, **kwargs):
         logger.warning("Argument `name` is deprecated, please use `description`")
         description = name
 
-    if isinstance(data, pd.DataFrame) and log_hint:
-        logger.hint(
-            "This is a dataframe, consider using File.from_df() to link column"
-            " names as features!"
-        )
-    elif data_is_anndata(data) and log_hint:
-        logger.hint(
-            "This is AnnDataLike, consider using File.from_anndata() to link var_names"
-            " and obs.columns as features!"
-        )
-
     provisional_id = ids.base62_20()
-    kwargs, privates = get_file_kwargs_from_data(
+    kwargs_or_file, privates = get_file_kwargs_from_data(
         data=data,
         key=key,
         run=run,
@@ -412,17 +413,38 @@ def __init__(file: File, *args, **kwargs):
         provisional_id=provisional_id,
         skip_check_exists=skip_check_exists,
     )
+
     # an object with the same hash already exists
-    if isinstance(kwargs, File):
+    if isinstance(kwargs_or_file, File):
         # this is the way Django instantiates from the DB internally
         # https://github.com/django/django/blob/549d6ffeb6d626b023acc40c3bb2093b4b25b3d6/django/db/models/base.py#LL488C1-L491C51
         new_args = [
-            getattr(kwargs, field.attname) for field in file._meta.concrete_fields
+            getattr(kwargs_or_file, field.attname)
+            for field in file._meta.concrete_fields
         ]
         super(File, file).__init__(*new_args)
         file._state.adding = False
         file._state.db = "default"
         return None
+    else:
+        kwargs = kwargs_or_file
+
+    if isinstance(data, pd.DataFrame):
+        if log_hint:
+            logger.hint(
+                "This is a dataframe, consider using File.from_df() to link column"
+                " names as features!"
+            )
+        kwargs["accessor"] = "DataFrame"
+    elif data_is_anndata(data):
+        if log_hint:
+            logger.hint(
+                "This is AnnDataLike, consider using File.from_anndata() to link"
+                " var_names and obs.columns as features!"
+            )
+        kwargs["accessor"] = "AnnData"
+    elif data_is_mudata(data):
+        kwargs["accessor"] = "MuData"
 
     kwargs["id"] = provisional_id
     kwargs["description"] = description
@@ -468,7 +490,7 @@ from_df(
     """{}"""
     file = File(data=df, key=key, run=run, description=description, log_hint=False)
     feature_set = FeatureSet.from_df(df)
-    file._feature_sets = [feature_set]
+    file._feature_sets = {"columns": feature_set}
     return file
 
 
@@ -497,19 +519,22 @@ from_anndata(
         type = "float"
     else:
         type = convert_numpy_dtype_to_lamin_feature_type(adata.X.dtype)
-    feature_sets = []
-    logger.info("Parsing features of X (numerical)")
+    feature_sets = {}
+    logger.info("Parsing feature names of X, stored in slot .var")
     logger.indent = "   "
     feature_set_x = FeatureSet.from_values(
-        data_parse.var.index, var_ref, type=type, name="var", readout="abundance"
+        data_parse.var.index,
+        var_ref,
+        type=type,
     )
-    feature_sets.append(feature_set_x)
-    logger.indent = ""
-    logger.info("Parsing features of obs (numerical & categorical)")
-    logger.indent = "   "
-    feature_set_obs = FeatureSet.from_df(data_parse.obs, name="obs")
-    feature_sets.append(feature_set_obs)
+    feature_sets["var"] = feature_set_x
     logger.indent = ""
+    if len(data_parse.obs.columns) > 0:
+        logger.info("Parsing feature names of slot .obs")
+        logger.indent = "   "
+        feature_set_obs = FeatureSet.from_df(data_parse.obs)
+        feature_sets["obs"] = feature_set_obs
+        logger.indent = ""
     file._feature_sets = feature_sets
     return file
 
@@ -521,19 +546,13 @@ from_dir(
     path: PathLike,
     *,
     run: Optional[Run] = None,
+    storage_root: Optional[PathLike] = None,
 ) -> List["File"]:
     """{}"""
     folderpath = UPath(path)
-    check_path_in_storage = check_path_in_default_storage(folderpath)
-
-    if check_path_in_storage:
-        folder_key = get_relative_path_to_root(path=folderpath).as_posix()
-    else:
-        raise RuntimeError(
-            "Currently, only directories in default storage can be registered!\n"
-            "You can either move your folder into the current default storage"
-            "or add a new default storage through `ln.settings.storage`"
-        )
+    folder_key = get_relative_path_to_root(
+        path=folderpath, root=storage_root
+    ).as_posix()
     # always sanitize by stripping a trailing slash
     folder_key = folder_key.rstrip("/")
     logger.hint(f"using storage prefix = {folder_key}/")
@@ -634,9 +653,14 @@ def _track_run_input(file: File, is_run_input: Optional[bool] = None):
         # avoid cycles (a file is both input and output)
         if file.run != context.run:
             if settings.track_run_inputs:
+                transform_note = ""
+                if file.transform is not None:
+                    transform_note = (
+                        f", adding parent transform {file.transform.id}"
+                    )
                 logger.info(
-                    f"Adding file {file.id} as input for run {context.run.id},"
-                    f" adding parent transform {file.transform.id}"
+                    f"Adding file {file.id} as input for run"
+                    f" {context.run.id}{transform_note}"
                 )
                 track_run_input = True
             else:
@@ -716,14 +740,21 @@ def _save_skip_storage(file, *args, **kwargs) -> None:
     if file.run is not None:
         file.run.save()
     if hasattr(file, "_feature_sets"):
-        for feature_set in file._feature_sets:
+        for feature_set in file._feature_sets.values():
             feature_set.save()
-    if hasattr(file, "_feature_values"):
-        for feature_value in file._feature_values:
-            feature_value.save()
     super(File, file).save(*args, **kwargs)
     if hasattr(file, "_feature_sets"):
-        file.feature_sets.set(file._feature_sets)
+        links = []
+        for slot, feature_set in file._feature_sets.items():
+            links.append(
+                File.feature_sets.through(
+                    file_id=file.id, feature_set_id=feature_set.id, slot=slot
+                )
+            )
+
+        from lamindb._save import bulk_create
+
+        bulk_create(links)
 
 
 def path(self) -> Union[Path, UPath]:
@@ -820,6 +851,9 @@ def inherit_relations(self, file: File, fields: Optional[List[str]] = None):
         else:
             raise KeyError(f"No many-to-many relationship is found with '{field}'")
 
+    if None in related_names:
+        related_names.remove(None)
+
     inherit_names = [
         related_name
         for related_name in related_names
@@ -834,6 +868,15 @@ def inherit_relations(self, file: File, fields: Optional[List[str]] = None):
     )
 
 
+@property  # type: ignore
+@doc_args(File.features.__doc__)
+def features(self) -> "FeatureManager":
+    """{}"""
+    from lamindb._feature_manager import FeatureManager
+
+    return FeatureManager(self)
+
+
 METHOD_NAMES = [
     "__init__",
     "from_anndata",
@@ -864,5 +907,8 @@ for name in METHOD_NAMES:
 # privates currently dealt with separately
 File._delete_skip_storage = _delete_skip_storage
 File._save_skip_storage = _save_skip_storage
+# TODO: move these to METHOD_NAMES
 setattr(File, "view_lineage", view_lineage)
 setattr(File, "inherit_relations", inherit_relations)
+# property signature is not tested:
+setattr(File, "features", features)
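
Taken together, File now records an `accessor` for recognized in-memory formats and exposes the feature manager as a property; a hedged end-to-end sketch:

    import lamindb as ln
    import pandas as pd

    df = pd.DataFrame({"cell_type": ["T cell", "B cell"]})

    # from_df() registers the column feature set under the "columns" slot
    file = ln.File.from_df(df, description="example table")
    file.save()

    # the new property returns a FeatureManager (see _feature_manager.py above)
    file.features             # repr lists slots, e.g. "columns"
    file.features["columns"]  # QuerySet of the linked features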