lamindb 0.74.3__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
lamindb/_is_versioned.py CHANGED
@@ -16,7 +16,7 @@ def _add_to_version_family(
16
16
  ):
17
17
  old_uid = self.uid
18
18
  new_uid, version = get_uid_from_old_version(is_new_version_of, version)
19
- if self.__class__.__name__ == "Artifact" and self.key_is_virtual:
19
+ if self.__class__.__name__ == "Artifact" and self._key_is_virtual:
20
20
  old_path = self.path
21
21
  new_path = get_new_path_from_uid(
22
22
  old_path=old_path, old_uid=old_uid, new_uid=new_uid
lamindb/_parents.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import builtins
4
- from typing import TYPE_CHECKING
4
+ from typing import TYPE_CHECKING, Literal
5
5
 
6
6
  import lamindb_setup as ln_setup
7
7
  from lamin_utils import logger
@@ -10,7 +10,7 @@ from lnschema_core.models import HasParents, format_field_value
10
10
 
11
11
  from lamindb._utils import attach_func_to_class_method
12
12
 
13
- from ._record import get_default_str_field
13
+ from ._record import get_name_field
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from lnschema_core.types import StrField
@@ -61,7 +61,7 @@ def view_parents(
61
61
  distance: int = 5,
62
62
  ):
63
63
  if field is None:
64
- field = get_default_str_field(self)
64
+ field = get_name_field(self)
65
65
  if not isinstance(field, str):
66
66
  field = field.field.name
67
67
 
@@ -137,10 +137,14 @@ def view_lineage(data: Artifact | Collection, with_children: bool = True) -> Non
137
137
 
138
138
 
139
139
  def _view_parents(
140
- record: Record, field: str, with_children: bool = False, distance: int = 100
140
+ record: Record,
141
+ field: str,
142
+ with_children: bool = False,
143
+ distance: int = 100,
144
+ attr_name: Literal["parents", "predecessors"] = "parents",
141
145
  ):
142
146
  """Graph of parents."""
143
- if not hasattr(record, "parents"):
147
+ if not hasattr(record, attr_name):
144
148
  raise NotImplementedError(
145
149
  f"Parents view is not supported for {record.__class__.__name__}!"
146
150
  )
@@ -149,13 +153,17 @@ def _view_parents(
149
153
 
150
154
  df_edges = None
151
155
  df_edges_parents = _df_edges_from_parents(
152
- record=record, field=field, distance=distance
156
+ record=record, field=field, distance=distance, attr_name=attr_name
153
157
  )
154
158
  if df_edges_parents is not None:
155
159
  df_edges = df_edges_parents
156
160
  if with_children:
157
161
  df_edges_children = _df_edges_from_parents(
158
- record=record, field=field, distance=distance, children=True
162
+ record=record,
163
+ field=field,
164
+ distance=distance,
165
+ children=True,
166
+ attr_name=attr_name,
159
167
  )
160
168
  if df_edges_children is not None:
161
169
  if df_edges is not None:
@@ -197,12 +205,18 @@ def _view_parents(
197
205
  _view(u)
198
206
 
199
207
 
200
- def _get_parents(record: Record, field: str, distance: int, children: bool = False):
208
+ def _get_parents(
209
+ record: Record,
210
+ field: str,
211
+ distance: int,
212
+ children: bool = False,
213
+ attr_name: Literal["parents", "predecessors"] = "parents",
214
+ ):
201
215
  """Recursively get parent records within a distance."""
202
216
  if children:
203
- key = "parents"
217
+ key = attr_name
204
218
  else:
205
- key = "children"
219
+ key = "children" if attr_name == "parents" else "successors" # type: ignore
206
220
  model = record.__class__
207
221
  condition = f"{key}__{field}"
208
222
  results = model.filter(**{condition: record.__getattribute__(field)}).all()
@@ -228,12 +242,23 @@ def _get_parents(record: Record, field: str, distance: int, children: bool = Fal
228
242
 
229
243
 
230
244
  def _df_edges_from_parents(
231
- record: Record, field: str, distance: int, children: bool = False
245
+ record: Record,
246
+ field: str,
247
+ distance: int,
248
+ children: bool = False,
249
+ attr_name: Literal["parents", "predecessors"] = "parents",
232
250
  ):
233
251
  """Construct a DataFrame of edges as the input of graphviz.Digraph."""
234
- key = "children" if children else "parents"
252
+ if attr_name == "parents":
253
+ key = "children" if children else "parents"
254
+ else:
255
+ key = "successors" if children else "predecessors"
235
256
  parents = _get_parents(
236
- record=record, field=field, distance=distance, children=children
257
+ record=record,
258
+ field=field,
259
+ distance=distance,
260
+ children=children,
261
+ attr_name=attr_name,
237
262
  )
238
263
  all = record.__class__.objects
239
264
  records = parents | all.filter(id=record.id)
lamindb/_record.py CHANGED
@@ -160,19 +160,22 @@ def from_values(
160
160
  field: StrField | None = None,
161
161
  create: bool = False,
162
162
  organism: Record | str | None = None,
163
- public_source: Record | None = None,
163
+ source: Record | None = None,
164
164
  mute: bool = False,
165
165
  ) -> list[Record]:
166
166
  """{}""" # noqa: D415
167
- from_public = True if cls.__module__.startswith("lnschema_bionty.") else False
168
- field_str = get_default_str_field(cls, field=field)
167
+ from_source = True if cls.__module__.startswith("bionty.") else False
168
+ # if records from source is already saved in db, skip from_source
169
+ if isinstance(source, Record) and source.in_db:
170
+ from_source = False
171
+ field_str = get_name_field(cls, field=field)
169
172
  return get_or_create_records(
170
173
  iterable=values,
171
174
  field=getattr(cls, field_str),
172
175
  create=create,
173
- from_public=from_public,
176
+ from_source=from_source,
174
177
  organism=organism,
175
- public_source=public_source,
178
+ source=source,
176
179
  mute=mute,
177
180
  )
178
181
 
@@ -284,7 +287,7 @@ def _lookup(
284
287
  ) -> NamedTuple:
285
288
  """{}""" # noqa: D415
286
289
  queryset = _queryset(cls, using_key=using_key)
287
- field = get_default_str_field(orm=queryset.model, field=field)
290
+ field = get_name_field(orm=queryset.model, field=field)
288
291
 
289
292
  return Lookup(
290
293
  records=queryset,
@@ -293,7 +296,7 @@ def _lookup(
293
296
  prefix="ln",
294
297
  ).lookup(
295
298
  return_field=(
296
- get_default_str_field(orm=queryset.model, field=return_field)
299
+ get_name_field(orm=queryset.model, field=return_field)
297
300
  if return_field is not None
298
301
  else None
299
302
  )
@@ -311,7 +314,7 @@ def lookup(
311
314
  return _lookup(cls=cls, field=field, return_field=return_field)
312
315
 
313
316
 
314
- def get_default_str_field(
317
+ def get_name_field(
315
318
  orm: Record | QuerySet | Manager,
316
319
  *,
317
320
  field: str | StrField | None = None,
@@ -321,14 +324,11 @@ def get_default_str_field(
321
324
  orm = orm.model
322
325
  model_field_names = [i.name for i in orm._meta.fields]
323
326
 
324
- # set default field
327
+ # set to default name field
325
328
  if field is None:
326
- if orm._meta.model.__name__ == "Run":
327
- field = orm._meta.get_field("created_at")
328
- elif orm._meta.model.__name__ == "User":
329
- field = orm._meta.get_field("handle")
329
+ if hasattr(orm, "_name_field"):
330
+ field = orm._meta.get_field(orm._name_field)
330
331
  elif "name" in model_field_names:
331
- # by default use the name field
332
332
  field = orm._meta.get_field("name")
333
333
  else:
334
334
  # first char or text field that doesn't contain "id"
@@ -339,7 +339,7 @@ def get_default_str_field(
339
339
  field = i
340
340
  break
341
341
 
342
- # no default field can be found
342
+ # no default name field can be found
343
343
  if field is None:
344
344
  raise ValueError(
345
345
  "please pass a Record string field, e.g., `CellType.name`!"
@@ -443,9 +443,8 @@ def update_fk_to_default_db(
443
443
 
444
444
  FKBULK = [
445
445
  "organism",
446
- "public_source",
447
- "latest_report", # Transform
448
- "source_code", # Transform
446
+ "source",
447
+ "_source_code_artifact", # Transform
449
448
  "report", # Run
450
449
  ]
451
450
 
@@ -523,7 +522,7 @@ def save(self, *args, **kwargs) -> Record:
523
522
  artifacts: list = []
524
523
  if self.__class__.__name__ == "Collection" and self.id is not None:
525
524
  # when creating a new collection without being able to access artifacts
526
- artifacts = self.artifacts.list()
525
+ artifacts = self.ordered_artifacts.list()
527
526
  # transfer of the record to the default db with fk fields
528
527
  result = transfer_to_default_db(self, using_key)
529
528
  if result is not None:
@@ -538,7 +537,7 @@ def save(self, *args, **kwargs) -> Record:
538
537
  logger.info("transfer artifacts")
539
538
  for artifact in artifacts:
540
539
  artifact.save()
541
- self.unordered_artifacts.add(*artifacts)
540
+ self.artifacts.add(*artifacts)
542
541
  if hasattr(self, "labels"):
543
542
  from copy import copy
544
543
 
lamindb/_save.py CHANGED
@@ -85,9 +85,9 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
85
85
  r for r in non_artifacts_new if hasattr(r, "_parents")
86
86
  ]
87
87
  if len(non_artifacts_with_parents) > 0:
88
- # this can only happen within lnschema_bionty right now!!
88
+ # this can only happen within bionty right now!!
89
89
  # we might extend to core lamindb later
90
- from lnschema_bionty.core import add_ontology
90
+ from bionty.core import add_ontology
91
91
 
92
92
  add_ontology(non_artifacts_with_parents)
93
93
 
lamindb/_transform.py CHANGED
@@ -1,11 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from typing import TYPE_CHECKING
4
+
5
+ from lamindb_setup.core._docs import doc_args
3
6
  from lnschema_core.models import Run, Transform
4
- from lnschema_core.types import TransformType
5
7
 
8
+ from ._parents import _view_parents
6
9
  from ._run import delete_run_artifacts
7
10
  from .core.versioning import process_is_new_version_of
8
11
 
12
+ if TYPE_CHECKING:
13
+ from lnschema_core.types import TransformType
14
+
9
15
 
10
16
  def __init__(transform: Transform, *args, **kwargs):
11
17
  if len(args) == len(transform._meta.concrete_fields):
@@ -18,9 +24,7 @@ def __init__(transform: Transform, *args, **kwargs):
18
24
  )
19
25
  (kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None)
20
26
  version: str | None = kwargs.pop("version") if "version" in kwargs else None
21
- type: TransformType | None = (
22
- kwargs.pop("type") if "type" in kwargs else TransformType.pipeline
23
- )
27
+ type: TransformType | None = kwargs.pop("type") if "type" in kwargs else "pipeline"
24
28
  reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
25
29
  reference_type: str | None = (
26
30
  kwargs.pop("reference_type") if "reference_type" in kwargs else None
@@ -55,19 +59,13 @@ def __init__(transform: Transform, *args, **kwargs):
55
59
 
56
60
 
57
61
  def delete(self) -> None:
58
- # set latest_report to None, it's tracked through the latest run
59
- latest_report = None
60
- if self.latest_report is not None:
61
- latest_report = self.latest_report
62
- self.latest_report = None
63
- source_code = None
64
- if self.source_code is not None:
65
- source_code = self.source_code
66
- self.source_code = None
67
- if latest_report is not None or source_code is not None:
62
+ _source_code_artifact = None
63
+ if self._source_code_artifact is not None:
64
+ _source_code_artifact = self._source_code_artifact
65
+ self._source_code_artifact = None
68
66
  self.save()
69
- if source_code is not None:
70
- source_code.delete(permanent=True)
67
+ if _source_code_artifact is not None:
68
+ _source_code_artifact.delete(permanent=True)
71
69
  # query all runs and delete their artifacts
72
70
  runs = Run.filter(transform=self)
73
71
  for run in runs:
@@ -78,10 +76,23 @@ def delete(self) -> None:
78
76
 
79
77
 
80
78
  @property # type: ignore
79
+ @doc_args(Transform.latest_run.__doc__)
81
80
  def latest_run(self) -> Run:
81
+ """{}""" # noqa: D415
82
82
  return self.runs.order_by("-started_at").first()
83
83
 
84
84
 
85
+ def view_lineage(self, with_successors: bool = False, distance: int = 5):
86
+ return _view_parents(
87
+ record=self,
88
+ field="name",
89
+ with_children=with_successors,
90
+ distance=distance,
91
+ attr_name="predecessors",
92
+ )
93
+
94
+
85
95
  Transform.__init__ = __init__
86
96
  Transform.delete = delete
87
97
  Transform.latest_run = latest_run
98
+ Transform.view_lineage = view_lineage
lamindb/core/_data.py CHANGED
@@ -20,7 +20,7 @@ from lnschema_core.models import (
20
20
 
21
21
  from lamindb._parents import view_lineage
22
22
  from lamindb._query_set import QuerySet
23
- from lamindb._record import get_default_str_field
23
+ from lamindb._record import get_name_field
24
24
  from lamindb.core._settings import settings
25
25
 
26
26
  from ._feature_manager import (
@@ -129,31 +129,29 @@ def describe(self: HasFeatures, print_types: bool = False):
129
129
  # prefetch m-2-m relationships
130
130
  self = (
131
131
  self.__class__.objects.using(self._state.db)
132
- .prefetch_related("feature_sets", "input_of")
132
+ .prefetch_related("feature_sets", "input_of_runs")
133
133
  .get(id=self.id)
134
134
  )
135
135
 
136
136
  # provenance
137
137
  if len(foreign_key_fields) > 0: # always True for Artifact and Collection
138
138
  fields_values = [(field, getattr(self, field)) for field in foreign_key_fields]
139
- type_str = (
140
- lambda attr: f": {attr.__class__.__get_name_with_schema__()}"
141
- if print_types
142
- else ""
139
+ type_str = lambda attr: (
140
+ f": {attr.__class__.__get_name_with_schema__()}" if print_types else ""
143
141
  )
144
142
  related_msg = "".join(
145
143
  [
146
- f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_default_str_field(attr)))}\n"
144
+ f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_name_field(attr)))}\n"
147
145
  for (field_name, attr) in fields_values
148
146
  if attr is not None
149
147
  ]
150
148
  )
151
149
  prov_msg += related_msg
152
150
  # input of
153
- if self.id is not None and self.input_of.exists():
154
- values = [format_field_value(i.started_at) for i in self.input_of.all()]
151
+ if self.id is not None and self.input_of_runs.exists():
152
+ values = [format_field_value(i.started_at) for i in self.input_of_runs.all()]
155
153
  type_str = ": Run" if print_types else "" # type: ignore
156
- prov_msg += f" .input_of{type_str} = {values}\n"
154
+ prov_msg += f" .input_of_runs{type_str} = {values}\n"
157
155
  if prov_msg:
158
156
  msg += f" {colors.italic('Provenance')}\n"
159
157
  msg += prov_msg
@@ -210,11 +208,11 @@ def get_labels(
210
208
  ).all()
211
209
  if flat_names:
212
210
  # returns a flat list of names
213
- from lamindb._record import get_default_str_field
211
+ from lamindb._record import get_name_field
214
212
 
215
213
  values = []
216
214
  for v in qs_by_registry.values():
217
- values += v.list(get_default_str_field(v))
215
+ values += v.list(get_name_field(v))
218
216
  return values
219
217
  if len(registries_to_check) == 1 and registry in qs_by_registry:
220
218
  return qs_by_registry[registry]
@@ -304,12 +302,12 @@ def add_labels(
304
302
  if len(linked_labels) > 0:
305
303
  labels_accessor.remove(*linked_labels)
306
304
  labels_accessor.add(*records, through_defaults={"feature_id": feature.id})
307
- feature_set_links = get_feature_set_links(self)
308
- feature_set_ids = [link.featureset_id for link in feature_set_links.all()]
305
+ links_feature_set = get_feature_set_links(self)
306
+ feature_set_ids = [link.featureset_id for link in links_feature_set.all()]
309
307
  # get all linked features of type Feature
310
308
  feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
311
309
  {
312
- feature_set_links.filter(featureset_id=feature_set.id)
310
+ links_feature_set.filter(featureset_id=feature_set.id)
313
311
  .one()
314
312
  .slot: feature_set.features.all()
315
313
  for feature_set in feature_sets
@@ -415,7 +413,7 @@ def _track_run_input(
415
413
  # generalize below for more than one data batch
416
414
  if len(input_data) == 1:
417
415
  if input_data[0].transform is not None:
418
- run.transform.parents.add(input_data[0].transform)
416
+ run.transform.predecessors.add(input_data[0].transform)
419
417
 
420
418
 
421
419
  HasFeatures.describe = describe
@@ -39,7 +39,7 @@ from lamindb._feature import FEATURE_TYPES, convert_numpy_dtype_to_lamin_feature
39
39
  from lamindb._feature_set import DICT_KEYS_TYPE, FeatureSet
40
40
  from lamindb._record import (
41
41
  REGISTRY_UNIQUE_FIELD,
42
- get_default_str_field,
42
+ get_name_field,
43
43
  transfer_fk_to_default_db_bulk,
44
44
  transfer_to_default_db,
45
45
  )
@@ -88,12 +88,12 @@ def get_feature_set_by_slot_(host) -> dict:
88
88
  host_id_field = get_host_id_field(host)
89
89
  kwargs = {host_id_field: host.id}
90
90
  # otherwise, we need a query
91
- feature_set_links = (
91
+ links_feature_set = (
92
92
  host.feature_sets.through.objects.using(host_db)
93
93
  .filter(**kwargs)
94
94
  .select_related("featureset")
95
95
  )
96
- return {fsl.slot: fsl.featureset for fsl in feature_set_links}
96
+ return {fsl.slot: fsl.featureset for fsl in links_feature_set}
97
97
 
98
98
 
99
99
  def get_label_links(
@@ -112,8 +112,8 @@ def get_label_links(
112
112
  def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
113
113
  host_id_field = get_host_id_field(host)
114
114
  kwargs = {host_id_field: host.id}
115
- feature_set_links = host.feature_sets.through.objects.filter(**kwargs)
116
- return feature_set_links
115
+ links_feature_set = host.feature_sets.through.objects.filter(**kwargs)
116
+ return links_feature_set
117
117
 
118
118
 
119
119
  def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
@@ -122,12 +122,7 @@ def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
122
122
  link_model_name == "ModelBase" or link_model_name == "RecordMeta"
123
123
  ): # we passed the type of the link
124
124
  link_model_name = link.__name__
125
- link_attr = link_model_name.replace(data.__class__.__name__, "")
126
- if link_attr == "ExperimentalFactor":
127
- link_attr = "experimental_factor"
128
- else:
129
- link_attr = link_attr.lower()
130
- return link_attr
125
+ return link_model_name.replace(data.__class__.__name__, "").lower()
131
126
 
132
127
 
133
128
  # Custom aggregation for SQLite
@@ -182,14 +177,14 @@ def print_features(
182
177
  non_labels_msg = ""
183
178
  if self.id is not None and self.__class__ == Artifact or self.__class__ == Run:
184
179
  attr_name = "param" if print_params else "feature"
185
- feature_values = (
186
- getattr(self, f"{attr_name}_values")
180
+ _feature_values = (
181
+ getattr(self, f"_{attr_name}_values")
187
182
  .values(f"{attr_name}__name", f"{attr_name}__dtype")
188
183
  .annotate(values=custom_aggregate("value", self._state.db))
189
184
  .order_by(f"{attr_name}__name")
190
185
  )
191
- if len(feature_values) > 0:
192
- for fv in feature_values:
186
+ if len(_feature_values) > 0:
187
+ for fv in _feature_values:
193
188
  feature_name = fv[f"{attr_name}__name"]
194
189
  feature_dtype = fv[f"{attr_name}__dtype"]
195
190
  values = fv["values"]
@@ -217,7 +212,7 @@ def print_features(
217
212
  for slot, feature_set in get_feature_set_by_slot_(self).items():
218
213
  features = feature_set.members
219
214
  # features.first() is a lot slower than features[0] here
220
- name_field = get_default_str_field(features[0])
215
+ name_field = get_name_field(features[0])
221
216
  feature_names = list(features.values_list(name_field, flat=True)[:20])
222
217
  type_str = f": {feature_set.registry}" if print_types else ""
223
218
  feature_set_msg += (
@@ -246,7 +241,7 @@ def parse_feature_sets_from_anndata(
246
241
  from lamindb.core.storage._backed_access import backed_access
247
242
 
248
243
  using_key = settings._using_key
249
- data_parse = backed_access(filepath, using_key)
244
+ data_parse = backed_access(filepath, using_key=using_key)
250
245
  else:
251
246
  data_parse = ad.read_h5ad(filepath, backed="r")
252
247
  type = "float"
@@ -316,13 +311,13 @@ def infer_feature_type_convert_json(
316
311
  if len(value) > 0: # type: ignore
317
312
  first_element_type = type(next(iter(value)))
318
313
  if all(isinstance(elem, first_element_type) for elem in value):
319
- if first_element_type == bool:
314
+ if first_element_type is bool:
320
315
  return f"list[{FEATURE_TYPES['bool']}]", value
321
- elif first_element_type == int:
316
+ elif first_element_type is int:
322
317
  return f"list[{FEATURE_TYPES['int']}]", value
323
- elif first_element_type == float:
318
+ elif first_element_type is float:
324
319
  return f"list[{FEATURE_TYPES['float']}]", value
325
- elif first_element_type == str:
320
+ elif first_element_type is str:
326
321
  if str_as_ulabel:
327
322
  return FEATURE_TYPES["str"] + "[ULabel]", value
328
323
  else:
@@ -390,7 +385,7 @@ def filter(cls, **expression) -> QuerySet:
390
385
  feature = features.get(name=normalized_key)
391
386
  if not feature.dtype.startswith("cat"):
392
387
  feature_value = value_model.filter(feature=feature, value=value).one()
393
- new_expression["feature_values"] = feature_value
388
+ new_expression["_feature_values"] = feature_value
394
389
  else:
395
390
  if isinstance(value, str):
396
391
  label = ULabel.filter(name=value).one()
@@ -478,7 +473,7 @@ def _add_values(
478
473
  )
479
474
  # figure out which of the values go where
480
475
  features_labels = defaultdict(list)
481
- feature_values = []
476
+ _feature_values = []
482
477
  not_validated_values = []
483
478
  for key, value in features_values.items():
484
479
  feature = model.filter(name=key).one()
@@ -508,7 +503,7 @@ def _add_values(
508
503
  feature_value = value_model.filter(**filter_kwargs).one_or_none()
509
504
  if feature_value is None:
510
505
  feature_value = value_model(**filter_kwargs)
511
- feature_values.append(feature_value)
506
+ _feature_values.append(feature_value)
512
507
  else:
513
508
  if isinstance(value, Record) or (
514
509
  isinstance(value, Iterable) and isinstance(next(iter(value)), Record)
@@ -578,7 +573,7 @@ def _add_values(
578
573
  except Exception:
579
574
  save(links, ignore_conflicts=True)
580
575
  # now deal with links that were previously saved without a feature_id
581
- saved_links = LinkORM.filter(
576
+ links_saved = LinkORM.filter(
582
577
  **{
583
578
  "artifact_id": self._host.id,
584
579
  f"{field_name}__in": [
@@ -586,7 +581,7 @@ def _add_values(
586
581
  ],
587
582
  }
588
583
  )
589
- for link in saved_links.all():
584
+ for link in links_saved.all():
590
585
  # TODO: also check for inconsistent features
591
586
  if link.feature_id is None:
592
587
  link.feature_id = [
@@ -595,13 +590,13 @@ def _add_values(
595
590
  if l.id == getattr(link, field_name)
596
591
  ][0]
597
592
  link.save()
598
- if feature_values:
599
- save(feature_values)
593
+ if _feature_values:
594
+ save(_feature_values)
600
595
  if is_param:
601
- LinkORM = self._host.param_values.through
596
+ LinkORM = self._host._param_values.through
602
597
  valuefield_id = "paramvalue_id"
603
598
  else:
604
- LinkORM = self._host.feature_values.through
599
+ LinkORM = self._host._feature_values.through
605
600
  valuefield_id = "featurevalue_id"
606
601
  links = [
607
602
  LinkORM(
@@ -610,7 +605,7 @@ def _add_values(
610
605
  valuefield_id: feature_value.id,
611
606
  }
612
607
  )
613
- for feature_value in feature_values
608
+ for feature_value in _feature_values
614
609
  ]
615
610
  # a link might already exist, to avoid raising a unique constraint
616
611
  # error, ignore_conflicts
@@ -683,10 +678,10 @@ def _add_set_from_df(
683
678
  ):
684
679
  """Add feature set corresponding to column names of DataFrame."""
685
680
  if isinstance(self._host, Artifact):
686
- assert self._host.accessor == "DataFrame" # noqa: S101
681
+ assert self._host._accessor == "DataFrame" # noqa: S101
687
682
  else:
688
683
  # Collection
689
- assert self._host.artifact.accessor == "DataFrame" # noqa: S101
684
+ assert self._host.artifact._accessor == "DataFrame" # noqa: S101
690
685
 
691
686
  # parse and register features
692
687
  registry = field.field.model
@@ -714,7 +709,7 @@ def _add_set_from_anndata(
714
709
  ):
715
710
  """Add features from AnnData."""
716
711
  if isinstance(self._host, Artifact):
717
- assert self._host.accessor == "AnnData" # noqa: S101
712
+ assert self._host._accessor == "AnnData" # noqa: S101
718
713
  else:
719
714
  raise NotImplementedError()
720
715
 
@@ -744,7 +739,7 @@ def _add_set_from_mudata(
744
739
  if obs_fields is None:
745
740
  obs_fields = {}
746
741
  if isinstance(self._host, Artifact):
747
- assert self._host.accessor == "MuData" # noqa: S101
742
+ assert self._host._accessor == "MuData" # noqa: S101
748
743
  else:
749
744
  raise NotImplementedError()
750
745
 
@@ -781,17 +776,12 @@ def _add_from(self, data: HasFeatures):
781
776
  registry = members[0].__class__
782
777
  # note here the features are transferred based on an unique field
783
778
  field = REGISTRY_UNIQUE_FIELD.get(registry.__name__.lower(), "uid")
784
- # TODO: get a default ID field for the registry
785
- if hasattr(registry, "ontology_id"):
786
- field = "ontology_id"
787
- elif hasattr(registry, "ensembl_gene_id"):
788
- field = "ensembl_gene_id"
789
- elif hasattr(registry, "uniprotkb_id"):
790
- field = "uniprotkb_id"
779
+ if hasattr(registry, "_ontology_id_field"):
780
+ field = registry._ontology_id_field
791
781
  # this will be e.g. be a list of ontology_ids or uids
792
782
  member_uids = list(members.values_list(field, flat=True))
793
783
  # create records from ontology_id
794
- if field == "ontology_id" and len(member_uids) > 0:
784
+ if hasattr(registry, "_ontology_id_field") and len(member_uids) > 0:
795
785
  # create from bionty
796
786
  save(registry.from_values(member_uids, field=field))
797
787
  validated = registry.validate(member_uids, field=field, mute=True)
@@ -816,7 +806,7 @@ def _add_from(self, data: HasFeatures):
816
806
  member_uids, field=getattr(registry, field)
817
807
  )
818
808
  if feature_set_self is None:
819
- if hasattr(registry, "organism"):
809
+ if hasattr(registry, "organism_id"):
820
810
  logger.warning(
821
811
  f"FeatureSet is not transferred, check if organism is set correctly: {feature_set}"
822
812
  )