lamindb 0.74.2__py3-none-any.whl → 0.75.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
lamindb/_is_versioned.py CHANGED
@@ -16,7 +16,7 @@ def _add_to_version_family(
16
16
  ):
17
17
  old_uid = self.uid
18
18
  new_uid, version = get_uid_from_old_version(is_new_version_of, version)
19
- if self.__class__.__name__ == "Artifact" and self.key_is_virtual:
19
+ if self.__class__.__name__ == "Artifact" and self._key_is_virtual:
20
20
  old_path = self.path
21
21
  new_path = get_new_path_from_uid(
22
22
  old_path=old_path, old_uid=old_uid, new_uid=new_uid
lamindb/_parents.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import builtins
4
- from typing import TYPE_CHECKING
4
+ from typing import TYPE_CHECKING, Literal
5
5
 
6
6
  import lamindb_setup as ln_setup
7
7
  from lamin_utils import logger
@@ -10,7 +10,7 @@ from lnschema_core.models import HasParents, format_field_value
10
10
 
11
11
  from lamindb._utils import attach_func_to_class_method
12
12
 
13
- from ._record import get_default_str_field
13
+ from ._record import get_name_field
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from lnschema_core.types import StrField
@@ -61,7 +61,7 @@ def view_parents(
61
61
  distance: int = 5,
62
62
  ):
63
63
  if field is None:
64
- field = get_default_str_field(self)
64
+ field = get_name_field(self)
65
65
  if not isinstance(field, str):
66
66
  field = field.field.name
67
67
 
@@ -137,10 +137,14 @@ def view_lineage(data: Artifact | Collection, with_children: bool = True) -> Non
137
137
 
138
138
 
139
139
  def _view_parents(
140
- record: Record, field: str, with_children: bool = False, distance: int = 100
140
+ record: Record,
141
+ field: str,
142
+ with_children: bool = False,
143
+ distance: int = 100,
144
+ attr_name: Literal["parents", "predecessors"] = "parents",
141
145
  ):
142
146
  """Graph of parents."""
143
- if not hasattr(record, "parents"):
147
+ if not hasattr(record, attr_name):
144
148
  raise NotImplementedError(
145
149
  f"Parents view is not supported for {record.__class__.__name__}!"
146
150
  )
@@ -149,13 +153,17 @@ def _view_parents(
149
153
 
150
154
  df_edges = None
151
155
  df_edges_parents = _df_edges_from_parents(
152
- record=record, field=field, distance=distance
156
+ record=record, field=field, distance=distance, attr_name=attr_name
153
157
  )
154
158
  if df_edges_parents is not None:
155
159
  df_edges = df_edges_parents
156
160
  if with_children:
157
161
  df_edges_children = _df_edges_from_parents(
158
- record=record, field=field, distance=distance, children=True
162
+ record=record,
163
+ field=field,
164
+ distance=distance,
165
+ children=True,
166
+ attr_name=attr_name,
159
167
  )
160
168
  if df_edges_children is not None:
161
169
  if df_edges is not None:
@@ -197,12 +205,18 @@ def _view_parents(
197
205
  _view(u)
198
206
 
199
207
 
200
- def _get_parents(record: Record, field: str, distance: int, children: bool = False):
208
+ def _get_parents(
209
+ record: Record,
210
+ field: str,
211
+ distance: int,
212
+ children: bool = False,
213
+ attr_name: Literal["parents", "predecessors"] = "parents",
214
+ ):
201
215
  """Recursively get parent records within a distance."""
202
216
  if children:
203
- key = "parents"
217
+ key = attr_name
204
218
  else:
205
- key = "children"
219
+ key = "children" if attr_name == "parents" else "successors" # type: ignore
206
220
  model = record.__class__
207
221
  condition = f"{key}__{field}"
208
222
  results = model.filter(**{condition: record.__getattribute__(field)}).all()
@@ -228,12 +242,23 @@ def _get_parents(record: Record, field: str, distance: int, children: bool = Fal
228
242
 
229
243
 
230
244
  def _df_edges_from_parents(
231
- record: Record, field: str, distance: int, children: bool = False
245
+ record: Record,
246
+ field: str,
247
+ distance: int,
248
+ children: bool = False,
249
+ attr_name: Literal["parents", "predecessors"] = "parents",
232
250
  ):
233
251
  """Construct a DataFrame of edges as the input of graphviz.Digraph."""
234
- key = "children" if children else "parents"
252
+ if attr_name == "parents":
253
+ key = "children" if children else "parents"
254
+ else:
255
+ key = "successors" if children else "predecessors"
235
256
  parents = _get_parents(
236
- record=record, field=field, distance=distance, children=children
257
+ record=record,
258
+ field=field,
259
+ distance=distance,
260
+ children=children,
261
+ attr_name=attr_name,
237
262
  )
238
263
  all = record.__class__.objects
239
264
  records = parents | all.filter(id=record.id)
lamindb/_record.py CHANGED
@@ -160,19 +160,22 @@ def from_values(
160
160
  field: StrField | None = None,
161
161
  create: bool = False,
162
162
  organism: Record | str | None = None,
163
- public_source: Record | None = None,
163
+ source: Record | None = None,
164
164
  mute: bool = False,
165
165
  ) -> list[Record]:
166
166
  """{}""" # noqa: D415
167
- from_public = True if cls.__module__.startswith("lnschema_bionty.") else False
168
- field_str = get_default_str_field(cls, field=field)
167
+ from_source = True if cls.__module__.startswith("bionty.") else False
168
+ # if records from source is already saved in db, skip from_source
169
+ if isinstance(source, Record) and source.in_db:
170
+ from_source = False
171
+ field_str = get_name_field(cls, field=field)
169
172
  return get_or_create_records(
170
173
  iterable=values,
171
174
  field=getattr(cls, field_str),
172
175
  create=create,
173
- from_public=from_public,
176
+ from_source=from_source,
174
177
  organism=organism,
175
- public_source=public_source,
178
+ source=source,
176
179
  mute=mute,
177
180
  )
178
181
 
@@ -284,7 +287,7 @@ def _lookup(
284
287
  ) -> NamedTuple:
285
288
  """{}""" # noqa: D415
286
289
  queryset = _queryset(cls, using_key=using_key)
287
- field = get_default_str_field(orm=queryset.model, field=field)
290
+ field = get_name_field(orm=queryset.model, field=field)
288
291
 
289
292
  return Lookup(
290
293
  records=queryset,
@@ -293,7 +296,7 @@ def _lookup(
293
296
  prefix="ln",
294
297
  ).lookup(
295
298
  return_field=(
296
- get_default_str_field(orm=queryset.model, field=return_field)
299
+ get_name_field(orm=queryset.model, field=return_field)
297
300
  if return_field is not None
298
301
  else None
299
302
  )
@@ -311,7 +314,7 @@ def lookup(
311
314
  return _lookup(cls=cls, field=field, return_field=return_field)
312
315
 
313
316
 
314
- def get_default_str_field(
317
+ def get_name_field(
315
318
  orm: Record | QuerySet | Manager,
316
319
  *,
317
320
  field: str | StrField | None = None,
@@ -321,14 +324,11 @@ def get_default_str_field(
321
324
  orm = orm.model
322
325
  model_field_names = [i.name for i in orm._meta.fields]
323
326
 
324
- # set default field
327
+ # set to default name field
325
328
  if field is None:
326
- if orm._meta.model.__name__ == "Run":
327
- field = orm._meta.get_field("created_at")
328
- elif orm._meta.model.__name__ == "User":
329
- field = orm._meta.get_field("handle")
329
+ if hasattr(orm, "_name_field"):
330
+ field = orm._meta.get_field(orm._name_field)
330
331
  elif "name" in model_field_names:
331
- # by default use the name field
332
332
  field = orm._meta.get_field("name")
333
333
  else:
334
334
  # first char or text field that doesn't contain "id"
@@ -339,7 +339,7 @@ def get_default_str_field(
339
339
  field = i
340
340
  break
341
341
 
342
- # no default field can be found
342
+ # no default name field can be found
343
343
  if field is None:
344
344
  raise ValueError(
345
345
  "please pass a Record string field, e.g., `CellType.name`!"
@@ -443,9 +443,8 @@ def update_fk_to_default_db(
443
443
 
444
444
  FKBULK = [
445
445
  "organism",
446
- "public_source",
447
- "latest_report", # Transform
448
- "source_code", # Transform
446
+ "source",
447
+ "_source_code_artifact", # Transform
449
448
  "report", # Run
450
449
  ]
451
450
 
@@ -523,18 +522,13 @@ def save(self, *args, **kwargs) -> Record:
523
522
  artifacts: list = []
524
523
  if self.__class__.__name__ == "Collection" and self.id is not None:
525
524
  # when creating a new collection without being able to access artifacts
526
- artifacts = self.artifacts.list()
525
+ artifacts = self.ordered_artifacts.list()
527
526
  # transfer of the record to the default db with fk fields
528
527
  result = transfer_to_default_db(self, using_key)
529
528
  if result is not None:
530
529
  init_self_from_db(self, result)
531
530
  else:
532
- # here, we can't use the parents argument
533
- # parents are not saved for the self record
534
- save_kwargs = kwargs.copy()
535
- if "parents" in save_kwargs:
536
- save_kwargs.pop("parents")
537
- super(Record, self).save(*args, **save_kwargs)
531
+ super(Record, self).save(*args, **kwargs)
538
532
  # perform transfer of many-to-many fields
539
533
  # only supported for Artifact and Collection records
540
534
  if db is not None and db != "default" and using_key is None:
@@ -543,7 +537,7 @@ def save(self, *args, **kwargs) -> Record:
543
537
  logger.info("transfer artifacts")
544
538
  for artifact in artifacts:
545
539
  artifact.save()
546
- self.unordered_artifacts.add(*artifacts)
540
+ self.artifacts.add(*artifacts)
547
541
  if hasattr(self, "labels"):
548
542
  from copy import copy
549
543
 
@@ -554,16 +548,8 @@ def save(self, *args, **kwargs) -> Record:
554
548
  self_on_db._state.db = db
555
549
  self_on_db.pk = pk_on_db # manually set the primary key
556
550
  self_on_db.features = FeatureManager(self_on_db)
557
- # by default, transfer parents of the labels to maintain ontological hierarchy
558
- try:
559
- import bionty as bt
560
-
561
- parents = kwargs.get("parents", bt.settings.auto_save_parents)
562
- except ImportError:
563
- parents = kwargs.get("parents", True)
564
- add_from_kwargs = {"parents": parents}
565
- self.features._add_from(self_on_db, **add_from_kwargs)
566
- self.labels.add_from(self_on_db, **add_from_kwargs)
551
+ self.features._add_from(self_on_db)
552
+ self.labels.add_from(self_on_db)
567
553
  return self
568
554
 
569
555
 
lamindb/_save.py CHANGED
@@ -27,9 +27,7 @@ if TYPE_CHECKING:
27
27
  from lamindb_setup.core.upath import UPath
28
28
 
29
29
 
30
- def save(
31
- records: Iterable[Record], ignore_conflicts: bool | None = False, **kwargs
32
- ) -> None:
30
+ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> None:
33
31
  """Bulk save to registries & storage.
34
32
 
35
33
  Note:
@@ -47,7 +45,6 @@ def save(
47
45
  unique or another constraint. However, it won't inplace update the id
48
46
  fields of records. If you need records with ids, you need to query
49
47
  them from the database.
50
- **kwargs: Get kwargs related to parents.
51
48
 
52
49
  Examples:
53
50
 
@@ -87,27 +84,12 @@ def save(
87
84
  non_artifacts_with_parents = [
88
85
  r for r in non_artifacts_new if hasattr(r, "_parents")
89
86
  ]
90
- if len(non_artifacts_with_parents) > 0 and kwargs.get("parents") is not False:
91
- # this can only happen within lnschema_bionty right now!!
87
+ if len(non_artifacts_with_parents) > 0:
88
+ # this can only happen within bionty right now!!
92
89
  # we might extend to core lamindb later
93
- import bionty as bt
94
-
95
- if kwargs.get("parents") or (
96
- kwargs.get("parents") is None and bt.settings.auto_save_parents
97
- ):
98
- mute = False if kwargs.get("mute") is None else kwargs.get("mute")
99
- if not mute:
100
- # save the record with parents one by one
101
- logger.warning(
102
- "now recursing through parents: "
103
- "this only happens once, but is much slower than bulk saving"
104
- )
105
- logger.hint(
106
- "you can switch this off via: bt.settings.auto_save_parents ="
107
- " False"
108
- )
109
- for record in non_artifacts_with_parents:
110
- record._save_ontology_parents(mute=True)
90
+ from bionty.core import add_ontology
91
+
92
+ add_ontology(non_artifacts_with_parents)
111
93
 
112
94
  if artifacts:
113
95
  with transaction.atomic():
lamindb/_transform.py CHANGED
@@ -1,11 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from typing import TYPE_CHECKING
4
+
5
+ from lamindb_setup.core._docs import doc_args
3
6
  from lnschema_core.models import Run, Transform
4
- from lnschema_core.types import TransformType
5
7
 
8
+ from ._parents import _view_parents
6
9
  from ._run import delete_run_artifacts
7
10
  from .core.versioning import process_is_new_version_of
8
11
 
12
+ if TYPE_CHECKING:
13
+ from lnschema_core.types import TransformType
14
+
9
15
 
10
16
  def __init__(transform: Transform, *args, **kwargs):
11
17
  if len(args) == len(transform._meta.concrete_fields):
@@ -18,9 +24,7 @@ def __init__(transform: Transform, *args, **kwargs):
18
24
  )
19
25
  (kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None)
20
26
  version: str | None = kwargs.pop("version") if "version" in kwargs else None
21
- type: TransformType | None = (
22
- kwargs.pop("type") if "type" in kwargs else TransformType.pipeline
23
- )
27
+ type: TransformType | None = kwargs.pop("type") if "type" in kwargs else "pipeline"
24
28
  reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
25
29
  reference_type: str | None = (
26
30
  kwargs.pop("reference_type") if "reference_type" in kwargs else None
@@ -55,19 +59,13 @@ def __init__(transform: Transform, *args, **kwargs):
55
59
 
56
60
 
57
61
  def delete(self) -> None:
58
- # set latest_report to None, it's tracked through the latest run
59
- latest_report = None
60
- if self.latest_report is not None:
61
- latest_report = self.latest_report
62
- self.latest_report = None
63
- source_code = None
64
- if self.source_code is not None:
65
- source_code = self.source_code
66
- self.source_code = None
67
- if latest_report is not None or source_code is not None:
62
+ _source_code_artifact = None
63
+ if self._source_code_artifact is not None:
64
+ _source_code_artifact = self._source_code_artifact
65
+ self._source_code_artifact = None
68
66
  self.save()
69
- if source_code is not None:
70
- source_code.delete(permanent=True)
67
+ if _source_code_artifact is not None:
68
+ _source_code_artifact.delete(permanent=True)
71
69
  # query all runs and delete their artifacts
72
70
  runs = Run.filter(transform=self)
73
71
  for run in runs:
@@ -78,10 +76,23 @@ def delete(self) -> None:
78
76
 
79
77
 
80
78
  @property # type: ignore
79
+ @doc_args(Transform.latest_run.__doc__)
81
80
  def latest_run(self) -> Run:
81
+ """{}""" # noqa: D415
82
82
  return self.runs.order_by("-started_at").first()
83
83
 
84
84
 
85
+ def view_lineage(self, with_successors: bool = False, distance: int = 5):
86
+ return _view_parents(
87
+ record=self,
88
+ field="name",
89
+ with_children=with_successors,
90
+ distance=distance,
91
+ attr_name="predecessors",
92
+ )
93
+
94
+
85
95
  Transform.__init__ = __init__
86
96
  Transform.delete = delete
87
97
  Transform.latest_run = latest_run
98
+ Transform.view_lineage = view_lineage
lamindb/core/_data.py CHANGED
@@ -20,7 +20,7 @@ from lnschema_core.models import (
20
20
 
21
21
  from lamindb._parents import view_lineage
22
22
  from lamindb._query_set import QuerySet
23
- from lamindb._record import get_default_str_field
23
+ from lamindb._record import get_name_field
24
24
  from lamindb.core._settings import settings
25
25
 
26
26
  from ._feature_manager import (
@@ -129,31 +129,29 @@ def describe(self: HasFeatures, print_types: bool = False):
129
129
  # prefetch m-2-m relationships
130
130
  self = (
131
131
  self.__class__.objects.using(self._state.db)
132
- .prefetch_related("feature_sets", "input_of")
132
+ .prefetch_related("feature_sets", "input_of_runs")
133
133
  .get(id=self.id)
134
134
  )
135
135
 
136
136
  # provenance
137
137
  if len(foreign_key_fields) > 0: # always True for Artifact and Collection
138
138
  fields_values = [(field, getattr(self, field)) for field in foreign_key_fields]
139
- type_str = (
140
- lambda attr: f": {attr.__class__.__get_name_with_schema__()}"
141
- if print_types
142
- else ""
139
+ type_str = lambda attr: (
140
+ f": {attr.__class__.__get_name_with_schema__()}" if print_types else ""
143
141
  )
144
142
  related_msg = "".join(
145
143
  [
146
- f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_default_str_field(attr)))}\n"
144
+ f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_name_field(attr)))}\n"
147
145
  for (field_name, attr) in fields_values
148
146
  if attr is not None
149
147
  ]
150
148
  )
151
149
  prov_msg += related_msg
152
150
  # input of
153
- if self.id is not None and self.input_of.exists():
154
- values = [format_field_value(i.started_at) for i in self.input_of.all()]
151
+ if self.id is not None and self.input_of_runs.exists():
152
+ values = [format_field_value(i.started_at) for i in self.input_of_runs.all()]
155
153
  type_str = ": Run" if print_types else "" # type: ignore
156
- prov_msg += f" .input_of{type_str} = {values}\n"
154
+ prov_msg += f" .input_of_runs{type_str} = {values}\n"
157
155
  if prov_msg:
158
156
  msg += f" {colors.italic('Provenance')}\n"
159
157
  msg += prov_msg
@@ -210,11 +208,11 @@ def get_labels(
210
208
  ).all()
211
209
  if flat_names:
212
210
  # returns a flat list of names
213
- from lamindb._record import get_default_str_field
211
+ from lamindb._record import get_name_field
214
212
 
215
213
  values = []
216
214
  for v in qs_by_registry.values():
217
- values += v.list(get_default_str_field(v))
215
+ values += v.list(get_name_field(v))
218
216
  return values
219
217
  if len(registries_to_check) == 1 and registry in qs_by_registry:
220
218
  return qs_by_registry[registry]
@@ -304,12 +302,12 @@ def add_labels(
304
302
  if len(linked_labels) > 0:
305
303
  labels_accessor.remove(*linked_labels)
306
304
  labels_accessor.add(*records, through_defaults={"feature_id": feature.id})
307
- feature_set_links = get_feature_set_links(self)
308
- feature_set_ids = [link.featureset_id for link in feature_set_links.all()]
305
+ links_feature_set = get_feature_set_links(self)
306
+ feature_set_ids = [link.featureset_id for link in links_feature_set.all()]
309
307
  # get all linked features of type Feature
310
308
  feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
311
309
  {
312
- feature_set_links.filter(featureset_id=feature_set.id)
310
+ links_feature_set.filter(featureset_id=feature_set.id)
313
311
  .one()
314
312
  .slot: feature_set.features.all()
315
313
  for feature_set in feature_sets
@@ -415,7 +413,7 @@ def _track_run_input(
415
413
  # generalize below for more than one data batch
416
414
  if len(input_data) == 1:
417
415
  if input_data[0].transform is not None:
418
- run.transform.parents.add(input_data[0].transform)
416
+ run.transform.predecessors.add(input_data[0].transform)
419
417
 
420
418
 
421
419
  HasFeatures.describe = describe