lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +203 -102
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/_tiledbsoma.py +29 -13
  20. lamindb/core/storage/objects.py +6 -0
  21. lamindb/core/subsettings/__init__.py +2 -0
  22. lamindb/core/subsettings/_annotation_settings.py +11 -0
  23. lamindb/curators/__init__.py +7 -3349
  24. lamindb/curators/_legacy.py +2056 -0
  25. lamindb/curators/core.py +1534 -0
  26. lamindb/errors.py +11 -0
  27. lamindb/examples/__init__.py +27 -0
  28. lamindb/examples/schemas/__init__.py +12 -0
  29. lamindb/examples/schemas/_anndata.py +25 -0
  30. lamindb/examples/schemas/_simple.py +19 -0
  31. lamindb/integrations/_vitessce.py +8 -5
  32. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  33. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  34. lamindb/migrations/0093_alter_schemacomponent_unique_together.py +16 -0
  35. lamindb/models/__init__.py +4 -1
  36. lamindb/models/_describe.py +21 -4
  37. lamindb/models/_feature_manager.py +382 -287
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +177 -106
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/collection.py +73 -52
  42. lamindb/models/core.py +1 -1
  43. lamindb/models/feature.py +51 -17
  44. lamindb/models/has_parents.py +69 -14
  45. lamindb/models/project.py +1 -1
  46. lamindb/models/query_manager.py +221 -22
  47. lamindb/models/query_set.py +247 -172
  48. lamindb/models/record.py +65 -247
  49. lamindb/models/run.py +4 -4
  50. lamindb/models/save.py +8 -2
  51. lamindb/models/schema.py +456 -184
  52. lamindb/models/transform.py +2 -2
  53. lamindb/models/ulabel.py +8 -5
  54. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/METADATA +6 -6
  55. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/RECORD +57 -43
  56. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/LICENSE +0 -0
  57. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/WHEEL +0 -0
lamindb/models/artifact_set.py ADDED
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+from collections.abc import Iterable, Iterator
+from typing import TYPE_CHECKING, Literal
+
+from lamin_utils import logger
+from lamindb_setup.core._docs import doc_args
+
+from ..core._mapped_collection import MappedCollection
+from ..core.storage._backed_access import _open_dataframe
+from .artifact import Artifact, _track_run_input
+from .collection import Collection, _load_concat_artifacts
+
+if TYPE_CHECKING:
+    from anndata import AnnData
+    from pandas import DataFrame
+    from polars import LazyFrame as PolarsLazyFrame
+    from pyarrow.dataset import Dataset as PyArrowDataset
+    from upath import UPath
+
+
+UNORDERED_WARNING = (
+    "this query set is unordered, consider using `.order_by()` first "
+    "to avoid opening the artifacts in an arbitrary order"
+)
+
+
+class ArtifactSet(Iterable):
+    """Abstract class representing sets of artifacts returned by queries.
+
+    This class automatically extends :class:`~lamindb.models.BasicQuerySet`
+    and :class:`~lamindb.models.QuerySet` when the base model is :class:`~lamindb.Artifact`.
+
+    Examples:
+
+        >>> artifacts = ln.Artifact.filter(otype="AnnData")
+        >>> artifacts  # an instance of ArtifactQuerySet inheriting from ArtifactSet
+    """
+
+    @doc_args(Collection.load.__doc__)
+    def load(
+        self,
+        join: Literal["inner", "outer"] = "outer",
+        is_run_input: bool | None = None,
+        **kwargs,
+    ) -> DataFrame | AnnData:
+        """{}"""  # noqa: D415
+        if not self.ordered:  # type: ignore
+            logger.warning(UNORDERED_WARNING)
+
+        artifacts: list[Artifact] = list(self)
+        concat_object = _load_concat_artifacts(artifacts, join, **kwargs)
+        # track only if successful
+        _track_run_input(artifacts, is_run_input)
+        return concat_object
+
+    @doc_args(Collection.open.__doc__)
+    def open(
+        self,
+        engine: Literal["pyarrow", "polars"] = "pyarrow",
+        is_run_input: bool | None = None,
+        **kwargs,
+    ) -> PyArrowDataset | Iterator[PolarsLazyFrame]:
+        """{}"""  # noqa: D415
+        if not self.ordered:  # type: ignore
+            logger.warning(UNORDERED_WARNING)
+
+        artifacts: list[Artifact] = list(self)
+        paths: list[UPath] = [artifact.path for artifact in artifacts]
+
+        dataframe = _open_dataframe(paths, engine=engine, **kwargs)
+        # track only if successful
+        _track_run_input(artifacts, is_run_input)
+        return dataframe
+
+    @doc_args(Collection.mapped.__doc__)
+    def mapped(
+        self,
+        layers_keys: str | list[str] | None = None,
+        obs_keys: str | list[str] | None = None,
+        obsm_keys: str | list[str] | None = None,
+        obs_filter: dict[str, str | list[str]] | None = None,
+        join: Literal["inner", "outer"] | None = "inner",
+        encode_labels: bool | list[str] = True,
+        unknown_label: str | dict[str, str] | None = None,
+        cache_categories: bool = True,
+        parallel: bool = False,
+        dtype: str | None = None,
+        stream: bool = False,
+        is_run_input: bool | None = None,
+    ) -> MappedCollection:
+        """{}"""  # noqa: D415
+        if not self.ordered:  # type: ignore
+            logger.warning(UNORDERED_WARNING)
+
+        artifacts: list[Artifact] = []
+        paths: list[UPath] = []
+        for artifact in self:
+            if ".h5ad" not in artifact.suffix and ".zarr" not in artifact.suffix:
+                logger.warning(f"ignoring artifact with suffix {artifact.suffix}")
+                continue
+            elif not stream:
+                paths.append(artifact.cache())
+            else:
+                paths.append(artifact.path)
+            artifacts.append(artifact)
+        ds = MappedCollection(
+            paths,
+            layers_keys,
+            obs_keys,
+            obsm_keys,
+            obs_filter,
+            join,
+            encode_labels,
+            unknown_label,
+            cache_categories,
+            parallel,
+            dtype,
+        )
+        # track only if successful
+        _track_run_input(artifacts, is_run_input)
+        return ds
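
Note: with this new module, any Artifact query set gains the same load/open/mapped API that Collection provides. A minimal usage sketch (hypothetical instance; the filter values are illustrative, not from this diff):

    import lamindb as ln

    # an explicitly ordered query set of parquet artifacts
    artifacts = ln.Artifact.filter(suffix=".parquet").order_by("created_at")
    dataset = artifacts.open(engine="pyarrow")  # pyarrow.dataset.Dataset over all paths
    df = artifacts.load(join="outer")  # concatenated pandas DataFrame

Skipping .order_by() still works but logs UNORDERED_WARNING, since the artifacts would otherwise be opened in arbitrary order.
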
lamindb/models/collection.py CHANGED
@@ -24,7 +24,7 @@ from lamindb.base.fields import (
 
 from ..base.ids import base62_20
 from ..core._mapped_collection import MappedCollection
-from ..core.storage._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
+from ..core.storage._backed_access import _open_dataframe
 from ..errors import FieldValidationError
 from ..models._is_versioned import process_revises
 from ._is_versioned import IsVersioned
@@ -48,8 +48,9 @@ from .record import (
 from .run import Run, TracksRun, TracksUpdates
 
 if TYPE_CHECKING:
-    from collections.abc import Iterable
+    from collections.abc import Iterable, Iterator
 
+    from polars import LazyFrame as PolarsLazyFrame
     from pyarrow.dataset import Dataset as PyArrowDataset
 
     from ..core.storage import UPath
@@ -94,6 +95,39 @@ if TYPE_CHECKING:
     # return feature_sets_union
 
 
+def _load_concat_artifacts(
+    artifacts: list[Artifact], join: Literal["inner", "outer"] = "outer", **kwargs
+) -> pd.DataFrame | ad.AnnData:
+    suffixes = {artifact.suffix for artifact in artifacts}
+    # Why is that? - Sergei
+    if len(suffixes) != 1:
+        raise ValueError(
+            "Can only load collections where all artifacts have the same suffix"
+        )
+
+    # because we're tracking data flow on the collection-level, here, we don't
+    # want to track it on the artifact-level
+    first_object = artifacts[0].load(is_run_input=False)
+    is_dataframe = isinstance(first_object, pd.DataFrame)
+    is_anndata = isinstance(first_object, ad.AnnData)
+    if not is_dataframe and not is_anndata:
+        raise ValueError(f"Unable to concatenate {suffixes.pop()} objects.")
+
+    objects = [first_object]
+    artifact_uids = [artifacts[0].uid]
+    for artifact in artifacts[1:]:
+        objects.append(artifact.load(is_run_input=False))
+        artifact_uids.append(artifact.uid)
+
+    if is_dataframe:
+        concat_object = pd.concat(objects, join=join, **kwargs)
+    elif is_anndata:
+        label = kwargs.pop("label", "artifact_uid")
+        keys = kwargs.pop("keys", artifact_uids)
+        concat_object = ad.concat(objects, join=join, label=label, keys=keys, **kwargs)
+    return concat_object
+
+
 class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
     """Collections of artifacts.
 
@@ -342,13 +376,25 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
             run=run,
         )
 
-    def open(self, is_run_input: bool | None = None) -> PyArrowDataset:
-        """Return a cloud-backed pyarrow Dataset.
+    def open(
+        self,
+        engine: Literal["pyarrow", "polars"] = "pyarrow",
+        is_run_input: bool | None = None,
+        **kwargs,
+    ) -> PyArrowDataset | Iterator[PolarsLazyFrame]:
+        """Open a dataset for streaming.
 
-        Works for `pyarrow` compatible formats.
+        Works for `pyarrow` and `polars` compatible formats
+        (`.parquet`, `.csv`, `.ipc` etc. files or directories with such files).
+
+        Args:
+            engine: Which module to use for lazy loading of a dataframe
+                from `pyarrow` or `polars` compatible formats.
+            is_run_input: Whether to track this artifact as run input.
+            **kwargs: Keyword arguments for `pyarrow.dataset.dataset` or `polars.scan_*` functions.
 
         Notes:
-            For more info, see tutorial: :doc:`/arrays`.
+            For more info, see guide: :doc:`/arrays`.
         """
         if self._state.adding:
             artifacts = self._artifacts
@@ -356,31 +402,12 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
         else:
             artifacts = self.ordered_artifacts.all()
         paths = [artifact.path for artifact in artifacts]
-        # this checks that the filesystem is the same for all paths
-        # this is a requirement of pyarrow.dataset.dataset
-        fs = paths[0].fs
-        for path in paths[1:]:
-            # this assumes that the filesystems are cached by fsspec
-            if path.fs is not fs:
-                raise ValueError(
-                    "The collection has artifacts with different filesystems, this is not supported."
-                )
-        if not _is_pyarrow_dataset(paths):
-            suffixes = {path.suffix for path in paths}
-            suffixes_str = ", ".join(suffixes)
-            err_msg = (
-                "This collection is not compatible with pyarrow.dataset.dataset(), "
-            )
-            err_msg += (
-                f"the artifacts have incompatible file types: {suffixes_str}"
-                if len(suffixes) > 1
-                else f"the file type {suffixes_str} is not supported by pyarrow."
-            )
-            raise ValueError(err_msg)
-        dataset = _open_pyarrow_dataset(paths)
+
+        dataframe = _open_dataframe(paths, engine=engine, **kwargs)
         # track only if successful
+        # is it really needed if tracking is done in self.ordered_artifacts.all()? - Sergei
         _track_run_input(self, is_run_input)
-        return dataset
+        return dataframe
 
     def mapped(
         self,
@@ -403,8 +430,8 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
         <https://pytorch.org/docs/stable/data.html#map-style-datasets>`__ by
         virtually concatenating `AnnData` arrays.
 
-        If your `AnnData` collection is in the cloud, move them into a local
-        cache first via :meth:`~lamindb.Collection.cache`.
+        By default (`stream=False`) `AnnData` arrays are moved into a local
+        cache first.
 
         `__getitem__` of the `MappedCollection` object takes a single integer index
         and returns a dictionary with the observation data sample for this index from
@@ -416,7 +443,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
 
         For a guide, see :doc:`docs:scrna-mappedcollection`.
 
-        This method currently only works for collections of `AnnData` artifacts.
+        This method currently only works for collections or query sets of `AnnData` artifacts.
 
         Args:
             layers_keys: Keys from the ``.layers`` slot. ``layers_keys=None`` or ``"X"`` in the list
@@ -445,6 +472,11 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
            >>> ds = ln.Collection.get(description="my collection")
            >>> mapped = collection.mapped(obs_keys=["cell_type", "batch"])
            >>> dl = DataLoader(mapped, batch_size=128, shuffle=True)
+            >>> # also works for query sets of artifacts, '...' represents some filtering condition
+            >>> # additional filtering on artifacts of the collection
+            >>> mapped = collection.artifacts.all().filter(...).order_by("-created_at").mapped()
+            >>> # or directly from a query set of artifacts
+            >>> mapped = ln.Artifact.filter(..., otype="AnnData").order_by("-created_at").mapped()
         """
         path_list = []
         if self._state.adding:
@@ -474,6 +506,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
             dtype,
         )
         # track only if successful
+        # is it really needed if tracking is done in self.ordered_artifacts.all()? - Sergei
         _track_run_input(self, is_run_input)
         return ds
 
@@ -490,6 +523,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
         path_list = []
         for artifact in self.ordered_artifacts.all():
             path_list.append(artifact.cache())
+        # is it really needed if tracking is done in self.ordered_artifacts.all()? - Sergei
         _track_run_input(self, is_run_input)
         return path_list
 
@@ -498,29 +532,16 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
         join: Literal["inner", "outer"] = "outer",
         is_run_input: bool | None = None,
         **kwargs,
-    ) -> Any:
-        """Stage and load to memory.
+    ) -> pd.DataFrame | ad.AnnData:
+        """Cache and load to memory.
 
-        Returns in-memory representation if possible such as a concatenated `DataFrame` or `AnnData` object.
+        Returns an in-memory concatenated `DataFrame` or `AnnData` object.
         """
         # cannot call _track_run_input here, see comment further down
-        all_artifacts = self.ordered_artifacts.all()
-        suffixes = [artifact.suffix for artifact in all_artifacts]
-        if len(set(suffixes)) != 1:
-            raise RuntimeError(
-                "Can only load collections where all artifacts have the same suffix"
-            )
-        # because we're tracking data flow on the collection-level, here, we don't
-        # want to track it on the artifact-level
-        objects = [artifact.load(is_run_input=False) for artifact in all_artifacts]
-        artifact_uids = [artifact.uid for artifact in all_artifacts]
-        if isinstance(objects[0], pd.DataFrame):
-            concat_object = pd.concat(objects, join=join)
-        elif isinstance(objects[0], ad.AnnData):
-            concat_object = ad.concat(
-                objects, join=join, label="artifact_uid", keys=artifact_uids
-            )
-        # only call it here because there might be errors during concat
+        artifacts = self.ordered_artifacts.all()
+        concat_object = _load_concat_artifacts(artifacts, join, **kwargs)
+        # only call it here because there might be errors during load or concat
+        # is it really needed if tracking is done in self.ordered_artifacts.all()? - Sergei
         _track_run_input(self, is_run_input)
         return concat_object
 
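
Note: Collection.open() now delegates filesystem checks and format dispatch to _open_dataframe, and load() shares its concatenation logic with query sets via _load_concat_artifacts, which forwards extra keyword arguments to pd.concat or ad.concat. A sketch of the caller-facing behavior (assumes a saved collection; the description is illustrative):

    import lamindb as ln

    collection = ln.Collection.get(description="my collection")
    ds = collection.open()  # engine="pyarrow" (default): a pyarrow.dataset.Dataset
    lazy = collection.open(engine="polars")  # iterator of polars.LazyFrame objects
    adata = collection.load(join="inner", label="artifact_uid")  # kwargs reach ad.concat
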
lamindb/models/core.py CHANGED
@@ -24,7 +24,7 @@ if TYPE_CHECKING:
 
 
 class Storage(Record, TracksRun, TracksUpdates):
-    """Storage locations.
+    """Storage locations of artifacts such as S3 buckets or local directories.
 
     A storage location is either a directory/folder (local or in the cloud) or
     an entire S3/GCP bucket.
lamindb/models/feature.py CHANGED
@@ -143,40 +143,50 @@ def parse_cat_dtype(
 
 
 def serialize_dtype(
-    dtype: Record | FieldAttr | list[Record], is_itype: bool = False
+    dtype: Registry | Record | FieldAttr | list[Record] | list[Registry] | str,
+    is_itype: bool = False,
 ) -> str:
     """Converts a data type object into its string representation."""
+    from .ulabel import ULabel
+
     if (
         not isinstance(dtype, list)
         and hasattr(dtype, "__name__")
         and dtype.__name__ in FEATURE_DTYPES
     ):
         dtype_str = dtype.__name__
+    elif dtype is dict:
+        dtype_str = "dict"
+    elif is_itype and isinstance(dtype, str):
+        if dtype not in "Feature":
+            parse_cat_dtype(
+                dtype_str=dtype, is_itype=True
+            )  # throws an error if invalid
+        dtype_str = dtype
     elif isinstance(dtype, (ExtensionDtype, np.dtype)):
         dtype_str = serialize_pandas_dtype(dtype)
     else:
-        error_message = (
-            "dtype has to be a record, a record field, or a list of records, not {}"
-        )
-        if isinstance(dtype, Registry):
-            dtype = [dtype]
-        elif isinstance(dtype, DeferredAttribute):
+        error_message = "dtype has to be a registry, a ulabel subtype, a registry field, or a list of registries or fields, not {}"
+        if isinstance(dtype, (Registry, DeferredAttribute, ULabel)):
             dtype = [dtype]
         elif not isinstance(dtype, list):
             raise ValueError(error_message.format(dtype))
         dtype_str = ""
-        for single_dtype in dtype:
-            if not isinstance(single_dtype, Registry) and not isinstance(
-                single_dtype, DeferredAttribute
-            ):
-                raise ValueError(error_message.format(single_dtype))
-            if isinstance(single_dtype, Registry):
-                dtype_str += single_dtype.__get_name_with_module__() + "|"
+        for one_dtype in dtype:
+            if not isinstance(one_dtype, (Registry, DeferredAttribute, ULabel)):
+                raise ValueError(error_message.format(one_dtype))
+            if isinstance(one_dtype, Registry):
+                dtype_str += one_dtype.__get_name_with_module__() + "|"
+            elif isinstance(one_dtype, ULabel):
+                assert one_dtype.is_type, (  # noqa: S101
+                    f"ulabel has to be a type if acting as dtype, {one_dtype} has `is_type` False"
+                )
+                dtype_str += f"ULabel[{one_dtype.name}]"
             else:
+                name = one_dtype.field.name
+                field_ext = f".{name}" if name != "name" else ""
                 dtype_str += (
-                    single_dtype.field.model.__get_name_with_module__()
-                    + f".{single_dtype.field.name}"
-                    + "|"
+                    one_dtype.field.model.__get_name_with_module__() + field_ext + "|"
                 )
     dtype_str = dtype_str.rstrip("|")
     if not is_itype:
@@ -571,6 +581,30 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
         self._aux = self._aux or {}
         self._aux.setdefault("af", {})["2"] = value
 
+    # we'll enable this later
+    # @property
+    # def observational_unit(self) -> Literal["Artifact", "Observation"]:
+    #     """Default observational unit on which the feature is measured.
+
+    #     Currently, we only make a distinction between artifact-level and observation-level features.
+
+    #     For example, a feature `"ml_split"` that stores `"test"` & `"train"` labels is typically defined on the artifact level.
+    #     When accessing `artifact.features.get_values(["ml_split"])`, you expect a single value, either `"test"` or `"train"`.
+
+    #     However, when accessing an artifact annotation with a feature that's defined on the observation-level, say `"cell_type"`, you expect a set of values. So,
+    #     `artifact.features.get_values(["cell_type_from_expert"])` should return a set: `{"T cell", "B cell"}`.
+
+    #     The value of `observational_unit` is currently auto-managed: if using `artifact.features.add_values()`,
+    #     it will be set to `Artifact`. In a curator, the value depends on whether it's an artifact- or observation-level slot
+    #     (e.g. `.uns` is artifact-level in `AnnData` whereas `.obs` is observation-level).
+
+    #     Note: This attribute might in the future be used to distinguish different types of observational units (e.g. single cells vs. physical samples vs. study subjects etc.).
+    #     """
+    #     if self._expect_many:
+    #         return "Observation"  # this here might be replaced with the specific observational unit
+    #     else:
+    #         return "Artifact"
+
 
 class FeatureValue(Record, TracksRun):
     """Non-categorical features values.
lamindb/models/has_parents.py CHANGED
@@ -84,10 +84,44 @@ class HasParents:
         return view_parents(
             record=self,  # type: ignore
             field=field,
+            with_parents=True,
             with_children=with_children,
             distance=distance,
         )
 
+    def view_children(
+        self,
+        field: StrField | None = None,
+        distance: int = 5,
+    ):
+        """View children in an ontology.
+
+        Args:
+            field: Field to display on graph
+            distance: Maximum distance still shown.
+
+        Ontological hierarchies: :class:`~lamindb.ULabel` (project & sub-project), :class:`~bionty.CellType` (cell type & subtype).
+
+        Examples:
+            >>> import bionty as bt
+            >>> bt.Tissue.from_source(name="subsegmental bronchus").save()
+            >>> record = bt.Tissue.get(name="respiratory tube")
+            >>> record.view_parents()
+            >>> tissue.view_parents(with_children=True)
+        """
+        if field is None:
+            field = get_name_field(self)
+        if not isinstance(field, str):
+            field = field.field.name
+
+        return view_parents(
+            record=self,  # type: ignore
+            field=field,
+            with_parents=False,
+            with_children=True,
+            distance=distance,
+        )
+
     def query_parents(self) -> QuerySet:
         """Query parents in an ontology."""
         return _query_relatives([self], "parents", self.__class__)  # type: ignore
@@ -210,6 +244,7 @@ def view_lineage(
 def view_parents(
     record: Record,
     field: str,
+    with_parents: bool = True,
     with_children: bool = False,
     distance: int = 100,
     attr_name: Literal["parents", "predecessors"] = "parents",
@@ -223,11 +258,12 @@ def view_parents(
     import pandas as pd
 
     df_edges = None
-    df_edges_parents = _df_edges_from_parents(
-        record=record, field=field, distance=distance, attr_name=attr_name
-    )
-    if df_edges_parents is not None:
-        df_edges = df_edges_parents
+    df_edges_parents = None
+    df_edges_children = None
+    if with_parents:
+        df_edges_parents = _df_edges_from_parents(
+            record=record, field=field, distance=distance, attr_name=attr_name
+        )
     if with_children:
         df_edges_children = _df_edges_from_parents(
             record=record,
@@ -236,13 +272,32 @@ def view_parents(
             children=True,
             attr_name=attr_name,
         )
-        if df_edges_children is not None:
-            if df_edges is not None:
-                df_edges = pd.concat(
-                    [df_edges_parents, df_edges_children]
-                ).drop_duplicates()
-            else:
-                df_edges = df_edges_children
+        # Rename the columns to swap source and target
+        df_edges_children = df_edges_children.rename(
+            columns={
+                "source": "temp_target",
+                "source_label": "temp_target_label",
+                "source_record": "temp_target_record",
+                "target": "source",
+                "target_label": "source_label",
+                "target_record": "source_record",
+            }
+        )
+        df_edges_children = df_edges_children.rename(
+            columns={
+                "temp_target": "target",
+                "temp_target_label": "target_label",
+                "temp_target_record": "target_record",
+            }
+        )
+    if df_edges_parents is not None and df_edges_children is not None:
+        df_edges = pd.concat([df_edges_parents, df_edges_children]).drop_duplicates()
+    elif df_edges_parents is not None:
+        df_edges = df_edges_parents
+    elif df_edges_children is not None:
+        df_edges = df_edges_children
+    else:
+        return None
 
     record_label = _record_label(record, field)
 
@@ -520,14 +575,14 @@ def _get_all_child_runs(data: Artifact | Collection) -> list:
             run_inputs_outputs += [(r, outputs_run)]
 
         child_runs.update(
-            Run.filter(
+            Run.filter(  # type: ignore
                 **{f"input_{name}s__uid__in": [i.uid for i in outputs_run]}
             ).list()
         )
         # for artifacts, also include collections in the lineage
         if name == "artifact":
             child_runs.update(
-                Run.filter(
+                Run.filter(  # type: ignore
                     input_collections__uid__in=[i.uid for i in outputs_run]
                 ).list()
             )
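
Note: the with_parents flag is what enables the new view_children(): children edges are fetched with the same helper, and their source/target columns are swapped so that all edges run in a consistent direction before parent and child frames are concatenated. A usage sketch (assumes bionty is installed; the ontology record is illustrative):

    import bionty as bt

    record = bt.CellType.from_source(name="T cell").save()
    record.view_children(distance=2)  # downward graph only
    record.view_parents(with_children=True)  # parents and children combined
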
lamindb/models/project.py CHANGED
@@ -36,7 +36,7 @@ if TYPE_CHECKING:
 
 
 class Person(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
-    """People.
+    """People such as authors of a study or collaborators in a project.
 
     This registry is distinct from `User` and exists for project management.
 