lamindb 0.69.8__py3-none-any.whl → 0.69.10__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
lamindb/_artifact.py CHANGED
@@ -22,7 +22,6 @@ from lamindb_setup.core.upath import (
 from lnschema_core import Artifact, Run, Storage
 from lnschema_core.models import IsTree
 from lnschema_core.types import (
-    DataLike,
     VisibilityChoice,
 )

@@ -52,9 +51,11 @@ from .core._data import (
     save_feature_sets,
 )
 from .core.storage.file import AUTO_KEY_PREFIX
+from .core.storage.object import _mudata_is_installed

 if TYPE_CHECKING:
     from lamindb_setup.core.types import UPathStr
+    from mudata import MuData

     from lamindb.core.storage._backed_access import AnnDataAccessor, BackedAccessor

@@ -114,7 +115,7 @@ def process_pathlike(

 def process_data(
     provisional_uid: str,
-    data: UPathStr | DataLike,
+    data: UPathStr | pd.DataFrame | AnnData,
     format: str | None,
     key: str | None,
     default_storage: Storage,
@@ -123,6 +124,13 @@ def process_data(
 ) -> tuple[Any, Path | UPath, str, Storage, bool]:
     """Serialize a data object that's provided as file or in memory."""
     # if not overwritten, data gets stored in default storage
+    if _mudata_is_installed():
+        from mudata import MuData
+
+        data_types = (pd.DataFrame, AnnData, MuData)
+    else:
+        data_types = (pd.DataFrame, AnnData)  # type:ignore
+
     if isinstance(data, (str, Path, UPath)):  # UPathStr, spelled out
         access_token = (
             default_storage._access_token
@@ -138,7 +146,7 @@ def process_data(
         )
         suffix = extract_suffix_from_path(path)
         memory_rep = None
-    elif isinstance(data, (pd.DataFrame, AnnData)):  # DataLike, spelled out
+    elif isinstance(data, data_types):
         storage = default_storage
         memory_rep = data
         if key is not None:
@@ -297,7 +305,7 @@ def get_relative_path_to_directory(

 def get_artifact_kwargs_from_data(
     *,
-    data: Path | UPath | str | pd.DataFrame | AnnData,
+    data: Path | UPath | str | pd.DataFrame | AnnData | MuData,
     key: str | None,
     run: Run | None,
     format: str | None,
@@ -427,22 +435,20 @@ def log_storage_hint(
     logger.hint(hint)


-def data_is_anndata(data: DataLike):
+def data_is_anndata(data: AnnData | UPathStr):
     if isinstance(data, AnnData):
         return True
     if isinstance(data, (str, Path, UPath)):
         return Path(data).suffix in {".h5ad", ".zrad"}
-    return False  # pragma: no cover
+    return False


-def data_is_mudata(data: DataLike):  # pragma: no cover
-    try:
+def data_is_mudata(data: MuData | UPathStr):
+    if _mudata_is_installed():
         from mudata import MuData
-    except ModuleNotFoundError:
-        return False

-    if isinstance(data, MuData):
-        return True
+        if isinstance(data, MuData):
+            return True
     if isinstance(data, (str, Path, UPath)):
         return Path(data).suffix in {".h5mu"}
     return False
@@ -456,6 +462,9 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
     elif data_is_anndata(data):
         logger.warning("data is an AnnData, please use .from_anndata()")
         accessor = "AnnData"
+    elif data_is_mudata(data):
+        logger.warning("data is a MuData, please use .from_mudata()")
+        accessor = "MuData"
     else:
         raise TypeError("data has to be a string, Path, UPath")
     return accessor
@@ -620,6 +629,32 @@ def from_anndata(
     return artifact


+@classmethod  # type: ignore
+@doc_args(Artifact.from_mudata.__doc__)
+def from_mudata(
+    cls,
+    mdata: MuData,
+    key: str | None = None,
+    description: str | None = None,
+    run: Run | None = None,
+    version: str | None = None,
+    is_new_version_of: Artifact | None = None,
+    **kwargs,
+) -> Artifact:
+    """{}."""
+    artifact = Artifact(
+        data=mdata,
+        key=key,
+        run=run,
+        description=description,
+        version=version,
+        is_new_version_of=is_new_version_of,
+        accessor="MuData",
+        **kwargs,
+    )
+    return artifact
+
+
 @classmethod  # type: ignore
 @doc_args(Artifact.from_dir.__doc__)
 def from_dir(
@@ -725,7 +760,7 @@ def from_dir(
 # docstring handled through attach_func_to_class_method
 def replace(
     self,
-    data: UPathStr | DataLike,
+    data: UPathStr,
     run: Run | None = None,
     format: str | None = None,
 ) -> None:
@@ -808,9 +843,7 @@ def backed(self, is_run_input: bool | None = None) -> AnnDataAccessor | BackedAc


 # docstring handled through attach_func_to_class_method
-def load(
-    self, is_run_input: bool | None = None, stream: bool = False, **kwargs
-) -> DataLike:
+def load(self, is_run_input: bool | None = None, stream: bool = False, **kwargs) -> Any:
     _track_run_input(self, is_run_input)
     if hasattr(self, "_memory_rep") and self._memory_rep is not None:
         return self._memory_rep
@@ -963,6 +996,7 @@ METHOD_NAMES = [
     "__init__",
     "from_anndata",
     "from_df",
+    "from_mudata",
     "backed",
     "stage",
     "load",
lamindb/_can_validate.py CHANGED
@@ -29,7 +29,7 @@ def inspect(
     field: str | StrField | None = None,
     *,
     mute: bool = False,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> InspectResult:
     """{}."""
     return _inspect(
@@ -37,7 +37,7 @@ def inspect(
         values=values,
         field=field,
         mute=mute,
-        **kwargs,
+        organism=organism,
     )


@@ -49,10 +49,10 @@ def validate(
     field: str | StrField | None = None,
     *,
     mute: bool = False,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> np.ndarray:
     """{}."""
-    return _validate(cls=cls, values=values, field=field, mute=mute, **kwargs)
+    return _validate(cls=cls, values=values, field=field, mute=mute, organism=organism)


 def _inspect(
@@ -62,7 +62,7 @@ def _inspect(
     *,
     mute: bool = False,
     using_key: str | None = None,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> pd.DataFrame | dict[str, list[str]]:
     """{}."""
     from lamin_utils._inspect import inspect
@@ -77,20 +77,17 @@ def _inspect(

     # inspect in the DB
     result_db = inspect(
-        df=_filter_query_based_on_organism(
-            queryset=queryset, organism=kwargs.get("organism")
-        ),
+        df=_filter_query_based_on_organism(queryset=queryset, organism=organism),
         identifiers=values,
         field=field,
         mute=mute,
-        **kwargs,
     )
     nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys())

     if len(nonval) > 0 and orm.__get_schema_name__() == "bionty":
         try:
-            bionty_result = orm.public(organism=kwargs.get("organism")).inspect(
-                values=nonval, field=field, mute=True, **kwargs
+            bionty_result = orm.public(organism=organism).inspect(
+                values=nonval, field=field, mute=True
             )
             bionty_validated = bionty_result.validated
             bionty_mapper = bionty_result.synonyms_mapper
@@ -146,7 +143,7 @@ def _validate(
     *,
     mute: bool = False,
     using_key: str | None = None,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> np.ndarray:
     """{}."""
     from lamin_utils._inspect import validate
@@ -161,7 +158,7 @@ def _validate(
     field_values = pd.Series(
         _filter_query_based_on_organism(
             queryset=queryset,
-            organism=kwargs.get("organism"),
+            organism=organism,
             values_list_field=field,
         ),
         dtype="object",
@@ -173,7 +170,6 @@ def _validate(
         case_sensitive=True,
         mute=mute,
         field=field,
-        **kwargs,
     )
     if return_str and len(result) == 1:
         return result[0]
@@ -195,7 +191,7 @@ def standardize(
     public_aware: bool = True,
     keep: Literal["first", "last", False] = "first",
     synonyms_field: str = "synonyms",
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> list[str] | dict[str, str]:
     """{}."""
     return _standardize(
@@ -209,7 +205,7 @@ def standardize(
         public_aware=public_aware,
         keep=keep,
         synonyms_field=synonyms_field,
-        **kwargs,
+        organism=organism,
     )


@@ -258,7 +254,7 @@ def _standardize(
     keep: Literal["first", "last", False] = "first",
     synonyms_field: str = "synonyms",
     using_key: str | None = None,
-    **kwargs,
+    organism: str | Registry | None = None,
 ) -> list[str] | dict[str, str]:
     """{}."""
     from lamin_utils._standardize import standardize as map_synonyms
@@ -274,7 +270,6 @@ def _standardize(
     queryset = _queryset(cls, using_key)
     orm = queryset.model

-    organism = kwargs.get("organism")
     if _has_organism_field(orm):
         # here, we can safely import lnschema_bionty
         from lnschema_bionty._bionty import create_or_get_organism_record
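The `**kwargs` catch-all in `inspect`, `validate`, and `standardize` is replaced by an explicit `organism` keyword. A hedged sketch of the resulting call pattern, assuming the bionty plug-in provides a `Gene` registry (names and values are illustrative):

```python
import bionty as bt  # assumed bionty plug-in exposing Gene; any registry with an organism field behaves the same

symbols = ["TP53", "BRCA1", "not-a-gene"]

# organism is now a named parameter rather than something fished out of **kwargs
flags = bt.Gene.validate(symbols, field=bt.Gene.symbol, organism="human")
report = bt.Gene.inspect(symbols, field=bt.Gene.symbol, organism="human", mute=True)
mapped = bt.Gene.standardize(symbols, organism="human")
```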
lamindb/_collection.py CHANGED
@@ -16,7 +16,7 @@ from lamin_utils import logger
 from lamindb_setup.core._docs import doc_args
 from lamindb_setup.core.hashing import hash_set
 from lnschema_core.models import Collection, CollectionArtifact, FeatureSet
-from lnschema_core.types import DataLike, VisibilityChoice
+from lnschema_core.types import VisibilityChoice

 from lamindb._utils import attach_func_to_class_method
 from lamindb.core._data import _track_run_input
@@ -40,17 +40,6 @@ if TYPE_CHECKING:
     from ._query_set import QuerySet


-def _check_accessor_collection(data: Any, accessor: str | None = None):
-    if accessor is None and isinstance(data, (AnnData, pd.DataFrame)):
-        if isinstance(data, pd.DataFrame):
-            logger.warning("data is a DataFrame, please use .from_df()")
-            accessor = "DataFrame"
-        elif data_is_anndata(data):
-            logger.warning("data is an AnnData, please use .from_anndata()")
-            accessor = "AnnData"
-    return accessor
-
-
 def __init__(
     collection: Collection,
     *args,
@@ -61,11 +50,11 @@ def __init__(
         return None
     # now we proceed with the user-facing constructor
     if len(args) > 1:
-        raise ValueError("Only one non-keyword arg allowed: data")
-    data: Artifact | Iterable[Artifact] = (
-        kwargs.pop("data") if len(args) == 0 else args[0]
+        raise ValueError("Only one non-keyword arg allowed: artifacts")
+    artifacts: Artifact | Iterable[Artifact] = (
+        kwargs.pop("artifacts") if len(args) == 0 else args[0]
     )
-    meta: str | None = kwargs.pop("meta") if "meta" in kwargs else None
+    meta: Artifact | None = kwargs.pop("meta") if "meta" in kwargs else None
     name: str | None = kwargs.pop("name") if "name" in kwargs else None
     description: str | None = (
         kwargs.pop("description") if "description" in kwargs else None
@@ -87,14 +76,10 @@ def __init__(
     feature_sets: dict[str, FeatureSet] = (
         kwargs.pop("feature_sets") if "feature_sets" in kwargs else {}
     )
-    accessor = kwargs.pop("accessor") if "accessor" in kwargs else None
-    if not isinstance(data, (Artifact, Iterable)):
-        accessor = _check_accessor_collection(data=data, accessor=accessor)
     if not len(kwargs) == 0:
         raise ValueError(
-            f"Only data, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"
+            f"Only artifacts, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"
         )
-
     if is_new_version_of is None:
         provisional_uid = init_uid(version=version, n_full_id=20)
     else:
@@ -104,13 +89,13 @@ def __init__(
         if name is None:
             name = is_new_version_of.name
     run = get_run(run)
-    if isinstance(data, Artifact):
-        data = [data]
+    if isinstance(artifacts, Artifact):
+        artifacts = [artifacts]
     else:
-        if not hasattr(data, "__getitem__"):
+        if not hasattr(artifacts, "__getitem__"):
             raise ValueError("Artifact or List[Artifact] is allowed.")
-        assert isinstance(data[0], Artifact)  # type: ignore
-        hash, feature_sets = from_artifacts(data)  # type: ignore
+        assert isinstance(artifacts[0], Artifact)  # type: ignore
+        hash, feature_sets = from_artifacts(artifacts)  # type: ignore
     if meta is not None:
         if not isinstance(meta, Artifact):
             raise ValueError("meta has to be an Artifact")
@@ -153,12 +138,12 @@ def __init__(
         visibility=visibility,
         **kwargs,
     )
-    collection._artifacts = data
+    collection._artifacts = artifacts
     collection._feature_sets = feature_sets
     # register provenance
     if is_new_version_of is not None:
         _track_run_input(is_new_version_of, run=run)
-    _track_run_input(data, run=run)
+    _track_run_input(artifacts, run=run)


 # internal function, not exposed to user
@@ -224,7 +209,9 @@ def from_artifacts(artifacts: Iterable[Artifact]) -> tuple[str, dict[str, str]]:
 # docstring handled through attach_func_to_class_method
 def mapped(
     self,
-    label_keys: str | list[str] | None = None,
+    layers_keys: str | list[str] | None = None,
+    obs_keys: str | list[str] | None = None,
+    obsm_keys: str | list[str] | None = None,
     join: Literal["inner", "outer"] | None = "inner",
     encode_labels: bool | list[str] = True,
     unknown_label: str | dict[str, str] | None = None,
@@ -245,7 +232,9 @@ def mapped(
         path_list.append(artifact.path)
     ds = MappedCollection(
         path_list,
-        label_keys,
+        layers_keys,
+        obs_keys,
+        obsm_keys,
         join,
         encode_labels,
         unknown_label,
@@ -273,7 +262,7 @@ def load(
     join: Literal["inner", "outer"] = "outer",
     is_run_input: bool | None = None,
     **kwargs,
-) -> DataLike:
+) -> Any:
     # cannot call _track_run_input here, see comment further down
     all_artifacts = self.artifacts.all()
     suffixes = [artifact.suffix for artifact in all_artifacts]
@@ -321,7 +310,7 @@ def delete(self, permanent: bool | None = None) -> None:


 # docstring handled through attach_func_to_class_method
-def save(self, *args, **kwargs) -> None:
+def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
     if self.artifact is not None:
         self.artifact.save()
     # we don't need to save feature sets again
@@ -330,18 +319,21 @@ def save(self, *args, **kwargs) -> None:
     # we don't allow updating the collection of artifacts
     # if users want to update the set of artifacts, they
     # have to create a new collection
-    if hasattr(self, "_artifacts"):
-        if self._artifacts is not None and len(self._artifacts) > 0:
-            links = [
-                CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
-                for artifact in self._artifacts
-            ]
-            # the below seems to preserve the order of the list in the
-            # auto-incrementing integer primary
-            # merely using .unordered_artifacts.set(*...) doesn't achieve this
-            # we need ignore_conflicts=True so that this won't error if links already exist
-            CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
+    links = [
+        CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
+        for artifact in self._artifacts
+    ]
+    # the below seems to preserve the order of the list in the
+    # auto-incrementing integer primary
+    # merely using .unordered_artifacts.set(*...) doesn't achieve this
+    # we need ignore_conflicts=True so that this won't error if links already exist
+    CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
     save_feature_set_links(self)
+    if using is not None:
+        logger.warning("using argument is ignored")
+    if transfer_labels:
+        for artifact in self._artifacts:
+            self.labels.add_from(artifact)


 # docstring handled through attach_func_to_class_method
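These hunks rename the `Collection` constructor argument from `data` to `artifacts`, split `mapped()`'s single `label_keys` into `layers_keys`/`obs_keys`/`obsm_keys`, and give `save()` explicit `transfer_labels`/`using` parameters. A hedged sketch of the updated call sites (the artifact lookups and keys are placeholders, not part of the diff):

```python
import lamindb as ln

# placeholders: previously saved AnnData-backed artifacts
artifact_1 = ln.Artifact.filter(description="batch-1").one()
artifact_2 = ln.Artifact.filter(description="batch-2").one()

# the positional argument is now called `artifacts` (was `data`)
collection = ln.Collection([artifact_1, artifact_2], name="my-collection")
collection.save(transfer_labels=True)  # optionally copy each artifact's labels onto the collection

# mapped() now takes separate key lists instead of one `label_keys` argument
dataset = collection.mapped(
    layers_keys=["X"],       # illustrative: the main matrix
    obs_keys=["cell_type"],  # illustrative .obs column
    join="inner",
)
```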
lamindb/_feature_set.py CHANGED
@@ -162,7 +162,9 @@ def from_values(
     field: FieldAttr = Feature.name,
     type: str | None = None,
     name: str | None = None,
-    **kwargs,
+    mute: bool = False,
+    organism: Registry | str | None = None,
+    public_source: Registry | None = None,
 ) -> FeatureSet | None:
     """{}."""
     if not isinstance(field, FieldAttr):
@@ -175,13 +177,18 @@ def from_values(
     if registry != Feature and type is None:
         type = NUMBER_TYPE
         logger.debug("setting feature set to 'number'")
-    validated = registry.validate(values, field=field, organism=kwargs.get("organism"))
+    validated = registry.validate(values, field=field, mute=mute, organism=organism)
     if validated.sum() == 0:
-        if kwargs.get("mute") is True:
+        if mute is True:
             logger.warning("no validated features, skip creating feature set")
         return None
     validated_values = np.array(values)[validated]
-    validated_features = registry.from_values(validated_values, field=field, **kwargs)
+    validated_features = registry.from_values(
+        validated_values,
+        field=field,
+        organism=organism,
+        public_source=public_source,
+    )
     feature_set = FeatureSet(
         features=validated_features,
         name=name,
@@ -197,13 +204,15 @@ def from_df(
     df: pd.DataFrame,
     field: FieldAttr = Feature.name,
     name: str | None = None,
-    **kwargs,
+    mute: bool = False,
+    organism: Registry | str | None = None,
+    public_source: Registry | None = None,
 ) -> FeatureSet | None:
     """{}."""
     registry = field.field.model
-    validated = registry.validate(df.columns, field=field, **kwargs)
+    validated = registry.validate(df.columns, field=field, mute=mute, organism=organism)
     if validated.sum() == 0:
-        if kwargs.get("mute") is True:
+        if mute is True:
             logger.warning("no validated features, skip creating feature set")
         return None
     if registry == Feature:
@@ -215,7 +224,10 @@ def from_df(
             raise ValueError(f"data types are heterogeneous: {set(dtypes)}")
         type = convert_numpy_dtype_to_lamin_feature_type(dtypes[0])
         validated_features = registry.from_values(
-            df.columns[validated], field=field, **kwargs
+            df.columns[validated],
+            field=field,
+            organism=organism,
+            public_source=public_source,
         )
     feature_set = FeatureSet(
         features=validated_features,
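`FeatureSet.from_values()` and `FeatureSet.from_df()` likewise trade `**kwargs` for explicit `mute`, `organism`, and `public_source` keywords. A hedged sketch, again assuming a bionty `Gene` registry (illustrative values):

```python
import lamindb as ln
import bionty as bt  # assumed bionty plug-in exposing Gene

# explicit keywords instead of **kwargs; returns None if nothing validates
feature_set = ln.FeatureSet.from_values(
    ["TP53", "BRCA1"],
    field=bt.Gene.symbol,
    organism="human",
    mute=True,
)
if feature_set is not None:
    feature_set.save()
```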
lamindb/_from_values.py CHANGED
@@ -19,19 +19,26 @@ def get_or_create_records(
     field: StrField,
     *,
     from_public: bool = False,
-    **kwargs,
+    organism: Registry | str | None = None,
+    public_source: Registry | None = None,
 ) -> list[Registry]:
     """Get or create records from iterables."""
     upon_create_search_names = settings.upon_create_search_names
-    settings.upon_create_search_names = False
     feature: Feature = None
+    organism = _get_organism_record(field, organism)
+    kwargs: dict = {}
+    if organism is not None:
+        kwargs["organism"] = organism
+    if public_source is not None:
+        kwargs["public_source"] = public_source
+    settings.upon_create_search_names = False
     try:
         Registry = field.field.model
         iterable_idx = index_iterable(iterable)

         # returns existing records & non-existing values
         records, nonexist_values, msg = get_existing_records(
-            iterable_idx=iterable_idx, field=field, kwargs=kwargs
+            iterable_idx=iterable_idx, field=field, **kwargs
         )

         # new records to be created based on new values
@@ -78,26 +85,14 @@ def get_or_create_records(

 def get_existing_records(
     iterable_idx: pd.Index,
     field: StrField,
-    kwargs: dict = None,
+    **kwargs,
 ):
-    if kwargs is None:
-        kwargs = {}
     model = field.field.model
     condition: dict = {} if len(kwargs) == 0 else kwargs.copy()
     # existing records matching is agnostic to the bionty source
     if "public_source" in condition:
         condition.pop("public_source")

-    if _has_organism_field(model):
-        from lnschema_bionty._bionty import create_or_get_organism_record
-
-        organism_record = create_or_get_organism_record(
-            organism=kwargs.get("organism"), orm=model
-        )
-        if organism_record is not None:
-            kwargs.update({"organism": organism_record})
-            condition.update({"organism": organism_record})
-
     # standardize based on the DB reference
     # log synonyms mapped terms
     result = model.inspect(
@@ -322,3 +317,13 @@ def _has_organism_field(orm: Registry) -> bool:
         return True
     except FieldDoesNotExist:
         return False
+
+
+def _get_organism_record(field: StrField, organism: str | Registry) -> Registry:
+    model = field.field.model
+    if _has_organism_field(model):
+        from lnschema_bionty._bionty import create_or_get_organism_record
+
+        organism_record = create_or_get_organism_record(organism=organism, orm=model)
+        if organism_record is not None:
+            return organism_record
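Internally, `get_or_create_records()` now resolves the organism once up front via the new `_get_organism_record()` helper and only forwards `organism`/`public_source` when they are set. A hedged sketch of that internal call path (registry and values are illustrative):

```python
from lamindb._from_values import get_or_create_records
import bionty as bt  # assumed bionty plug-in exposing Gene

records = get_or_create_records(
    iterable=["TP53", "BRCA1"],
    field=bt.Gene.symbol,
    from_public=True,
    organism="human",  # converted to an Organism record by _get_organism_record()
)
```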
lamindb/_registry.py CHANGED
@@ -129,7 +129,11 @@ def __init__(orm: Registry, *args, **kwargs):
 @classmethod  # type:ignore
 @doc_args(Registry.from_values.__doc__)
 def from_values(
-    cls, values: ListLike, field: StrField | None = None, **kwargs
+    cls,
+    values: ListLike,
+    field: StrField | None = None,
+    organism: Registry | str | None = None,
+    public_source: Registry | None = None,
 ) -> list[Registry]:
     """{}."""
     from_public = True if cls.__module__.startswith("lnschema_bionty.") else False
@@ -138,7 +142,8 @@ def from_values(
         iterable=values,
         field=getattr(cls, field_str),
         from_public=from_public,
-        **kwargs,
+        organism=organism,
+        public_source=public_source,
     )

lamindb/core/__init__.py CHANGED
@@ -14,14 +14,21 @@ Registries:
    LabelManager
    IsTree
    IsVersioned
-   DataFrameAnnotator
-   AnnDataAnnotator
-   AnnotateLookup
    CanValidate
    HasParents
    InspectResult
    fields

+Annotators:
+
+.. autosummary::
+   :toctree: .
+
+   DataFrameAnnotator
+   AnnDataAnnotator
+   MuDataAnnotator
+   AnnotateLookup
+
 Classes:

 .. autosummary::
@@ -53,7 +60,12 @@ from lnschema_core.models import (
     Registry,
 )

-from lamindb._annotate import AnnDataAnnotator, AnnotateLookup, DataFrameAnnotator
+from lamindb._annotate import (
+    AnnDataAnnotator,
+    AnnotateLookup,
+    DataFrameAnnotator,
+    MuDataAnnotator,
+)
 from lamindb._query_manager import QueryManager
 from lamindb._query_set import QuerySet, RecordsList
 from lamindb.core._feature_manager import FeatureManager
lamindb/core/_data.py CHANGED
@@ -109,17 +109,7 @@ def describe(self: Data):
     else:
         direct_fields.append(f.name)

-    # Display Provenance
-    # display line by line the foreign key fields
-    from lamindb._parents import _transform_emoji
-
-    emojis = {
-        "storage": "🗃️",
-        "created_by": "👤",
-        "transform": _transform_emoji(self.transform),
-        "run": "👣",
-        "artifact": "📄",
-    }
+    # provenance
     if len(foreign_key_fields) > 0:  # always True for Artifact and Collection
         record_msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}"
         msg += f"{record_msg}\n\n"
@@ -127,17 +117,16 @@ def describe(self: Data):
         msg += f"{colors.green('Provenance')}:\n  "
         related_msg = "".join(
             [
-                f"{emojis.get(i, '📎')} {i}: {self.__getattribute__(i)}\n  "
-                for i in foreign_key_fields
-                if self.__getattribute__(i) is not None
+                f"📎 {field}: {self.__getattribute__(field)}\n  "
+                for field in foreign_key_fields
+                if self.__getattribute__(field) is not None
             ]
         )
         msg += related_msg
     # input of
-    # can only access many-to-many once record is saved
     if self.id is not None and self.input_of.exists():
         values = [format_field_value(i.started_at) for i in self.input_of.all()]
-        msg += f"⬇️ input_of ({colors.italic('core.Run')}): {values}\n  "
+        msg += f"📎 input_of ({colors.italic('core.Run')}): {values}\n  "
     msg = msg.rstrip(" ")  # do not use removesuffix as we need to remove 2 or 4 spaces
     msg += print_features(self)
     msg += print_labels(self)