lamindb 0.72.0__py3-none-any.whl → 0.73.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules & settings:
41
41
  """
42
42
 
43
43
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
44
- __version__ = "0.72.0"
44
+ __version__ = "0.73.0"
45
45
 
46
46
  import os as _os
47
47
 
lamindb/_annotate.py CHANGED
@@ -22,15 +22,13 @@ class AnnotateLookup:
22
22
 
23
23
  def __init__(
24
24
  self,
25
- categorials: dict[str, FieldAttr],
25
+ categoricals: dict[str, FieldAttr],
26
26
  slots: dict[str, FieldAttr] = None,
27
27
  using: str | None = None,
28
28
  ) -> None:
29
29
  if slots is None:
30
30
  slots = {}
31
- if slots is None:
32
- slots = {}
33
- self._fields = {**categorials, **slots}
31
+ self._fields = {**categoricals, **slots}
34
32
  self._using = None if using == "default" else using
35
33
  self._using_name = self._using or ln_setup.settings.instance.slug
36
34
  debug_message = f"Lookup objects from the " f"{colors.italic(self._using_name)}"
@@ -73,7 +71,7 @@ class AnnotateLookup:
73
71
  "Example:\n → categories = validator.lookup().cell_type\n"
74
72
  " → categories.alveolar_type_1_fibroblast_cell"
75
73
  )
76
- else:
74
+ else: # pragma: no cover
77
75
  return colors.warning("No fields are found!")
78
76
 
79
77
 
@@ -132,7 +130,7 @@ class DataFrameAnnotator:
132
130
  if "public", the lookup is performed on the public reference.
133
131
  """
134
132
  return AnnotateLookup(
135
- categorials=self._fields,
133
+ categoricals=self._fields,
136
134
  slots={"columns": self._columns_field},
137
135
  using=using or self._using,
138
136
  )
@@ -305,10 +303,10 @@ class DataFrameAnnotator:
305
303
  slug = ln_setup.settings.instance.slug
306
304
  if collection._state.adding:
307
305
  collection.save()
308
- else:
306
+ else: # pragma: no cover
309
307
  collection.save()
310
308
  logger.warning(f"collection already exists in {colors.italic(slug)}!")
311
- if ln_setup.settings.instance.is_remote:
309
+ if ln_setup.settings.instance.is_remote: # pragma: no cover
312
310
  logger.print(f"go to https://lamin.ai/{slug}/collection/{collection.uid}")
313
311
  self._collection = collection
314
312
  return collection
@@ -363,7 +361,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
363
361
  )
364
362
  if isinstance(data, ad.AnnData):
365
363
  self._adata = data
366
- else:
364
+ else: # pragma: no cover
367
365
  from lamindb.core.storage._backed_access import backed_access
368
366
 
369
367
  self._adata = backed_access(upath.create_path(data))
@@ -399,7 +397,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
399
397
  if "public", the lookup is performed on the public reference.
400
398
  """
401
399
  return AnnotateLookup(
402
- categorials=self._obs_fields,
400
+ categoricals=self._obs_fields,
403
401
  slots={"columns": self._columns_field, "var_index": self._var_field},
404
402
  using=using or self._using,
405
403
  )
@@ -466,7 +464,9 @@ class AnnDataAnnotator(DataFrameAnnotator):
466
464
  A saved artifact record.
467
465
  """
468
466
  if not self._validated:
469
- raise ValidationError("Please run `validate()` first!")
467
+ raise ValidationError(
468
+ f"Data object is not validated, please run {colors.yellow('validate()')}!"
469
+ )
470
470
 
471
471
  self._artifact = save_artifact(
472
472
  self._data,
@@ -489,6 +489,7 @@ class MuDataAnnotator:
489
489
  For example:
490
490
  ``{"modality_1": bt.Gene.ensembl_gene_id, "modality_2": ln.CellMarker.name}``
491
491
  categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
492
+ Use modality keys to specify categoricals for MuData slots such as `"rna:cell_type": bt.CellType.name`.
492
493
  using: A reference LaminDB instance.
493
494
  verbosity: The verbosity level.
494
495
  organism: The organism name.
@@ -593,7 +594,7 @@ class MuDataAnnotator:
593
594
  if "public", the lookup is performed on the public reference.
594
595
  """
595
596
  return AnnotateLookup(
596
- categorials=self._obs_fields,
597
+ categoricals=self._obs_fields,
597
598
  slots={
598
599
  **self._obs_fields,
599
600
  **{f"{k}_var_index": v for k, v in self._var_fields.items()},
@@ -988,11 +989,15 @@ def save_artifact(
988
989
  )
989
990
 
990
991
  if artifact.accessor == "DataFrame":
991
- artifact.features.add_from_df(field=columns_field, **feature_kwargs)
992
+ artifact.features._add_set_from_df(field=columns_field, **feature_kwargs)
992
993
  elif artifact.accessor == "AnnData":
993
- artifact.features.add_from_anndata(var_field=columns_field, **feature_kwargs)
994
+ artifact.features._add_set_from_anndata(
995
+ var_field=columns_field, **feature_kwargs
996
+ )
994
997
  elif artifact.accessor == "MuData":
995
- artifact.features.add_from_mudata(var_fields=columns_field, **feature_kwargs)
998
+ artifact.features._add_set_from_mudata(
999
+ var_fields=columns_field, **feature_kwargs
1000
+ )
996
1001
  else:
997
1002
  raise NotImplementedError
998
1003
 
@@ -1016,7 +1021,7 @@ def save_artifact(
1016
1021
  _add_labels(data, artifact, fields)
1017
1022
 
1018
1023
  slug = ln_setup.settings.instance.slug
1019
- if ln_setup.settings.instance.is_remote:
1024
+ if ln_setup.settings.instance.is_remote: # pragma: no cover
1020
1025
  logger.important(f"go to https://lamin.ai/{slug}/artifact/{artifact.uid}")
1021
1026
  return artifact
1022
1027
 
@@ -1124,6 +1129,8 @@ def log_saved_labels(
1124
1129
  validated_only: bool = True,
1125
1130
  ) -> None:
1126
1131
  """Log the saved labels."""
1132
+ from ._from_values import _print_values
1133
+
1127
1134
  model_field = colors.italic(model_field)
1128
1135
  for k, labels in labels_saved.items():
1129
1136
  if not labels:
@@ -1151,7 +1158,7 @@ def log_saved_labels(
1151
1158
  # labels from a public ontology or a different instance to the present instance
1152
1159
  s = "s" if len(labels) > 1 else ""
1153
1160
  logger.success(
1154
- f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {labels}"
1161
+ f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {_print_values(labels)}"
1155
1162
  )
1156
1163
 
1157
1164
 
@@ -1204,7 +1211,7 @@ def update_registry_from_using_instance(
1204
1211
  return labels_saved, not_saved
1205
1212
 
1206
1213
 
1207
- def _save_organism(name: str):
1214
+ def _save_organism(name: str): # pragma: no cover
1208
1215
  """Save an organism record."""
1209
1216
  import bionty as bt
1210
1217
 
lamindb/_artifact.py CHANGED
@@ -23,13 +23,13 @@ from lamindb_setup.core.upath import (
23
23
  get_stat_dir_cloud,
24
24
  get_stat_file_cloud,
25
25
  )
26
- from lnschema_core import Artifact, Run, Storage
26
+ from lnschema_core.models import Artifact, FeatureManager, Run, Storage
27
27
  from lnschema_core.types import (
28
28
  VisibilityChoice,
29
29
  )
30
30
 
31
31
  from lamindb._utils import attach_func_to_class_method
32
- from lamindb.core._data import Data, _track_run_input
32
+ from lamindb.core._data import HasFeatures, _track_run_input
33
33
  from lamindb.core._settings import settings
34
34
  from lamindb.core.storage import (
35
35
  LocalPathClasses,
@@ -186,8 +186,6 @@ def process_data(
186
186
 
187
187
  def get_stat_or_artifact(
188
188
  path: UPath,
189
- suffix: str,
190
- memory_rep: Any | None = None,
191
189
  check_hash: bool = True,
192
190
  using_key: str | None = None,
193
191
  ) -> tuple[int, str | None, str | None, int | None] | Artifact:
@@ -261,7 +259,7 @@ def get_stat_or_artifact(
261
259
  f"You're trying to re-create this artifact in trash: {result[0]}"
262
260
  "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
263
261
  )
264
- logger.warning(f"returning existing artifact with same hash: {result[0]}")
262
+ logger.important(f"returning existing artifact with same hash: {result[0]}")
265
263
  return result[0]
266
264
  else:
267
265
  return size, hash, hash_type, n_objects
@@ -338,8 +336,6 @@ def get_artifact_kwargs_from_data(
338
336
  )
339
337
  stat_or_artifact = get_stat_or_artifact(
340
338
  path=path,
341
- suffix=suffix,
342
- memory_rep=memory_rep,
343
339
  using_key=using_key,
344
340
  )
345
341
  if isinstance(stat_or_artifact, Artifact):
@@ -509,7 +505,7 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
509
505
  return accessor
510
506
 
511
507
 
512
- def update_attributes(data: Data, attributes: Mapping[str, str]):
508
+ def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
513
509
  for key, value in attributes.items():
514
510
  if getattr(data, key) != value:
515
511
  logger.warning(f"updated {key} from {getattr(data, key)} to {value}")
@@ -517,6 +513,7 @@ def update_attributes(data: Data, attributes: Mapping[str, str]):
517
513
 
518
514
 
519
515
  def __init__(artifact: Artifact, *args, **kwargs):
516
+ artifact.features = FeatureManager(artifact)
520
517
  # Below checks for the Django-internal call in from_db()
521
518
  # it'd be better if we could avoid this, but not being able to create an Artifact
522
519
  # from data with the default constructor renders the central class of the API
@@ -1006,7 +1003,7 @@ def delete(
1006
1003
  # we don't yet have logic to bring back the deleted metadata record
1007
1004
  # in case storage deletion fails - this is important for ACID down the road
1008
1005
  if delete_in_storage:
1009
- delete_msg = delete_storage(path)
1006
+ delete_msg = delete_storage(path, raise_file_not_found_error=False)
1010
1007
  if delete_msg != "did-not-delete":
1011
1008
  logger.success(f"deleted {colors.yellow(f'{path}')}")
1012
1009
 
@@ -1018,6 +1015,7 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
1018
1015
  # docstring handled through attach_func_to_class_method
1019
1016
  def save(self, upload: bool | None = None, **kwargs) -> None:
1020
1017
  state_was_adding = self._state.adding
1018
+ print_progress = kwargs.pop("print_progress", True)
1021
1019
  access_token = kwargs.pop("access_token", None)
1022
1020
  local_path = None
1023
1021
  if upload and setup_settings.instance.keep_artifacts_local:
@@ -1038,7 +1036,9 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
1038
1036
  using_key = None
1039
1037
  if "using" in kwargs:
1040
1038
  using_key = kwargs["using"]
1041
- exception = check_and_attempt_upload(self, using_key, access_token=access_token)
1039
+ exception = check_and_attempt_upload(
1040
+ self, using_key, access_token=access_token, print_progress=print_progress
1041
+ )
1042
1042
  if exception is not None:
1043
1043
  self._delete_skip_storage()
1044
1044
  raise RuntimeError(exception)
lamindb/_collection.py CHANGED
@@ -15,7 +15,12 @@ from anndata import AnnData
15
15
  from lamin_utils import logger
16
16
  from lamindb_setup.core._docs import doc_args
17
17
  from lamindb_setup.core.hashing import hash_set
18
- from lnschema_core.models import Collection, CollectionArtifact, FeatureSet
18
+ from lnschema_core.models import (
19
+ Collection,
20
+ CollectionArtifact,
21
+ FeatureManager,
22
+ FeatureSet,
23
+ )
19
24
  from lnschema_core.types import VisibilityChoice
20
25
 
21
26
  from lamindb._artifact import update_attributes
@@ -45,6 +50,7 @@ def __init__(
45
50
  *args,
46
51
  **kwargs,
47
52
  ):
53
+ collection.features = FeatureManager(collection)
48
54
  if len(args) == len(collection._meta.concrete_fields):
49
55
  super(Collection, collection).__init__(*args, **kwargs)
50
56
  return None
@@ -103,9 +109,9 @@ def __init__(
103
109
  if meta._state.adding:
104
110
  raise ValueError("Save meta artifact before creating collection!")
105
111
  if not feature_sets:
106
- feature_sets = meta.features.feature_set_by_slot
112
+ feature_sets = meta.features._feature_set_by_slot
107
113
  else:
108
- if len(meta.features.feature_set_by_slot) > 0:
114
+ if len(meta.features._feature_set_by_slot) > 0:
109
115
  logger.info("overwriting feature sets linked to artifact")
110
116
  # we ignore collections in trash containing the same hash
111
117
  if hash is not None:
@@ -129,7 +135,7 @@ def __init__(
129
135
  existing_collection.transform = run.transform
130
136
  init_self_from_db(collection, existing_collection)
131
137
  update_attributes(collection, {"description": description, "name": name})
132
- for slot, feature_set in collection.features.feature_set_by_slot.items():
138
+ for slot, feature_set in collection.features._feature_set_by_slot.items():
133
139
  if slot in feature_sets:
134
140
  if not feature_sets[slot] == feature_set:
135
141
  collection.feature_sets.remove(feature_set)
@@ -322,7 +328,7 @@ def delete(self, permanent: bool | None = None) -> None:
322
328
 
323
329
 
324
330
  # docstring handled through attach_func_to_class_method
325
- def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
331
+ def save(self, using: str | None = None) -> None:
326
332
  if self.artifact is not None:
327
333
  self.artifact.save()
328
334
  # we don't need to save feature sets again
@@ -331,21 +337,19 @@ def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
331
337
  # we don't allow updating the collection of artifacts
332
338
  # if users want to update the set of artifacts, they
333
339
  # have to create a new collection
334
- links = [
335
- CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
336
- for artifact in self._artifacts
337
- ]
338
- # the below seems to preserve the order of the list in the
339
- # auto-incrementing integer primary
340
- # merely using .unordered_artifacts.set(*...) doesn't achieve this
341
- # we need ignore_conflicts=True so that this won't error if links already exist
342
- CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
340
+ if hasattr(self, "_artifacts"):
341
+ links = [
342
+ CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
343
+ for artifact in self._artifacts
344
+ ]
345
+ # the below seems to preserve the order of the list in the
346
+ # auto-incrementing integer primary
347
+ # merely using .unordered_artifacts.set(*...) doesn't achieve this
348
+ # we need ignore_conflicts=True so that this won't error if links already exist
349
+ CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
343
350
  save_feature_set_links(self)
344
351
  if using is not None:
345
352
  logger.warning("using argument is ignored")
346
- if transfer_labels:
347
- for artifact in self._artifacts:
348
- self.labels.add_from(artifact)
349
353
 
350
354
 
351
355
  # docstring handled through attach_func_to_class_method
lamindb/_feature_set.py CHANGED
@@ -73,7 +73,7 @@ def __init__(self, *args, **kwargs):
73
73
  features_hash = hash_set({feature.uid for feature in features})
74
74
  feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
75
75
  if feature_set is not None:
76
- logger.success(f"loaded: {feature_set}")
76
+ logger.debug(f"loaded: {feature_set}")
77
77
  init_self_from_db(self, feature_set)
78
78
  return None
79
79
  else:
lamindb/_filter.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from lnschema_core import Artifact, Collection, Registry
3
+ from lnschema_core import Artifact, Collection, Feature, Registry
4
4
  from lnschema_core.types import VisibilityChoice
5
5
 
6
6
  from lamindb import settings
@@ -14,7 +14,11 @@ def filter(Registry: type[Registry], **expressions) -> QuerySet:
14
14
  _using_key = expressions.pop("_using_key")
15
15
  if Registry in {Artifact, Collection}:
16
16
  # visibility is set to 0 unless expressions contains id or uid equality
17
- if not ("id" in expressions or "uid" in expressions):
17
+ if not (
18
+ "id" in expressions
19
+ or "uid" in expressions
20
+ or "uid__startswith" in expressions
21
+ ):
18
22
  visibility = "visibility"
19
23
  if not any(e.startswith(visibility) for e in expressions):
20
24
  expressions[
lamindb/_finish.py CHANGED
@@ -63,6 +63,7 @@ def save_run_context_core(
63
63
  filepath: Path,
64
64
  transform_family: QuerySet | None = None,
65
65
  finished_at: bool = False,
66
+ from_cli: bool = False,
66
67
  ) -> str | None:
67
68
  import lamindb as ln
68
69
 
@@ -70,9 +71,10 @@ def save_run_context_core(
70
71
 
71
72
  # for scripts, things are easy
72
73
  is_consecutive = True
74
+ is_notebook = transform.type == TransformType.notebook
73
75
  source_code_path = filepath
74
76
  # for notebooks, we need more work
75
- if transform.type == TransformType.notebook:
77
+ if is_notebook:
76
78
  try:
77
79
  import nbstripout
78
80
  from nbproject.dev import (
@@ -85,7 +87,9 @@ def save_run_context_core(
85
87
  )
86
88
  return None
87
89
  notebook_content = read_notebook(filepath) # type: ignore
88
- is_consecutive = check_consecutiveness(notebook_content)
90
+ is_consecutive = check_consecutiveness(
91
+ notebook_content, calling_statement="ln.finish()"
92
+ )
89
93
  if not is_consecutive:
90
94
  msg = " Do you still want to proceed with finishing? (y/n) "
91
95
  if os.getenv("LAMIN_TESTING") is None:
@@ -106,13 +110,13 @@ def save_run_context_core(
106
110
  # in an existing storage location -> we want to move associated
107
111
  # artifacts into default storage and not register them in an existing
108
112
  # location
109
- filepath_html_orig = filepath.with_suffix(".html") # current location
110
- filepath_html = ln_setup.settings.storage.cache_dir / filepath_html_orig.name
113
+ report_path_orig = filepath.with_suffix(".html") # current location
114
+ report_path = ln_setup.settings.storage.cache_dir / report_path_orig.name
111
115
  # don't use Path.rename here because of cross-device link error
112
116
  # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
113
117
  shutil.move(
114
- filepath_html_orig, # type: ignore
115
- filepath_html,
118
+ report_path_orig, # type: ignore
119
+ report_path,
116
120
  )
117
121
  # strip the output from the notebook to create the source code file
118
122
  # first, copy the notebook file to a temporary file in the cache
@@ -159,6 +163,8 @@ def save_run_context_core(
159
163
  else:
160
164
  logger.warning("Please re-run `ln.track()` to make a new version")
161
165
  return "rerun-the-notebook"
166
+ else:
167
+ logger.important("source code is already saved")
162
168
  else:
163
169
  source_code = ln.Artifact(
164
170
  source_code_path,
@@ -168,57 +174,73 @@ def save_run_context_core(
168
174
  visibility=0, # hidden file
169
175
  run=False,
170
176
  )
171
- source_code.save(upload=True)
177
+ source_code.save(upload=True, print_progress=False)
172
178
  transform.source_code = source_code
173
- logger.success(f"saved transform.source_code: {transform.source_code}")
179
+ logger.debug(f"saved transform.source_code: {transform.source_code}")
174
180
 
175
181
  # track environment
176
- filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
177
- if filepath_env.exists():
178
- hash, _ = hash_file(filepath_env)
179
- artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
180
- new_env_artifact = artifact is None
181
- if new_env_artifact:
182
- artifact = ln.Artifact(
183
- filepath_env,
184
- description="requirements.txt",
185
- visibility=0,
186
- run=False,
187
- )
188
- artifact.save(upload=True)
189
- run.environment = artifact
190
- if new_env_artifact:
191
- logger.success(f"saved run.environment: {run.environment}")
182
+ env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
183
+ if env_path.exists():
184
+ overwrite_env = True
185
+ if run.environment_id is not None and from_cli:
186
+ logger.important("run.environment is already saved")
187
+ overwrite_env = False
188
+ if overwrite_env:
189
+ hash, _ = hash_file(env_path)
190
+ artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
191
+ new_env_artifact = artifact is None
192
+ if new_env_artifact:
193
+ artifact = ln.Artifact(
194
+ env_path,
195
+ description="requirements.txt",
196
+ visibility=0,
197
+ run=False,
198
+ )
199
+ artifact.save(upload=True, print_progress=False)
200
+ run.environment = artifact
201
+ if new_env_artifact:
202
+ logger.debug(f"saved run.environment: {run.environment}")
192
203
 
193
204
  # set finished_at
194
205
  if finished_at:
195
206
  run.finished_at = datetime.now(timezone.utc)
196
207
 
197
208
  # track report and set is_consecutive
198
- if not transform.type == TransformType.notebook:
209
+ if not is_notebook:
199
210
  run.is_consecutive = True
200
211
  run.save()
201
212
  else:
202
213
  if run.report_id is not None:
203
- logger.warning(
204
- "there is already an existing report for this run, replacing it"
205
- )
206
- run.report.replace(filepath_html)
207
- run.report.save(upload=True)
214
+ hash, _ = hash_file(report_path) # ignore hash_type for now
215
+ if hash != run.report.hash:
216
+ if os.getenv("LAMIN_TESTING") is None:
217
+ # in test, auto-confirm overwrite
218
+ response = input(
219
+ f"You are about to replace (overwrite) an existing run report (hash '{run.report.hash}'). Proceed? (y/n)"
220
+ )
221
+ else:
222
+ response = "y"
223
+ if response == "y":
224
+ run.report.replace(report_path)
225
+ run.report.save(upload=True)
226
+ else:
227
+ logger.important("keeping old report")
228
+ else:
229
+ logger.important("report is already saved")
208
230
  else:
209
231
  report_file = ln.Artifact(
210
- filepath_html,
232
+ report_path,
211
233
  description=f"Report of run {run.uid}",
212
234
  is_new_version_of=prev_report,
213
235
  visibility=0, # hidden file
214
236
  run=False,
215
237
  )
216
- report_file.save(upload=True)
238
+ report_file.save(upload=True, print_progress=False)
217
239
  run.report = report_file
218
240
  run.is_consecutive = is_consecutive
219
241
  run.save()
220
242
  transform.latest_report = run.report
221
- logger.success(f"saved transform.latest_report: {transform.latest_report}")
243
+ logger.debug(f"saved transform.latest_report: {transform.latest_report}")
222
244
  transform.save()
223
245
 
224
246
  # finalize
@@ -227,6 +249,15 @@ def save_run_context_core(
227
249
  logger.important(
228
250
  f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
229
251
  )
252
+ if not from_cli:
253
+ thing, name = (
254
+ ("notebook", "notebook.ipynb")
255
+ if is_notebook
256
+ else ("script", "script.py")
257
+ )
258
+ logger.important(
259
+ f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
260
+ )
230
261
  # because run & transform changed, update the global run_context
231
262
  run_context.run = run
232
263
  run_context.transform = transform
lamindb/_from_values.py CHANGED
@@ -18,12 +18,16 @@ def get_or_create_records(
18
18
  iterable: ListLike,
19
19
  field: StrField,
20
20
  *,
21
+ create: bool = False,
21
22
  from_public: bool = False,
22
23
  organism: Registry | str | None = None,
23
24
  public_source: Registry | None = None,
24
25
  mute: bool = False,
25
26
  ) -> list[Registry]:
26
27
  """Get or create records from iterables."""
28
+ Registry = field.field.model
29
+ if create:
30
+ return [Registry(**{field.field.name: value}) for value in iterable]
27
31
  upon_create_search_names = settings.upon_create_search_names
28
32
  feature: Feature = None
29
33
  organism = _get_organism_record(field, organism)
@@ -34,7 +38,6 @@ def get_or_create_records(
34
38
  kwargs["public_source"] = public_source
35
39
  settings.upon_create_search_names = False
36
40
  try:
37
- Registry = field.field.model
38
41
  iterable_idx = index_iterable(iterable)
39
42
 
40
43
  # returns existing records & non-existing values
@@ -274,10 +277,13 @@ def index_iterable(iterable: Iterable) -> pd.Index:
274
277
  return idx[(idx != "") & (~idx.isnull())]
275
278
 
276
279
 
277
- def _print_values(names: Iterable, n: int = 20) -> str:
280
+ def _print_values(names: Iterable, n: int = 20, quotes: bool = True) -> str:
278
281
  names = (name for name in names if name != "None")
279
282
  unique_names = list(dict.fromkeys(names))[:n]
280
- print_values = ", ".join(f"'{name}'" for name in unique_names)
283
+ if quotes:
284
+ print_values = ", ".join(f"'{name}'" for name in unique_names)
285
+ else:
286
+ print_values = ", ".join(f"{name}" for name in unique_names)
281
287
  if len(unique_names) > n:
282
288
  print_values += ", ..."
283
289
  return print_values
lamindb/_parents.py CHANGED
@@ -30,6 +30,8 @@ def _transform_emoji(transform: Transform):
30
30
 
31
31
 
32
32
  def _view(u):
33
+ from graphviz.backend import ExecutableNotFound
34
+
33
35
  try:
34
36
  if is_run_from_ipython:
35
37
  from IPython import get_ipython
@@ -39,10 +41,12 @@ def _view(u):
39
41
  if get_ipython().__class__.__name__ == "TerminalInteractiveShell":
40
42
  return u.view()
41
43
  else:
42
- display(u)
44
+ # call u._repr_mimebundle_() manually so that the exception gets raised properly and not just printed by
45
+ # the call to display()
46
+ display(u._repr_mimebundle_(), raw=True)
43
47
  else:
44
48
  return u
45
- except (FileNotFoundError, RuntimeError): # pragma: no cover
49
+ except (FileNotFoundError, RuntimeError, ExecutableNotFound): # pragma: no cover
46
50
  logger.error(
47
51
  "please install the graphviz executable on your system:\n - Ubuntu: `sudo"
48
52
  " apt-get install graphviz`\n - Windows:"
@@ -177,9 +181,11 @@ def _view_parents(
177
181
  )
178
182
  u.node(
179
183
  record.uid,
180
- label=_record_label(record)
181
- if record.__class__.__name__ == "Transform"
182
- else _add_emoji(record, record_label),
184
+ label=(
185
+ _record_label(record)
186
+ if record.__class__.__name__ == "Transform"
187
+ else _add_emoji(record, record_label)
188
+ ),
183
189
  fillcolor=LAMIN_GREEN_LIGHTER,
184
190
  )
185
191
  if df_edges is not None:
lamindb/_query_manager.py CHANGED
@@ -9,7 +9,7 @@ from lnschema_core.models import Registry
9
9
 
10
10
  from lamindb.core._settings import settings
11
11
 
12
- from .core._feature_manager import get_feature_set_by_slot
12
+ from .core._feature_manager import get_feature_set_by_slot_
13
13
 
14
14
  if TYPE_CHECKING:
15
15
  from lnschema_core.types import StrField
@@ -107,7 +107,7 @@ class QueryManager(models.Manager):
107
107
  source_field_name in {"artifact", "collection"}
108
108
  and target_field_name == "feature_set"
109
109
  ):
110
- return get_feature_set_by_slot(host=self.instance).get(item)
110
+ return get_feature_set_by_slot_(host=self.instance).get(item)
111
111
 
112
112
  except Exception: # pragma: no cover
113
113
  return
lamindb/_query_set.py CHANGED
@@ -99,9 +99,30 @@ class QuerySet(models.QuerySet, CanValidate):
99
99
  @doc_args(Registry.df.__doc__)
100
100
  def df(self, include: str | list[str] | None = None) -> pd.DataFrame:
101
101
  """{}."""
102
- data = self.values()
103
- keys = get_keys_from_df(data, self.model)
104
- df = pd.DataFrame(self.values(), columns=keys)
102
+ # re-order the columns
103
+ exclude_field_names = ["created_at"]
104
+ field_names = [
105
+ field.name
106
+ for field in self.model._meta.fields
107
+ if (
108
+ not isinstance(field, models.ForeignKey)
109
+ and field.name not in exclude_field_names
110
+ )
111
+ ]
112
+ field_names += [
113
+ f"{field.name}_id"
114
+ for field in self.model._meta.fields
115
+ if isinstance(field, models.ForeignKey)
116
+ ]
117
+ for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
118
+ if field_name in field_names:
119
+ field_names.remove(field_name)
120
+ field_names.append(field_name)
121
+ if field_names[0] != "uid" and "uid" in field_names:
122
+ field_names.remove("uid")
123
+ field_names.insert(0, "uid")
124
+ # create the dataframe
125
+ df = pd.DataFrame(self.values(), columns=field_names)
105
126
  # if len(df) > 0 and "updated_at" in df:
106
127
  # df.updated_at = format_and_convert_to_local_time(df.updated_at)
107
128
  # if len(df) > 0 and "started_at" in df: