lamindb 0.72.1__py3-none-any.whl → 0.73.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
lamindb/__init__.py CHANGED
@@ -41,7 +41,7 @@ Modules & settings:
 """

 # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
-__version__ = "0.72.1"
+__version__ = "0.73.1"

 import os as _os

lamindb/_annotate.py CHANGED
@@ -22,15 +22,13 @@ class AnnotateLookup:

     def __init__(
         self,
-        categorials: dict[str, FieldAttr],
+        categoricals: dict[str, FieldAttr],
         slots: dict[str, FieldAttr] = None,
         using: str | None = None,
     ) -> None:
         if slots is None:
             slots = {}
-        if slots is None:
-            slots = {}
-        self._fields = {**categorials, **slots}
+        self._fields = {**categoricals, **slots}
         self._using = None if using == "default" else using
         self._using_name = self._using or ln_setup.settings.instance.slug
         debug_message = f"Lookup objects from the " f"{colors.italic(self._using_name)}"
@@ -73,7 +71,7 @@ class AnnotateLookup:
                 "Example:\n → categories = validator.lookup().cell_type\n"
                 " → categories.alveolar_type_1_fibroblast_cell"
             )
-        else:
+        else:  # pragma: no cover
             return colors.warning("No fields are found!")


@@ -132,7 +130,7 @@ class DataFrameAnnotator:
                 if "public", the lookup is performed on the public reference.
         """
         return AnnotateLookup(
-            categorials=self._fields,
+            categoricals=self._fields,
             slots={"columns": self._columns_field},
             using=using or self._using,
         )
@@ -305,10 +303,10 @@ class DataFrameAnnotator:
         slug = ln_setup.settings.instance.slug
         if collection._state.adding:
             collection.save()
-        else:
+        else:  # pragma: no cover
             collection.save()
             logger.warning(f"collection already exists in {colors.italic(slug)}!")
-        if ln_setup.settings.instance.is_remote:
+        if ln_setup.settings.instance.is_remote:  # pragma: no cover
             logger.print(f"go to https://lamin.ai/{slug}/collection/{collection.uid}")
         self._collection = collection
         return collection
@@ -363,7 +361,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
         )
         if isinstance(data, ad.AnnData):
             self._adata = data
-        else:
+        else:  # pragma: no cover
             from lamindb.core.storage._backed_access import backed_access

             self._adata = backed_access(upath.create_path(data))
@@ -399,7 +397,7 @@ class AnnDataAnnotator(DataFrameAnnotator):
                 if "public", the lookup is performed on the public reference.
         """
         return AnnotateLookup(
-            categorials=self._obs_fields,
+            categoricals=self._obs_fields,
             slots={"columns": self._columns_field, "var_index": self._var_field},
             using=using or self._using,
         )
@@ -466,7 +464,9 @@ class AnnDataAnnotator(DataFrameAnnotator):
             A saved artifact record.
         """
         if not self._validated:
-            raise ValidationError("Please run `validate()` first!")
+            raise ValidationError(
+                f"Data object is not validated, please run {colors.yellow('validate()')}!"
+            )

         self._artifact = save_artifact(
             self._data,
@@ -489,6 +489,7 @@ class MuDataAnnotator:
             For example:
             ``{"modality_1": bt.Gene.ensembl_gene_id, "modality_2": ln.CellMarker.name}``
         categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
+            Use modality keys to specify categoricals for MuData slots such as ``"rna:cell_type": bt.CellType.name``.
         using: A reference LaminDB instance.
         verbosity: The verbosity level.
         organism: The organism name.
@@ -593,7 +594,7 @@ class MuDataAnnotator:
                 if "public", the lookup is performed on the public reference.
         """
         return AnnotateLookup(
-            categorials=self._obs_fields,
+            categoricals=self._obs_fields,
             slots={
                 **self._obs_fields,
                 **{f"{k}_var_index": v for k, v in self._var_fields.items()},
@@ -812,7 +813,10 @@ def standardize_and_inspect(
     values: Iterable[str], field: FieldAttr, registry: Registry, **kwargs
 ):
     """Standardize and inspect values using a registry."""
-    if hasattr(registry, "standardize"):
+    if hasattr(registry, "standardize") and hasattr(
+        registry,
+        "synonyms",  # https://github.com/laminlabs/lamindb/issues/1685
+    ):
         values = registry.standardize(values, field=field, mute=True, **kwargs)
     return registry.inspect(values, field=field, mute=True, **kwargs)

@@ -975,7 +979,7 @@ def save_artifact(
     except ImportError:
         pass
     if artifact is None:
-        raise ValueError("data must be a DataFrame, AnnData or MuData object")
+        raise ValueError("data must be a DataFrame, AnnData or MuData object.")
     artifact.save()

     feature_kwargs = check_registry_organism(
@@ -988,11 +992,15 @@ def save_artifact(
     )

     if artifact.accessor == "DataFrame":
-        artifact.features.add_from_df(field=columns_field, **feature_kwargs)
+        artifact.features._add_set_from_df(field=columns_field, **feature_kwargs)
     elif artifact.accessor == "AnnData":
-        artifact.features.add_from_anndata(var_field=columns_field, **feature_kwargs)
+        artifact.features._add_set_from_anndata(
+            var_field=columns_field, **feature_kwargs
+        )
     elif artifact.accessor == "MuData":
-        artifact.features.add_from_mudata(var_fields=columns_field, **feature_kwargs)
+        artifact.features._add_set_from_mudata(
+            var_fields=columns_field, **feature_kwargs
+        )
     else:
         raise NotImplementedError

@@ -1016,7 +1024,7 @@ def save_artifact(
     _add_labels(data, artifact, fields)

     slug = ln_setup.settings.instance.slug
-    if ln_setup.settings.instance.is_remote:
+    if ln_setup.settings.instance.is_remote:  # pragma: no cover
         logger.important(f"go to https://lamin.ai/{slug}/artifact/{artifact.uid}")
     return artifact

@@ -1124,6 +1132,8 @@ def log_saved_labels(
     validated_only: bool = True,
 ) -> None:
     """Log the saved labels."""
+    from ._from_values import _print_values
+
     model_field = colors.italic(model_field)
     for k, labels in labels_saved.items():
         if not labels:
@@ -1151,7 +1161,7 @@ def log_saved_labels(
         # labels from a public ontology or a different instance to the present instance
         s = "s" if len(labels) > 1 else ""
         logger.success(
-            f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {labels}"
+            f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {_print_values(labels)}"
         )


@@ -1204,7 +1214,7 @@ def update_registry_from_using_instance(
     return labels_saved, not_saved


-def _save_organism(name: str):
+def _save_organism(name: str):  # pragma: no cover
     """Save an organism record."""
     import bionty as bt

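Note: the `categorials` → `categoricals` rename above changes the keyword at every `AnnotateLookup` call site. A minimal usage sketch of the corrected API, assuming a bionty registry field (the field and column name are illustrative, not taken from this diff):

    import bionty as bt
    from lamindb._annotate import AnnotateLookup

    # hypothetical mapping of a dataframe column to a registry field
    lookup = AnnotateLookup(categoricals={"cell_type": bt.CellType.name})
    categories = lookup.cell_type  # dot access, as in the class repr example
    # categories.alveolar_type_1_fibroblast_cell
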
lamindb/_artifact.py CHANGED
@@ -23,13 +23,13 @@ from lamindb_setup.core.upath import (
     get_stat_dir_cloud,
     get_stat_file_cloud,
 )
-from lnschema_core import Artifact, Run, Storage
+from lnschema_core.models import Artifact, FeatureManager, Run, Storage
 from lnschema_core.types import (
     VisibilityChoice,
 )

 from lamindb._utils import attach_func_to_class_method
-from lamindb.core._data import Data, _track_run_input
+from lamindb.core._data import HasFeatures, _track_run_input
 from lamindb.core._settings import settings
 from lamindb.core.storage import (
     LocalPathClasses,
@@ -186,8 +186,6 @@ def process_data(

 def get_stat_or_artifact(
     path: UPath,
-    suffix: str,
-    memory_rep: Any | None = None,
     check_hash: bool = True,
     using_key: str | None = None,
 ) -> tuple[int, str | None, str | None, int | None] | Artifact:
@@ -198,6 +196,8 @@ def get_stat_or_artifact(
     if not isinstance(path, LocalPathClasses):
         size, hash, hash_type = None, None, None
         if stat is not None:
+            # convert UPathStatResult to fsspec info dict
+            stat = stat.as_info()
             if "ETag" in stat:  # is file
                 size, hash, hash_type = get_stat_file_cloud(stat)
             elif stat["type"] == "directory":
@@ -261,7 +261,7 @@ def get_stat_or_artifact(
                 f"You're trying to re-create this artifact in trash: {result[0]}"
                 "Either permanently delete it with `artifact.delete(permanent=True)` or restore it with `artifact.restore()`"
             )
-        logger.warning(f"returning existing artifact with same hash: {result[0]}")
+        logger.important(f"returning existing artifact with same hash: {result[0]}")
         return result[0]
     else:
         return size, hash, hash_type, n_objects
@@ -338,8 +338,6 @@ def get_artifact_kwargs_from_data(
     )
     stat_or_artifact = get_stat_or_artifact(
         path=path,
-        suffix=suffix,
-        memory_rep=memory_rep,
         using_key=using_key,
     )
     if isinstance(stat_or_artifact, Artifact):
@@ -509,7 +507,7 @@ def _check_accessor_artifact(data: Any, accessor: str | None = None):
     return accessor


-def update_attributes(data: Data, attributes: Mapping[str, str]):
+def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
     for key, value in attributes.items():
         if getattr(data, key) != value:
             logger.warning(f"updated {key} from {getattr(data, key)} to {value}")
@@ -517,6 +515,7 @@ def update_attributes(data: Data, attributes: Mapping[str, str]):


 def __init__(artifact: Artifact, *args, **kwargs):
+    artifact.features = FeatureManager(artifact)
     # Below checks for the Django-internal call in from_db()
     # it'd be better if we could avoid this, but not being able to create a Artifact
     # from data with the default constructor renders the central class of the API
@@ -1006,7 +1005,7 @@ def delete(
     # we don't yet have logic to bring back the deleted metadata record
     # in case storage deletion fails - this is important for ACID down the road
     if delete_in_storage:
-        delete_msg = delete_storage(path)
+        delete_msg = delete_storage(path, raise_file_not_found_error=False)
         if delete_msg != "did-not-delete":
             logger.success(f"deleted {colors.yellow(f'{path}')}")

@@ -1018,6 +1017,7 @@ def _delete_skip_storage(artifact, *args, **kwargs) -> None:
 # docstring handled through attach_func_to_class_method
 def save(self, upload: bool | None = None, **kwargs) -> None:
     state_was_adding = self._state.adding
+    print_progress = kwargs.pop("print_progress", True)
     access_token = kwargs.pop("access_token", None)
     local_path = None
     if upload and setup_settings.instance.keep_artifacts_local:
@@ -1038,7 +1038,9 @@ def save(self, upload: bool | None = None, **kwargs) -> None:
     using_key = None
     if "using" in kwargs:
         using_key = kwargs["using"]
-    exception = check_and_attempt_upload(self, using_key, access_token=access_token)
+    exception = check_and_attempt_upload(
+        self, using_key, access_token=access_token, print_progress=print_progress
+    )
     if exception is not None:
         self._delete_skip_storage()
         raise RuntimeError(exception)
@@ -1109,6 +1111,5 @@ for name in METHOD_NAMES:
 Artifact._delete_skip_storage = _delete_skip_storage
 Artifact._save_skip_storage = _save_skip_storage
 Artifact.path = path
-Artifact.stage = cache
 # this seems a Django-generated function
 delattr(Artifact, "get_visibility_display")
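Note: two user-visible consequences of the `_artifact.py` changes above: `Artifact.save()` now accepts a `print_progress` keyword that it forwards to the upload step, and the `Artifact.stage` alias is gone in favor of `Artifact.cache()`. A hedged sketch (the file path is made up):

    import lamindb as ln

    artifact = ln.Artifact("data.parquet", description="example")  # hypothetical local file
    artifact.save(upload=True, print_progress=False)  # suppress the upload progress bar
    local_path = artifact.cache()  # use cache(); the stage() alias was removed
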
lamindb/_collection.py CHANGED
@@ -15,7 +15,12 @@ from anndata import AnnData
 from lamin_utils import logger
 from lamindb_setup.core._docs import doc_args
 from lamindb_setup.core.hashing import hash_set
-from lnschema_core.models import Collection, CollectionArtifact, FeatureSet
+from lnschema_core.models import (
+    Collection,
+    CollectionArtifact,
+    FeatureManager,
+    FeatureSet,
+)
 from lnschema_core.types import VisibilityChoice

 from lamindb._artifact import update_attributes
@@ -45,6 +50,7 @@ def __init__(
     *args,
     **kwargs,
 ):
+    collection.features = FeatureManager(collection)
     if len(args) == len(collection._meta.concrete_fields):
         super(Collection, collection).__init__(*args, **kwargs)
         return None
@@ -103,9 +109,9 @@ def __init__(
         if meta._state.adding:
             raise ValueError("Save meta artifact before creating collection!")
         if not feature_sets:
-            feature_sets = meta.features.feature_set_by_slot
+            feature_sets = meta.features._feature_set_by_slot
         else:
-            if len(meta.features.feature_set_by_slot) > 0:
+            if len(meta.features._feature_set_by_slot) > 0:
                 logger.info("overwriting feature sets linked to artifact")
     # we ignore collections in trash containing the same hash
     if hash is not None:
@@ -129,7 +135,7 @@ def __init__(
             existing_collection.transform = run.transform
         init_self_from_db(collection, existing_collection)
         update_attributes(collection, {"description": description, "name": name})
-        for slot, feature_set in collection.features.feature_set_by_slot.items():
+        for slot, feature_set in collection.features._feature_set_by_slot.items():
             if slot in feature_sets:
                 if not feature_sets[slot] == feature_set:
                     collection.feature_sets.remove(feature_set)
@@ -322,7 +328,7 @@ def delete(self, permanent: bool | None = None) -> None:


 # docstring handled through attach_func_to_class_method
-def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
+def save(self, using: str | None = None) -> None:
     if self.artifact is not None:
         self.artifact.save()
     # we don't need to save feature sets again
@@ -331,21 +337,19 @@ def save(self, transfer_labels: bool = False, using: str | None = None) -> None:
     # we don't allow updating the collection of artifacts
     # if users want to update the set of artifacts, they
     # have to create a new collection
-    links = [
-        CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
-        for artifact in self._artifacts
-    ]
-    # the below seems to preserve the order of the list in the
-    # auto-incrementing integer primary
-    # merely using .unordered_artifacts.set(*...) doesn't achieve this
-    # we need ignore_conflicts=True so that this won't error if links already exist
-    CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
+    if hasattr(self, "_artifacts"):
+        links = [
+            CollectionArtifact(collection_id=self.id, artifact_id=artifact.id)
+            for artifact in self._artifacts
+        ]
+        # the below seems to preserve the order of the list in the
+        # auto-incrementing integer primary
+        # merely using .unordered_artifacts.set(*...) doesn't achieve this
+        # we need ignore_conflicts=True so that this won't error if links already exist
+        CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
     save_feature_set_links(self)
     if using is not None:
         logger.warning("using argument is ignored")
-    if transfer_labels:
-        for artifact in self._artifacts:
-            self.labels.add_from(artifact)


 # docstring handled through attach_func_to_class_method
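Note: with `transfer_labels` dropped from `Collection.save()` and the artifact-link creation now guarded by `hasattr(self, "_artifacts")`, saving a freshly constructed collection reduces to a plain call. A sketch, with illustrative construction arguments:

    import lamindb as ln

    artifacts = ln.Artifact.filter().all()[:2]  # any saved artifacts
    collection = ln.Collection(artifacts, name="my-collection")
    collection.save()  # no transfer_labels argument anymore
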
lamindb/_feature_set.py CHANGED
@@ -73,7 +73,7 @@ def __init__(self, *args, **kwargs):
     features_hash = hash_set({feature.uid for feature in features})
     feature_set = FeatureSet.filter(hash=features_hash).one_or_none()
     if feature_set is not None:
-        logger.success(f"loaded: {feature_set}")
+        logger.debug(f"loaded: {feature_set}")
         init_self_from_db(self, feature_set)
         return None
     else:
lamindb/_filter.py CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations

-from lnschema_core import Artifact, Collection, Registry
+from lnschema_core import Artifact, Collection, Feature, Registry
 from lnschema_core.types import VisibilityChoice

 from lamindb import settings
@@ -14,7 +14,11 @@ def filter(Registry: type[Registry], **expressions) -> QuerySet:
     _using_key = expressions.pop("_using_key")
     if Registry in {Artifact, Collection}:
         # visibility is set to 0 unless expressions contains id or uid equality
-        if not ("id" in expressions or "uid" in expressions):
+        if not (
+            "id" in expressions
+            or "uid" in expressions
+            or "uid__startswith" in expressions
+        ):
             visibility = "visibility"
             if not any(e.startswith(visibility) for e in expressions):
                 expressions[
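Note: the widened exemption means a `uid__startswith` query is now treated like an exact `id`/`uid` lookup and no longer receives the implicit `visibility` filter. A sketch (the uid prefix is made up):

    import lamindb as ln

    # before 0.73.1 this implicitly filtered on visibility and could miss
    # hidden or trashed records; now the prefix match returns them too
    ln.Artifact.filter(uid__startswith="KBW3zJ").df()
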
lamindb/_finish.py CHANGED
@@ -63,6 +63,7 @@ def save_run_context_core(
     filepath: Path,
     transform_family: QuerySet | None = None,
     finished_at: bool = False,
+    from_cli: bool = False,
 ) -> str | None:
     import lamindb as ln

@@ -70,9 +71,10 @@ def save_run_context_core(

     # for scripts, things are easy
     is_consecutive = True
+    is_notebook = transform.type == TransformType.notebook
     source_code_path = filepath
     # for notebooks, we need more work
-    if transform.type == TransformType.notebook:
+    if is_notebook:
         try:
             import nbstripout
             from nbproject.dev import (
@@ -85,7 +87,9 @@ def save_run_context_core(
             )
             return None
         notebook_content = read_notebook(filepath)  # type: ignore
-        is_consecutive = check_consecutiveness(notebook_content)
+        is_consecutive = check_consecutiveness(
+            notebook_content, calling_statement="ln.finish()"
+        )
         if not is_consecutive:
             msg = " Do you still want to proceed with finishing? (y/n) "
             if os.getenv("LAMIN_TESTING") is None:
@@ -106,13 +110,13 @@ def save_run_context_core(
         # in an existing storage location -> we want to move associated
         # artifacts into default storage and not register them in an existing
         # location
-        filepath_html_orig = filepath.with_suffix(".html")  # current location
-        filepath_html = ln_setup.settings.storage.cache_dir / filepath_html_orig.name
+        report_path_orig = filepath.with_suffix(".html")  # current location
+        report_path = ln_setup.settings.storage.cache_dir / report_path_orig.name
         # don't use Path.rename here because of cross-device link error
         # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
         shutil.move(
-            filepath_html_orig,  # type: ignore
-            filepath_html,
+            report_path_orig,  # type: ignore
+            report_path,
         )
         # strip the output from the notebook to create the source code file
         # first, copy the notebook file to a temporary file in the cache
@@ -159,6 +163,8 @@ def save_run_context_core(
             else:
                 logger.warning("Please re-run `ln.track()` to make a new version")
                 return "rerun-the-notebook"
+        else:
+            logger.important("source code is already saved")
     else:
         source_code = ln.Artifact(
             source_code_path,
@@ -168,57 +174,73 @@ def save_run_context_core(
             visibility=0,  # hidden file
             run=False,
         )
-        source_code.save(upload=True)
+        source_code.save(upload=True, print_progress=False)
         transform.source_code = source_code
-        logger.success(f"saved transform.source_code: {transform.source_code}")
+        logger.debug(f"saved transform.source_code: {transform.source_code}")

     # track environment
-    filepath_env = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
-    if filepath_env.exists():
-        hash, _ = hash_file(filepath_env)
-        artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
-        new_env_artifact = artifact is None
-        if new_env_artifact:
-            artifact = ln.Artifact(
-                filepath_env,
-                description="requirements.txt",
-                visibility=0,
-                run=False,
-            )
-            artifact.save(upload=True)
-        run.environment = artifact
-        if new_env_artifact:
-            logger.success(f"saved run.environment: {run.environment}")
+    env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
+    if env_path.exists():
+        overwrite_env = True
+        if run.environment_id is not None and from_cli:
+            logger.important("run.environment is already saved")
+            overwrite_env = False
+        if overwrite_env:
+            hash, _ = hash_file(env_path)
+            artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
+            new_env_artifact = artifact is None
+            if new_env_artifact:
+                artifact = ln.Artifact(
+                    env_path,
+                    description="requirements.txt",
+                    visibility=0,
+                    run=False,
+                )
+                artifact.save(upload=True, print_progress=False)
+            run.environment = artifact
+            if new_env_artifact:
+                logger.debug(f"saved run.environment: {run.environment}")

     # set finished_at
     if finished_at:
         run.finished_at = datetime.now(timezone.utc)

     # track report and set is_consecutive
-    if not transform.type == TransformType.notebook:
+    if not is_notebook:
         run.is_consecutive = True
         run.save()
     else:
         if run.report_id is not None:
-            logger.warning(
-                "there is already an existing report for this run, replacing it"
-            )
-            run.report.replace(filepath_html)
-            run.report.save(upload=True)
+            hash, _ = hash_file(report_path)  # ignore hash_type for now
+            if hash != run.report.hash:
+                if os.getenv("LAMIN_TESTING") is None:
+                    # in test, auto-confirm overwrite
+                    response = input(
+                        f"You are about to replace (overwrite) an existing run report (hash '{run.report.hash}'). Proceed? (y/n)"
+                    )
+                else:
+                    response = "y"
+                if response == "y":
+                    run.report.replace(report_path)
+                    run.report.save(upload=True)
+                else:
+                    logger.important("keeping old report")
+            else:
+                logger.important("report is already saved")
         else:
             report_file = ln.Artifact(
-                filepath_html,
+                report_path,
                 description=f"Report of run {run.uid}",
                 is_new_version_of=prev_report,
                 visibility=0,  # hidden file
                 run=False,
             )
-            report_file.save(upload=True)
+            report_file.save(upload=True, print_progress=False)
             run.report = report_file
         run.is_consecutive = is_consecutive
         run.save()
     transform.latest_report = run.report
-    logger.success(f"saved transform.latest_report: {transform.latest_report}")
+    logger.debug(f"saved transform.latest_report: {transform.latest_report}")
     transform.save()

     # finalize
@@ -227,6 +249,15 @@ def save_run_context_core(
         logger.important(
             f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}"
         )
+    if not from_cli:
+        thing, name = (
+            ("notebook", "notebook.ipynb")
+            if is_notebook
+            else ("script", "script.py")
+        )
+        logger.important(
+            f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
+        )
     # because run & transform changed, update the global run_context
     run_context.run = run
     run_context.transform = transform
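Note: these `_finish.py` changes route both `ln.finish()` and the `lamin save` CLI through `save_run_context_core`, with the new `from_cli` flag controlling which log hints are shown and whether an already-saved environment is overwritten. The notebook-side flow this supports, sketched:

    import lamindb as ln

    ln.track()  # register the notebook/script run
    # ... analysis code ...
    ln.finish()  # saves source code, run environment, and the run report
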
lamindb/_from_values.py CHANGED
@@ -18,12 +18,16 @@ def get_or_create_records(
     iterable: ListLike,
     field: StrField,
     *,
+    create: bool = False,
     from_public: bool = False,
     organism: Registry | str | None = None,
     public_source: Registry | None = None,
     mute: bool = False,
 ) -> list[Registry]:
     """Get or create records from iterables."""
+    Registry = field.field.model
+    if create:
+        return [Registry(**{field.field.name: value}) for value in iterable]
     upon_create_search_names = settings.upon_create_search_names
     feature: Feature = None
     organism = _get_organism_record(field, organism)
@@ -34,7 +38,6 @@ def get_or_create_records(
         kwargs["public_source"] = public_source
     settings.upon_create_search_names = False
     try:
-        Registry = field.field.model
         iterable_idx = index_iterable(iterable)

         # returns existing records & non-existing values
@@ -274,10 +277,13 @@ def index_iterable(iterable: Iterable) -> pd.Index:
     return idx[(idx != "") & (~idx.isnull())]


-def _print_values(names: Iterable, n: int = 20) -> str:
+def _print_values(names: Iterable, n: int = 20, quotes: bool = True) -> str:
     names = (name for name in names if name != "None")
     unique_names = list(dict.fromkeys(names))[:n]
-    print_values = ", ".join(f"'{name}'" for name in unique_names)
+    if quotes:
+        print_values = ", ".join(f"'{name}'" for name in unique_names)
+    else:
+        print_values = ", ".join(f"{name}" for name in unique_names)
     if len(unique_names) > n:
         print_values += ", ..."
     return print_values
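Note: the new `create=True` fast path in `get_or_create_records` skips all existence checks and simply instantiates one unsaved record per value. A hedged sketch against `ULabel.name` (any string registry field should work the same way):

    import lamindb as ln
    from lamindb._from_values import get_or_create_records

    # returns unsaved ULabel instances, one per value, without querying the registry
    records = get_or_create_records(["batch1", "batch2"], ln.ULabel.name, create=True)
    ln.save(records)
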
lamindb/_parents.py CHANGED
@@ -30,6 +30,8 @@ def _transform_emoji(transform: Transform):


 def _view(u):
+    from graphviz.backend import ExecutableNotFound
+
     try:
         if is_run_from_ipython:
             from IPython import get_ipython
@@ -39,10 +41,12 @@ def _view(u):
             if get_ipython().__class__.__name__ == "TerminalInteractiveShell":
                 return u.view()
             else:
-                display(u)
+                # call u._repr_mimebundle_() manually so that an exception gets raised
+                # properly and not just printed by the call to display()
+                display(u._repr_mimebundle_(), raw=True)
         else:
             return u
-    except (FileNotFoundError, RuntimeError):  # pragma: no cover
+    except (FileNotFoundError, RuntimeError, ExecutableNotFound):  # pragma: no cover
         logger.error(
             "please install the graphviz executable on your system:\n - Ubuntu: `sudo"
             " apt-get install graphviz`\n - Windows:"
@@ -177,9 +181,11 @@ def _view_parents(
     )
     u.node(
         record.uid,
-        label=_record_label(record)
-        if record.__class__.__name__ == "Transform"
-        else _add_emoji(record, record_label),
+        label=(
+            _record_label(record)
+            if record.__class__.__name__ == "Transform"
+            else _add_emoji(record, record_label)
+        ),
         fillcolor=LAMIN_GREEN_LIGHTER,
     )
     if df_edges is not None:
lamindb/_query_manager.py CHANGED
@@ -9,7 +9,7 @@ from lnschema_core.models import Registry

 from lamindb.core._settings import settings

-from .core._feature_manager import get_feature_set_by_slot
+from .core._feature_manager import get_feature_set_by_slot_

 if TYPE_CHECKING:
     from lnschema_core.types import StrField
@@ -107,7 +107,7 @@ class QueryManager(models.Manager):
                 source_field_name in {"artifact", "collection"}
                 and target_field_name == "feature_set"
             ):
-                return get_feature_set_by_slot(host=self.instance).get(item)
+                return get_feature_set_by_slot_(host=self.instance).get(item)

         except Exception:  # pragma: no cover
             return