lamindb 0.73.2__py3-none-any.whl → 0.74.1__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -14,6 +14,7 @@ Registries:
14
14
  ULabel
15
15
  Feature
16
16
  FeatureSet
17
+ Param
17
18
 
18
19
  Key functionality:
19
20
 
@@ -41,12 +42,13 @@ Modules & settings:
41
42
  """
42
43
 
43
44
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
44
- __version__ = "0.73.2"
45
+ __version__ = "0.74.1"
45
46
 
46
47
  import os as _os
47
48
 
48
49
  import lamindb_setup as _lamindb_setup
49
- from lamindb_setup._check_setup import InstanceNotSetupError, _check_instance_setup
50
+ from lamindb_setup._check_setup import InstanceNotSetupError as _InstanceNotSetupError
51
+ from lamindb_setup._check_setup import _check_instance_setup
50
52
  from lamindb_setup._connect_instance import connect
51
53
  from lamindb_setup.core.upath import UPath
52
54
 
@@ -54,17 +56,18 @@ from . import setup
54
56
 
55
57
 
56
58
  def __getattr__(name):
57
- raise InstanceNotSetupError()
59
+ raise _InstanceNotSetupError()
58
60
 
59
61
 
60
62
  if _check_instance_setup(from_lamindb=True):
61
- del InstanceNotSetupError
63
+ del _InstanceNotSetupError
62
64
  del __getattr__ # delete so that imports work out
63
65
  from lnschema_core.models import (
64
66
  Artifact,
65
67
  Collection,
66
68
  Feature,
67
69
  FeatureSet,
70
+ Param,
68
71
  Run,
69
72
  Storage,
70
73
  Transform,
@@ -87,17 +90,14 @@ if _check_instance_setup(from_lamindb=True):
87
90
  _storage,
88
91
  _transform,
89
92
  _ulabel,
93
+ integrations,
90
94
  )
91
-
92
- dev = core # backward compat
93
- from . import integrations
94
95
  from ._annotate import Annotate
95
96
  from ._finish import finish
96
97
  from ._save import save
97
98
  from ._view import view
98
99
  from .core._run_context import run_context as _run_context
99
100
  from .core._settings import settings
100
- from .core._transform_settings import transform # backward compat
101
101
 
102
102
  # schema modules
103
103
  if not _os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
lamindb/_artifact.py CHANGED
@@ -23,7 +23,7 @@ from lamindb_setup.core.upath import (
23
23
  get_stat_dir_cloud,
24
24
  get_stat_file_cloud,
25
25
  )
26
- from lnschema_core.models import Artifact, FeatureManager, Run, Storage
26
+ from lnschema_core.models import Artifact, FeatureManager, ParamManager, Run, Storage
27
27
  from lnschema_core.types import (
28
28
  VisibilityChoice,
29
29
  )
@@ -84,7 +84,7 @@ def process_pathlike(
84
84
  pass
85
85
  if isinstance(filepath, LocalPathClasses):
86
86
  filepath = filepath.resolve()
87
- if check_path_is_child_of_root(filepath, default_storage.root_as_path()):
87
+ if check_path_is_child_of_root(filepath, default_storage.root):
88
88
  use_existing_storage_key = True
89
89
  return default_storage, use_existing_storage_key
90
90
  else:
@@ -190,7 +190,7 @@ def get_stat_or_artifact(
190
190
  using_key: str | None = None,
191
191
  ) -> tuple[int, str | None, str | None, int | None] | Artifact:
192
192
  n_objects = None
193
- if settings.upon_file_create_skip_size_hash:
193
+ if settings.creation.artifact_skip_size_hash:
194
194
  return None, None, None, n_objects
195
195
  stat = path.stat() # one network request
196
196
  if not isinstance(path, LocalPathClasses):
@@ -242,14 +242,14 @@ def get_stat_or_artifact(
242
242
  Artifact.objects.using(using_key).filter(hash=hash, visibility=None).all()
243
243
  )
244
244
  if len(result) > 0:
245
- if settings.upon_artifact_create_if_hash_exists == "error":
245
+ if settings.creation.artifact_if_hash_exists == "error":
246
246
  msg = f"artifact with same hash exists: {result[0]}"
247
247
  hint = (
248
248
  "💡 you can make this error a warning:\n"
249
- " ln.settings.upon_artifact_create_if_hash_exists"
249
+ " ln.settings.creation.artifact_if_hash_exists"
250
250
  )
251
251
  raise FileExistsError(f"{msg}\n{hint}")
252
- elif settings.upon_artifact_create_if_hash_exists == "warn_create_new":
252
+ elif settings.creation.artifact_if_hash_exists == "warn_create_new":
253
253
  logger.warning(
254
254
  "creating new Artifact object despite existing artifact with same hash:"
255
255
  f" {result[0]}"
@@ -272,28 +272,17 @@ def check_path_in_existing_storage(
272
272
  ) -> Storage | bool:
273
273
  for storage in Storage.objects.using(using_key).filter().all():
274
274
  # if path is part of storage, return it
275
- if check_path_is_child_of_root(path, root=create_path(storage.root)):
275
+ if check_path_is_child_of_root(path, root=storage.root):
276
276
  return storage
277
277
  return False
278
278
 
279
279
 
280
280
  def check_path_is_child_of_root(path: Path | UPath, root: Path | UPath | None) -> bool:
281
- path = UPath(str(path)) if not isinstance(path, UPath) else path
282
- root = UPath(str(root)) if not isinstance(root, UPath) else root
283
-
284
- # the following comparisons can fail if types aren't comparable
285
- if not isinstance(path, LocalPathClasses) and not isinstance(
286
- root, LocalPathClasses
287
- ):
288
- # the following tests equivalency of two UPath objects
289
- # via string representations; otherwise
290
- # S3Path('s3://lndb-storage/') and S3Path('s3://lamindb-ci/')
291
- # test as equivalent
292
- return list(path.parents)[-1].as_posix() == root.as_posix()
293
- elif isinstance(path, LocalPathClasses) and isinstance(root, LocalPathClasses):
294
- return root.resolve() in path.resolve().parents
295
- else:
296
- return False
281
+ # str is needed to eliminate UPath storage_options
282
+ # from the equality checks below
283
+ path = UPath(str(path))
284
+ root = UPath(str(root))
285
+ return root.resolve() in path.resolve().parents
297
286
 
298
287
 
299
288
  def get_relative_path_to_directory(
@@ -358,7 +347,7 @@ def get_artifact_kwargs_from_data(
358
347
  check_path_in_storage = False
359
348
  if use_existing_storage_key:
360
349
  inferred_key = get_relative_path_to_directory(
361
- path=path, directory=storage.root_as_path()
350
+ path=path, directory=UPath(storage.root)
362
351
  ).as_posix()
363
352
  if key is None:
364
353
  key = inferred_key
@@ -387,7 +376,7 @@ def get_artifact_kwargs_from_data(
387
376
  )
388
377
 
389
378
  # do we use a virtual or an actual storage key?
390
- key_is_virtual = settings.artifact_use_virtual_keys
379
+ key_is_virtual = settings.creation._artifact_use_virtual_keys
391
380
 
392
381
  # if the file is already in storage, independent of the default
393
382
  # we use an actual storage key
@@ -516,6 +505,7 @@ def update_attributes(data: HasFeatures, attributes: Mapping[str, str]):
516
505
 
517
506
  def __init__(artifact: Artifact, *args, **kwargs):
518
507
  artifact.features = FeatureManager(artifact)
508
+ artifact.params = ParamManager(artifact)
519
509
  # Below checks for the Django-internal call in from_db()
520
510
  # it'd be better if we could avoid this, but not being able to create a Artifact
521
511
  # from data with the default constructor renders the central class of the API
@@ -532,6 +522,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
532
522
  raise ValueError("Only one non-keyword arg allowed: data")
533
523
 
534
524
  data: str | Path = kwargs.pop("data") if len(args) == 0 else args[0]
525
+ type: str = kwargs.pop("type") if "type" in kwargs else "dataset"
535
526
  key: str | None = kwargs.pop("key") if "key" in kwargs else None
536
527
  run: Run | None = kwargs.pop("run") if "run" in kwargs else None
537
528
  description: str | None = (
@@ -616,6 +607,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
616
607
  "to _retain_ the old state by duplicating the entire folder, do _not_ pass `is_new_version_of`"
617
608
  )
618
609
 
610
+ kwargs["type"] = type
619
611
  kwargs["uid"] = provisional_uid
620
612
  kwargs["version"] = version
621
613
  kwargs["description"] = description
@@ -662,6 +654,7 @@ def from_df(
662
654
  version=version,
663
655
  is_new_version_of=is_new_version_of,
664
656
  accessor="DataFrame",
657
+ type="dataset",
665
658
  **kwargs,
666
659
  )
667
660
  return artifact
@@ -690,6 +683,7 @@ def from_anndata(
690
683
  version=version,
691
684
  is_new_version_of=is_new_version_of,
692
685
  accessor="AnnData",
686
+ type="dataset",
693
687
  **kwargs,
694
688
  )
695
689
  return artifact
@@ -716,6 +710,7 @@ def from_mudata(
716
710
  version=version,
717
711
  is_new_version_of=is_new_version_of,
718
712
  accessor="MuData",
713
+ type="dataset",
719
714
  **kwargs,
720
715
  )
721
716
  return artifact
@@ -752,7 +747,7 @@ def from_dir(
752
747
  else:
753
748
  # maintain the hierachy within an existing storage location
754
749
  folder_key_path = get_relative_path_to_directory(
755
- folderpath, storage.root_as_path()
750
+ folderpath, UPath(storage.root)
756
751
  )
757
752
  else:
758
753
  folder_key_path = Path(key)
lamindb/_feature.py CHANGED
@@ -27,12 +27,12 @@ FEATURE_TYPES = {
27
27
  }
28
28
 
29
29
 
30
- def convert_numpy_dtype_to_lamin_feature_type(dtype) -> str:
30
+ def convert_numpy_dtype_to_lamin_feature_type(dtype, str_as_cat: bool = True) -> str:
31
31
  orig_type = dtype.name
32
32
  # strip precision qualifiers
33
33
  type = "".join(i for i in orig_type if not i.isdigit())
34
34
  if type == "object" or type == "str":
35
- type = "cat"
35
+ type = "cat" if str_as_cat else "str"
36
36
  return type
37
37
 
38
38
 
lamindb/_finish.py CHANGED
@@ -33,13 +33,19 @@ def get_seconds_since_modified(filepath) -> float:
33
33
  return datetime.now().timestamp() - filepath.stat().st_mtime
34
34
 
35
35
 
36
- def finish():
36
+ def finish() -> None:
37
37
  """Mark a tracked run as finished.
38
38
 
39
39
  Saves source code and, for notebooks, a run report to your default storage location.
40
40
  """
41
- if run_context.path is None:
41
+ if run_context.run is None:
42
42
  raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
43
+ if run_context.path is None:
44
+ assert run_context.transform.type not in {"script", "notebook"}
45
+ run_context.run.finished_at = datetime.now(timezone.utc)
46
+ run_context.run.save()
47
+ # nothing else to do
48
+ return None
43
49
  if is_run_from_ipython: # notebooks
44
50
  if (
45
51
  get_seconds_since_modified(run_context.path) > 3
@@ -138,7 +144,7 @@ def save_run_context_core(
138
144
  prev_report = prev_transform.latest_report
139
145
  if prev_transform.source_code_id is not None:
140
146
  prev_source = prev_transform.source_code
141
- ln.settings.silence_file_run_transform_warning = True
147
+ ln.settings.creation.artifact_silence_missing_run_warning = True
142
148
 
143
149
  # track source code
144
150
  if transform.source_code_id is not None:
lamindb/_from_values.py CHANGED
@@ -28,7 +28,7 @@ def get_or_create_records(
28
28
  Registry = field.field.model
29
29
  if create:
30
30
  return [Registry(**{field.field.name: value}) for value in iterable]
31
- upon_create_search_names = settings.upon_create_search_names
31
+ creation_search_names = settings.creation.search_names
32
32
  feature: Feature = None
33
33
  organism = _get_organism_record(field, organism)
34
34
  kwargs: dict = {}
@@ -36,7 +36,7 @@ def get_or_create_records(
36
36
  kwargs["organism"] = organism
37
37
  if public_source is not None:
38
38
  kwargs["public_source"] = public_source
39
- settings.upon_create_search_names = False
39
+ settings.creation.search_names = False
40
40
  try:
41
41
  iterable_idx = index_iterable(iterable)
42
42
 
@@ -88,7 +88,7 @@ def get_or_create_records(
88
88
  logger.debug(f"added default feature '{feature_name}'")
89
89
  return records
90
90
  finally:
91
- settings.upon_create_search_names = upon_create_search_names
91
+ settings.creation.search_names = creation_search_names
92
92
 
93
93
 
94
94
  def get_existing_records(
@@ -278,14 +278,26 @@ def index_iterable(iterable: Iterable) -> pd.Index:
278
278
 
279
279
 
280
280
  def _print_values(names: Iterable, n: int = 20, quotes: bool = True) -> str:
281
- names = (name for name in names if name != "None")
282
- unique_names = list(dict.fromkeys(names))[:n]
283
- if quotes:
284
- print_values = ", ".join(f"'{name}'" for name in unique_names)
281
+ if isinstance(names, dict):
282
+ items = {
283
+ f"{key}: {value}": None
284
+ for key, value in names.items()
285
+ if key != "None" and value != "None"
286
+ }
285
287
  else:
286
- print_values = ", ".join(f"{name}" for name in unique_names)
287
- if len(unique_names) > n:
288
+ # Use a dictionary instead of a list to have unique values and preserve order
289
+ items = {str(name): None for name in names if name != "None"}
290
+
291
+ unique_items = list(items.keys())
292
+
293
+ if quotes:
294
+ unique_items = [f"'{item}'" for item in unique_items]
295
+
296
+ print_values = ", ".join(unique_items[:n])
297
+
298
+ if len(unique_items) > n:
288
299
  print_values += ", ..."
300
+
289
301
  return print_values
290
302
 
291
303
 
lamindb/_query_manager.py CHANGED
@@ -45,7 +45,7 @@ class QueryManager(models.Manager):
45
45
 
46
46
  if (
47
47
  run_context.run is None
48
- and not settings.silence_file_run_transform_warning
48
+ and not settings.creation.artifact_silence_missing_run_warning
49
49
  ):
50
50
  logger.warning(WARNING_RUN_TRANSFORM)
51
51
  _track_run_input(self.instance)
lamindb/_query_set.py CHANGED
@@ -97,7 +97,9 @@ class QuerySet(models.QuerySet, CanValidate):
97
97
  """
98
98
 
99
99
  @doc_args(Registry.df.__doc__)
100
- def df(self, include: str | list[str] | None = None) -> pd.DataFrame:
100
+ def df(
101
+ self, include: str | list[str] | None = None, join: str = "inner"
102
+ ) -> pd.DataFrame:
101
103
  """{}."""
102
104
  # re-order the columns
103
105
  exclude_field_names = ["created_at"]
@@ -173,7 +175,7 @@ class QuerySet(models.QuerySet, CanValidate):
173
175
  link_groupby = link_df.groupby(left_side_link_model)[
174
176
  values_expression
175
177
  ].apply(list)
176
- df = pd.concat((link_groupby, df), axis=1, join="inner")
178
+ df = pd.concat((link_groupby, df), axis=1, join=join)
177
179
  df.rename(columns={values_expression: expression}, inplace=True)
178
180
  else:
179
181
  # the F() based implementation could also work for many-to-many,
@@ -185,7 +187,7 @@ class QuerySet(models.QuerySet, CanValidate):
185
187
  )
186
188
  df_anno = df_anno.set_index(pk_column_name)
187
189
  df_anno.rename(columns={"expression": expression}, inplace=True)
188
- df = pd.concat((df_anno, df), axis=1, join="inner")
190
+ df = pd.concat((df_anno, df), axis=1, join=join)
189
191
  return df
190
192
 
191
193
  def delete(self, *args, **kwargs):
lamindb/_registry.py CHANGED
@@ -82,7 +82,7 @@ def __init__(orm: Registry, *args, **kwargs):
82
82
  has_consciously_provided_uid = False
83
83
  if "_has_consciously_provided_uid" in kwargs:
84
84
  has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
85
- if settings.upon_create_search_names and not has_consciously_provided_uid:
85
+ if settings.creation.search_names and not has_consciously_provided_uid:
86
86
  match = suggest_records_with_similar_names(orm, kwargs)
87
87
  if match:
88
88
  if "version" in kwargs:
@@ -447,7 +447,7 @@ def transfer_to_default_db(
447
447
  if run_context.run is not None:
448
448
  record.run_id = run_context.run.id
449
449
  else:
450
- if not settings.silence_file_run_transform_warning:
450
+ if not settings.creation.artifact_silence_missing_run_warning:
451
451
  logger.warning(WARNING_RUN_TRANSFORM)
452
452
  record.run_id = None
453
453
  if hasattr(record, "transform_id") and record._meta.model_name != "run":
lamindb/_run.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from lnschema_core.models import Run, Transform
3
+ from lnschema_core.models import ParamManager, Run, Transform
4
4
 
5
5
 
6
6
  def __init__(run: Run, *args, **kwargs):
7
+ run.params = ParamManager(run)
7
8
  if len(args) == len(run._meta.concrete_fields):
8
9
  super(Run, run).__init__(*args, **kwargs)
9
10
  return None
lamindb/_storage.py CHANGED
@@ -3,11 +3,6 @@ from lamindb_setup.core.upath import UPath, create_path
3
3
  from lnschema_core import Storage
4
4
 
5
5
 
6
- def root_as_path(self) -> UPath:
7
- access_token = self._access_token if hasattr(self, "_access_token") else None
8
- return create_path(self.root, access_token=access_token)
9
-
10
-
11
6
  @property # type: ignore
12
7
  @doc_args(Storage.path.__doc__)
13
8
  def path(self) -> UPath:
@@ -16,5 +11,4 @@ def path(self) -> UPath:
16
11
  return create_path(self.root, access_token=access_token)
17
12
 
18
13
 
19
- Storage.root_as_path = root_as_path
20
14
  Storage.path = path
lamindb/core/__init__.py CHANGED
@@ -10,7 +10,9 @@ Registries:
10
10
  QueryManager
11
11
  RecordsList
12
12
  HasFeatures
13
+ HasParams
13
14
  FeatureManager
15
+ ParamManager
14
16
  LabelManager
15
17
  IsVersioned
16
18
  CanValidate
@@ -30,13 +32,12 @@ Annotators:
30
32
  MuDataAnnotator
31
33
  AnnotateLookup
32
34
 
33
- Classes:
35
+ Other:
34
36
 
35
37
  .. autosummary::
36
38
  :toctree: .
37
39
 
38
40
  Settings
39
- TransformSettings
40
41
  MappedCollection
41
42
  run_context
42
43
 
@@ -49,6 +50,7 @@ Modules:
49
50
  storage
50
51
  types
51
52
  exceptions
53
+ subsettings
52
54
 
53
55
  """
54
56
 
@@ -56,6 +58,7 @@ from lamin_utils._inspect import InspectResult
56
58
  from lnschema_core.models import (
57
59
  CanValidate,
58
60
  HasFeatures,
61
+ HasParams,
59
62
  HasParents,
60
63
  IsVersioned,
61
64
  Registry,
@@ -71,11 +74,10 @@ from lamindb._annotate import (
71
74
  )
72
75
  from lamindb._query_manager import QueryManager
73
76
  from lamindb._query_set import QuerySet, RecordsList
74
- from lamindb.core._feature_manager import FeatureManager
77
+ from lamindb.core._feature_manager import FeatureManager, ParamManager
75
78
  from lamindb.core._label_manager import LabelManager
76
79
 
77
- from . import _data, datasets, exceptions, fields, types
80
+ from . import _data, datasets, exceptions, fields, subsettings, types
78
81
  from ._mapped_collection import MappedCollection
79
82
  from ._run_context import run_context
80
83
  from ._settings import Settings
81
- from ._transform_settings import TransformSettings
lamindb/core/_data.py CHANGED
@@ -24,13 +24,12 @@ from lamindb._registry import get_default_str_field
24
24
  from lamindb.core._settings import settings
25
25
 
26
26
  from ._feature_manager import (
27
- FeatureManager,
28
27
  get_feature_set_links,
29
28
  get_host_id_field,
30
29
  get_label_links,
31
30
  print_features,
32
31
  )
33
- from ._label_manager import LabelManager, print_labels
32
+ from ._label_manager import print_labels
34
33
  from ._run_context import run_context
35
34
  from .exceptions import ValidationError
36
35
  from .schema import (
@@ -47,7 +46,7 @@ WARNING_RUN_TRANSFORM = "no run & transform get linked, consider calling ln.trac
47
46
  def get_run(run: Run | None) -> Run | None:
48
47
  if run is None:
49
48
  run = run_context.run
50
- if run is None and not settings.silence_file_run_transform_warning:
49
+ if run is None and not settings.creation.artifact_silence_missing_run_warning:
51
50
  logger.warning(WARNING_RUN_TRANSFORM)
52
51
  # suppress run by passing False
53
52
  elif not run:
@@ -159,7 +158,11 @@ def describe(self: HasFeatures, print_types: bool = False):
159
158
  msg += f" {colors.italic('Provenance')}\n"
160
159
  msg += prov_msg
161
160
  msg += print_labels(self, print_types=print_types)
162
- msg += print_features(self, print_types=print_types) # type: ignore
161
+ msg += print_features( # type: ignore
162
+ self,
163
+ print_types=print_types,
164
+ print_params=hasattr(self, "type") and self.type == "model",
165
+ )
163
166
  logger.print(msg)
164
167
 
165
168