lamindb 1.4.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. lamindb/__init__.py +52 -36
  2. lamindb/_finish.py +17 -10
  3. lamindb/_tracked.py +1 -1
  4. lamindb/base/__init__.py +3 -1
  5. lamindb/base/fields.py +40 -22
  6. lamindb/base/ids.py +1 -94
  7. lamindb/base/types.py +2 -0
  8. lamindb/base/uids.py +117 -0
  9. lamindb/core/_context.py +203 -102
  10. lamindb/core/_settings.py +38 -25
  11. lamindb/core/datasets/__init__.py +11 -4
  12. lamindb/core/datasets/_core.py +5 -5
  13. lamindb/core/datasets/_small.py +0 -93
  14. lamindb/core/datasets/mini_immuno.py +172 -0
  15. lamindb/core/loaders.py +1 -1
  16. lamindb/core/storage/_backed_access.py +100 -6
  17. lamindb/core/storage/_polars_lazy_df.py +51 -0
  18. lamindb/core/storage/_pyarrow_dataset.py +15 -30
  19. lamindb/core/storage/_tiledbsoma.py +29 -13
  20. lamindb/core/storage/objects.py +6 -0
  21. lamindb/core/subsettings/__init__.py +2 -0
  22. lamindb/core/subsettings/_annotation_settings.py +11 -0
  23. lamindb/curators/__init__.py +7 -3349
  24. lamindb/curators/_legacy.py +2056 -0
  25. lamindb/curators/core.py +1534 -0
  26. lamindb/errors.py +11 -0
  27. lamindb/examples/__init__.py +27 -0
  28. lamindb/examples/schemas/__init__.py +12 -0
  29. lamindb/examples/schemas/_anndata.py +25 -0
  30. lamindb/examples/schemas/_simple.py +19 -0
  31. lamindb/integrations/_vitessce.py +8 -5
  32. lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +24 -0
  33. lamindb/migrations/0092_alter_artifactfeaturevalue_artifact_and_more.py +75 -0
  34. lamindb/migrations/0093_alter_schemacomponent_unique_together.py +16 -0
  35. lamindb/models/__init__.py +4 -1
  36. lamindb/models/_describe.py +21 -4
  37. lamindb/models/_feature_manager.py +382 -287
  38. lamindb/models/_label_manager.py +8 -2
  39. lamindb/models/artifact.py +177 -106
  40. lamindb/models/artifact_set.py +122 -0
  41. lamindb/models/collection.py +73 -52
  42. lamindb/models/core.py +1 -1
  43. lamindb/models/feature.py +51 -17
  44. lamindb/models/has_parents.py +69 -14
  45. lamindb/models/project.py +1 -1
  46. lamindb/models/query_manager.py +221 -22
  47. lamindb/models/query_set.py +247 -172
  48. lamindb/models/record.py +65 -247
  49. lamindb/models/run.py +4 -4
  50. lamindb/models/save.py +8 -2
  51. lamindb/models/schema.py +456 -184
  52. lamindb/models/transform.py +2 -2
  53. lamindb/models/ulabel.py +8 -5
  54. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/METADATA +6 -6
  55. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/RECORD +57 -43
  56. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/LICENSE +0 -0
  57. {lamindb-1.4.0.dist-info → lamindb-1.5.1.dist-info}/WHEEL +0 -0
lamindb/core/_context.py CHANGED
@@ -54,19 +54,21 @@ def get_uid_ext(version: str) -> str:
54
54
  return encodebytes(hashlib.md5(version.encode()).digest())[:4] # noqa: S324
55
55
 
56
56
 
57
- def get_notebook_path() -> Path:
57
+ def get_notebook_path() -> tuple[Path, str]:
58
58
  from nbproject.dev._jupyter_communicate import (
59
59
  notebook_path as get_notebook_path,
60
60
  )
61
61
 
62
62
  path = None
63
63
  try:
64
- path = get_notebook_path()
65
- except Exception:
66
- raise RuntimeError(msg_path_failed) from None
64
+ path, env = get_notebook_path(return_env=True)
65
+ except ValueError as ve:
66
+ raise ve
67
+ except Exception as error:
68
+ raise RuntimeError(msg_path_failed) from error
67
69
  if path is None:
68
70
  raise RuntimeError(msg_path_failed) from None
69
- return Path(path)
71
+ return Path(path), env
70
72
 
71
73
 
72
74
  # from https://stackoverflow.com/questions/61901628
@@ -178,20 +180,7 @@ class LogStreamTracker:
178
180
  class Context:
179
181
  """Run context.
180
182
 
181
- Enables convenient data lineage tracking by managing a transform & run
182
- upon :meth:`~lamindb.core.Context.track` & :meth:`~lamindb.core.Context.finish`.
183
-
184
- Guide: :doc:`/track`
185
-
186
- Examples:
187
-
188
- Is typically used via the global :class:`~lamindb.context` object via `ln.track()` and `ln.finish()`:
189
-
190
- >>> import lamindb as ln
191
- >>> ln.track()
192
- >>> # do things
193
- >>> ln.finish()
194
-
183
+ Is the book keeper for :meth:`~lamindb.core.Context.track`.
195
184
  """
196
185
 
197
186
  def __init__(self):
@@ -201,13 +190,14 @@ class Context:
201
190
  self._transform: Transform | None = None
202
191
  self._run: Run | None = None
203
192
  self._path: Path | None = None
204
- """A local path to the script that's running."""
193
+ """A local path to the script or notebook that's running."""
205
194
  self._project: Project | None = None
206
195
  self._space: Space | None = None
207
196
  self._logging_message_track: str = ""
208
197
  self._logging_message_imports: str = ""
209
198
  self._stream_tracker: LogStreamTracker = LogStreamTracker()
210
199
  self._is_finish_retry: bool = False
200
+ self._notebook_runner: str | None = None
211
201
 
212
202
  @property
213
203
  def transform(self) -> Transform | None:
@@ -265,31 +255,29 @@ class Context:
265
255
  """Managed run of context."""
266
256
  return self._run
267
257
 
268
- def track(
258
+ def _track(
269
259
  self,
270
260
  transform: str | Transform | None = None,
271
261
  *,
272
- project: str | None = None,
273
- space: str | None = None,
262
+ project: str | Project | None = None,
263
+ space: str | Space | None = None,
274
264
  params: dict | None = None,
275
265
  new_run: bool | None = None,
276
266
  path: str | None = None,
277
267
  ) -> None:
278
- """Track a global run in your compute session.
268
+ """Track a run of your notebook or script.
279
269
 
280
- - sets :attr:`~lamindb.core.Context.transform` &
281
- :attr:`~lamindb.core.Context.run` by creating or loading `Transform` &
282
- `Run` records
283
- - writes compute environment to prepare populating: `run.environment`
284
- - if :attr:`~lamindb.core.Settings.sync_git_repo` is set, checks whether a script-like
285
- transform exists in a git repository and links it
270
+ Populates the global run :class:`~lamindb.context` by managing `Transform` & `Run` records and caching the compute environment.
286
271
 
287
- Guide: :doc:`/track`
272
+ If :attr:`~lamindb.core.Settings.sync_git_repo` is set, checks whether a script-like transform exists in a git repository and links it.
288
273
 
289
274
  Args:
290
- transform: A transform `uid` or record. If `None`, manages the `transform` based on the script or notebook that calls `ln.track()`.
291
- project: A project `name` or `uid` for labeling entities created during the run.
292
- space: A space `name` or `uid` for creating entities during the run. This doesn't affect bionty entities given they should typically be commonly accessible.
275
+ transform: A transform (stem) `uid` (or record). If `None`, auto-creates a `transform` with its `uid`.
276
+ project: A project, its `name` or `uid` for labeling entities created during the run.
277
+ space: A restricted space, its `name` or `uid`, in which sensitive entities are created during the run.
278
+ The default is the common `"All"` space that every LaminDB instance has.
279
+ The `space` argument doesn't affect `Storage`, `ULabel`, `Feature`, `Schema`, `Param` and bionty entities as these provide structure that should typically be commonly accessible.
280
+ If you want to manually move entities to a different space, set the `.space` field (:doc:`docs:access`).
293
281
  params: A dictionary of parameters to track for the run.
294
282
  new_run: If `False`, loads the latest run of transform
295
283
  (default notebook), if `True`, creates new run (default non-notebook).
@@ -301,11 +289,15 @@ class Context:
301
289
  To track the run of a notebook or script, call::
302
290
 
303
291
  ln.track()
292
+ #> → created Transform('Onv04I53OgtT0000'), started new Run('dpSfd7Ds...') at 2025-04-25 11:00:03 UTC
293
+ #> • recommendation: to identify the notebook across renames, pass the uid: ln.track("Onv04I53OgtT")
304
294
 
305
- If you want to ensure a single version history across renames of the notebook or script, pass the auto-generated `uid` that you'll find in the logs::
295
+ Ensure one version history across file renames::
306
296
 
307
- ln.track("Onv04I53OgtT0000") # example uid, the last four characters encode the version of the transform
297
+ ln.track("Onv04I53OgtT")
298
+ #> → created Transform('Onv04I53OgtT0000'), started new Run('dpSfd7Ds...') at 2025-04-25 11:00:03 UTC
308
299
 
300
+ More examples: :doc:`/track`
309
301
  """
310
302
  from lamindb.models import Project, Space
311
303
 
@@ -318,26 +310,41 @@ class Context:
318
310
  if project is None:
319
311
  project = os.environ.get("LAMIN_CURRENT_PROJECT")
320
312
  if project is not None:
321
- project_record = Project.filter(
322
- Q(name=project) | Q(uid=project)
323
- ).one_or_none()
324
- if project_record is None:
325
- raise InvalidArgument(
326
- f"Project '{project}' not found, either create it with `ln.Project(name='...').save()` or fix typos."
313
+ if isinstance(project, Project):
314
+ assert project._state.adding is False, ( # noqa: S101
315
+ "Project must be saved before passing it to track()"
327
316
  )
317
+ project_record = project
318
+ else:
319
+ project_record = Project.filter(
320
+ Q(name=project) | Q(uid=project)
321
+ ).one_or_none()
322
+ if project_record is None:
323
+ raise InvalidArgument(
324
+ f"Project '{project}' not found, either create it with `ln.Project(name='...').save()` or fix typos."
325
+ )
328
326
  self._project = project_record
329
327
  if space is not None:
330
- space_record = Space.filter(Q(name=space) | Q(uid=space)).one_or_none()
331
- if space_record is None:
332
- raise InvalidArgument(
333
- f"Space '{space}', please check on the hub UI whether you have the correct `uid` or `name`."
328
+ if isinstance(space, Space):
329
+ assert space._state.adding is False, ( # noqa: S101
330
+ "Space must be saved before passing it to track()"
334
331
  )
332
+ space_record = space
333
+ else:
334
+ space_record = Space.filter(Q(name=space) | Q(uid=space)).one_or_none()
335
+ if space_record is None:
336
+ raise InvalidArgument(
337
+ f"Space '{space}', please check on the hub UI whether you have the correct `uid` or `name`."
338
+ )
335
339
  self._space = space_record
336
340
  self._logging_message_track = ""
337
341
  self._logging_message_imports = ""
338
342
  if transform is not None and isinstance(transform, str):
339
343
  self.uid = transform
340
344
  transform = None
345
+ uid_was_none = False
346
+ else:
347
+ uid_was_none = True
341
348
  self._path = None
342
349
  if transform is None:
343
350
  description = None
@@ -384,7 +391,14 @@ class Context:
384
391
  self._transform = transform_exists
385
392
 
386
393
  if new_run is None: # for notebooks, default to loading latest runs
387
- new_run = False if self._transform.type == "notebook" else True # type: ignore
394
+ new_run = (
395
+ False
396
+ if (
397
+ self._transform.type == "notebook"
398
+ and self._notebook_runner != "nbconvert"
399
+ )
400
+ else True
401
+ ) # type: ignore
388
402
 
389
403
  run = None
390
404
  if not new_run: # try loading latest run by same user
@@ -431,6 +445,22 @@ class Context:
431
445
  logger.important(self._logging_message_track)
432
446
  if self._logging_message_imports:
433
447
  logger.important(self._logging_message_imports)
448
+ if uid_was_none:
449
+ notebook_or_script = (
450
+ "notebook" if self._transform.type == "notebook" else "script"
451
+ )
452
+ r_or_python = "."
453
+ if self._path is not None:
454
+ r_or_python = "." if self._path.suffix in {".py", ".ipynb"} else "$"
455
+ project_str = f', project="{project}"' if project is not None else ""
456
+ space_str = f', space="{space}"' if space is not None else ""
457
+ params_str = (
458
+ ", params={...}" if params is not None else ""
459
+ ) # do not put the values because typically parameterized by user
460
+ kwargs_str = f"{project_str}{space_str}{params_str}"
461
+ logger.important_hint(
462
+ f'recommendation: to identify the {notebook_or_script} across renames, pass the uid: ln{r_or_python}track("{self.transform.uid[:-4]}"{kwargs_str})'
463
+ )
434
464
 
435
465
  def _track_source_code(
436
466
  self,
@@ -472,7 +502,7 @@ class Context:
472
502
  path_str: str | None,
473
503
  ) -> tuple[Path, str | None]:
474
504
  if path_str is None:
475
- path = get_notebook_path()
505
+ path, self._notebook_runner = get_notebook_path()
476
506
  else:
477
507
  path = Path(path_str)
478
508
  description = None
@@ -504,6 +534,53 @@ class Context:
504
534
  pass
505
535
  return path, description
506
536
 
537
+ def _process_aux_transform(
538
+ self,
539
+ aux_transform: Transform,
540
+ transform_hash: str,
541
+ ) -> tuple[str, Transform | None, str]:
542
+ # first part of the if condition: no version bump, second part: version bump
543
+ message = ""
544
+ if (
545
+ # if a user hasn't yet saved the transform source code AND is the same user
546
+ (
547
+ aux_transform.source_code is None
548
+ and aux_transform.created_by_id == ln_setup.settings.user.id
549
+ )
550
+ # if the transform source code is unchanged
551
+ # if aux_transform.type == "notebook", we anticipate the user makes changes to the notebook source code
552
+ # in an interactive session, hence we *pro-actively bump* the version number by setting `revises` / 'nbconvert' execution is NOT interactive
553
+ # in the second part of the if condition even though the source code is unchanged at point of running track()
554
+ or (
555
+ aux_transform.hash == transform_hash
556
+ and (
557
+ aux_transform.type != "notebook"
558
+ or self._notebook_runner == "nbconvert"
559
+ )
560
+ )
561
+ ):
562
+ uid = aux_transform.uid
563
+ return uid, aux_transform, message
564
+ else:
565
+ uid = f"{aux_transform.uid[:-4]}{increment_base62(aux_transform.uid[-4:])}"
566
+ message = (
567
+ f"found {aux_transform.type} {aux_transform.key}, making new version"
568
+ )
569
+ if (
570
+ aux_transform.hash == transform_hash
571
+ and aux_transform.type == "notebook"
572
+ ):
573
+ message += " -- anticipating changes"
574
+ elif aux_transform.hash != transform_hash:
575
+ message += (
576
+ "" # could log "source code changed", but this seems too much
577
+ )
578
+ elif aux_transform.created_by_id != ln_setup.settings.user.id:
579
+ message += (
580
+ f" -- {aux_transform.created_by.handle} already works on this draft"
581
+ )
582
+ return uid, None, message
583
+
507
584
  def _create_or_load_transform(
508
585
  self,
509
586
  *,
@@ -512,8 +589,22 @@ class Context:
512
589
  transform_ref_type: str | None = None,
513
590
  transform_type: TransformType = None,
514
591
  ):
515
- # the user did not pass the uid
516
- if self.uid is None:
592
+ from .._finish import notebook_to_script
593
+
594
+ if not self._path.suffix == ".ipynb":
595
+ transform_hash, _ = hash_file(self._path)
596
+ else:
597
+ # need to convert to stripped py:percent format for hashing
598
+ source_code_path = ln_setup.settings.cache_dir / self._path.name.replace(
599
+ ".ipynb", ".py"
600
+ )
601
+ notebook_to_script(description, self._path, source_code_path)
602
+ transform_hash, _ = hash_file(source_code_path)
603
+ # see whether we find a transform with the exact same hash
604
+ aux_transform = Transform.filter(hash=transform_hash).one_or_none()
605
+ # if the user did not pass a uid and there is no matching aux_transform
606
+ # need to search for the transform based on the filename
607
+ if self.uid is None and aux_transform is None:
517
608
 
518
609
  class SlashCount(Func):
519
610
  template = "LENGTH(%(expressions)s) - LENGTH(REPLACE(%(expressions)s, '/', ''))"
@@ -528,47 +619,15 @@ class Context:
528
619
  uid = f"{base62_12()}0000"
529
620
  key = self._path.name
530
621
  target_transform = None
531
- hash, _ = hash_file(self._path)
532
622
  if len(transforms) != 0:
533
623
  message = ""
534
624
  found_key = False
535
625
  for aux_transform in transforms:
536
626
  if aux_transform.key in self._path.as_posix():
537
627
  key = aux_transform.key
538
- # first part of the if condition: no version bump, second part: version bump
539
- if (
540
- # if a user hasn't yet saved the transform source code, needs to be same user
541
- (
542
- aux_transform.source_code is None
543
- and aux_transform.created_by_id
544
- == ln_setup.settings.user.id
545
- )
546
- # if the transform source code is unchanged
547
- # if aux_transform.type == "notebook", we anticipate the user makes changes to the notebook source code
548
- # in an interactive session, hence we *pro-actively bump* the version number by setting `revises`
549
- # in the second part of the if condition even though the source code is unchanged at point of running track()
550
- or (
551
- aux_transform.hash == hash
552
- and aux_transform.type != "notebook"
553
- )
554
- ):
555
- uid = aux_transform.uid
556
- target_transform = aux_transform
557
- else:
558
- uid = f"{aux_transform.uid[:-4]}{increment_base62(aux_transform.uid[-4:])}"
559
- message = f"there already is a {aux_transform.type} with key '{aux_transform.key}'"
560
- if (
561
- aux_transform.hash == hash
562
- and aux_transform.type == "notebook"
563
- ):
564
- message += " -- anticipating changes"
565
- elif aux_transform.hash != hash:
566
- message += "" # could log "source code changed", but this seems too much
567
- elif (
568
- aux_transform.created_by_id != ln_setup.settings.user.id
569
- ):
570
- message += f" -- {aux_transform.created_by.handle} already works on this draft"
571
- message += f", creating new version '{uid}'"
628
+ uid, target_transform, message = self._process_aux_transform(
629
+ aux_transform, transform_hash
630
+ )
572
631
  found_key = True
573
632
  break
574
633
  if not found_key:
@@ -584,7 +643,7 @@ class Context:
584
643
  logger.important(message)
585
644
  self.uid, transform = uid, target_transform
586
645
  # the user did pass the uid
587
- else:
646
+ elif self.uid is not None and len(self.uid) == 16:
588
647
  transform = Transform.filter(uid=self.uid).one_or_none()
589
648
  if transform is not None:
590
649
  if transform.key not in self._path.as_posix():
@@ -599,6 +658,33 @@ class Context:
599
658
  key = transform.key # type: ignore
600
659
  else:
601
660
  key = self._path.name
661
+ else:
662
+ if self.uid is not None:
663
+ assert len(self.uid) == 12, ( # noqa: S101
664
+ "uid must be 12 (stem) or 16 (full) characters long"
665
+ )
666
+ aux_transform = (
667
+ Transform.filter(uid__startswith=self.uid)
668
+ .order_by("-created_at")
669
+ .first()
670
+ )
671
+ if aux_transform is not None:
672
+ if aux_transform.key.endswith(self._path.name):
673
+ key = aux_transform.key
674
+ else:
675
+ key = "/".join(
676
+ aux_transform.key.split("/")[:-1] + [self._path.name]
677
+ )
678
+ uid, target_transform, message = self._process_aux_transform(
679
+ aux_transform, transform_hash
680
+ )
681
+ if message != "":
682
+ logger.important(message)
683
+ else:
684
+ uid = f"{self.uid}0000" if self.uid is not None else None
685
+ target_transform = None
686
+ key = self._path.name
687
+ self.uid, transform = uid, target_transform
602
688
  if self.version is not None:
603
689
  # test inconsistent version passed
604
690
  if (
@@ -610,15 +696,16 @@ class Context:
610
696
  f"✗ please pass consistent version: ln.context.version = '{transform.version}'" # type: ignore
611
697
  )
612
698
  # test whether version was already used for another member of the family
613
- suid, vuid = (self.uid[:-4], self.uid[-4:])
614
- transform = Transform.filter(
615
- uid__startswith=suid, version=self.version
616
- ).one_or_none()
617
- if transform is not None and vuid != transform.uid[-4:]:
618
- better_version = bump_version_function(self.version)
619
- raise SystemExit(
620
- f"✗ version '{self.version}' is already taken by Transform('{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
621
- )
699
+ if self.uid is not None and len(self.uid) == 16:
700
+ suid, vuid = (self.uid[:-4], self.uid[-4:])
701
+ transform = Transform.filter(
702
+ uid__startswith=suid, version=self.version
703
+ ).one_or_none()
704
+ if transform is not None and vuid != transform.uid[-4:]:
705
+ better_version = bump_version_function(self.version)
706
+ raise SystemExit(
707
+ f"✗ version '{self.version}' is already taken by Transform('{transform.uid}'); please set another version, e.g., ln.context.version = '{better_version}'"
708
+ )
622
709
  # make a new transform record
623
710
  if transform is None:
624
711
  assert key is not None # noqa: S101
@@ -659,13 +746,15 @@ class Context:
659
746
  # check whether transform source code was already saved
660
747
  if transform_was_saved:
661
748
  bump_revision = False
662
- if transform.type == "notebook":
749
+ if (
750
+ transform.type == "notebook"
751
+ and self._notebook_runner != "nbconvert"
752
+ ):
663
753
  # we anticipate the user makes changes to the notebook source code
664
754
  # in an interactive session, hence we pro-actively bump the version number
665
755
  bump_revision = True
666
756
  else:
667
- hash, _ = hash_file(self._path) # ignore hash_type for now
668
- if hash != transform.hash:
757
+ if transform_hash != transform.hash:
669
758
  bump_revision = True
670
759
  else:
671
760
  self._logging_message_track += (
@@ -674,7 +763,10 @@ class Context:
674
763
  if bump_revision:
675
764
  change_type = (
676
765
  "re-running notebook with already-saved source code"
677
- if transform.type == "notebook"
766
+ if (
767
+ transform.type == "notebook"
768
+ and self._notebook_runner != "nbconvert"
769
+ )
678
770
  else "source code changed"
679
771
  )
680
772
  raise UpdateContext(
@@ -684,11 +776,11 @@ class Context:
684
776
  self._logging_message_track += f"loaded Transform('{transform.uid}')"
685
777
  self._transform = transform
686
778
 
687
- def finish(self, ignore_non_consecutive: None | bool = None) -> None:
688
- """Finish a tracked run.
779
+ def _finish(self, ignore_non_consecutive: None | bool = None) -> None:
780
+ """Finish the run and write a run report.
689
781
 
690
782
  - writes a timestamp: `run.finished_at`
691
- - saves the source code: `transform.source_code`
783
+ - saves the source code if it is not yet saved: `transform.source_code`
692
784
  - saves a run report: `run.report`
693
785
 
694
786
  When called in the last cell of a notebook:
@@ -732,6 +824,7 @@ class Context:
732
824
  finished_at=True,
733
825
  ignore_non_consecutive=ignore_non_consecutive,
734
826
  is_retry=self._is_finish_retry,
827
+ notebook_runner=self._notebook_runner,
735
828
  )
736
829
  if return_code == "retry":
737
830
  self._is_finish_retry = True
@@ -746,5 +839,13 @@ class Context:
746
839
  self._version = None
747
840
  self._description = None
748
841
 
842
+ @deprecated("ln.track()")
843
+ def track(self, *args, **kwargs):
844
+ return self._track(*args, **kwargs)
845
+
846
+ @deprecated("ln.finish()")
847
+ def finish(self, *args, **kwargs):
848
+ return self._finish(*args, **kwargs)
849
+
749
850
 
750
851
  context: Context = Context()
lamindb/core/_settings.py CHANGED
@@ -9,6 +9,7 @@ from lamindb_setup._set_managed_storage import set_managed_storage
9
9
  from lamindb_setup.core._settings import settings as setup_settings
10
10
  from lamindb_setup.core._settings_instance import sanitize_git_repo_url
11
11
 
12
+ from .subsettings._annotation_settings import AnnotationSettings, annotation_settings
12
13
  from .subsettings._creation_settings import CreationSettings, creation_settings
13
14
 
14
15
  if TYPE_CHECKING:
@@ -34,13 +35,13 @@ VERBOSITY_TO_STR: dict[int, str] = dict(
34
35
  class Settings:
35
36
  """Settings.
36
37
 
37
- Use ``lamindb.settings`` instead of instantiating this class yourself.
38
+ Use `lamindb.settings` instead of instantiating this class yourself.
38
39
  """
39
40
 
40
- def __init__(self, git_repo: str | None):
41
+ def __init__(self):
41
42
  self._verbosity_int: int = 1 # warning-level logging
42
43
  logger.set_verbosity(self._verbosity_int)
43
- self._sync_git_repo: str | None = git_repo
44
+ self._sync_git_repo: str | None = None
44
45
 
45
46
  @property
46
47
  def creation(self) -> CreationSettings:
@@ -51,6 +52,15 @@ class Settings:
51
52
  """
52
53
  return creation_settings
53
54
 
55
+ @property
56
+ def annotation(self) -> AnnotationSettings:
57
+ """Artifact annotation settings.
58
+
59
+ For example, `ln.settings.creation.search_names = False` will disable
60
+ searching for records with similar names during creation.
61
+ """
62
+ return annotation_settings
63
+
54
64
  track_run_inputs: bool = True
55
65
  """Track files as input upon `.load()`, `.cache()` and `.open()`.
56
66
 
@@ -85,13 +95,18 @@ class Settings:
85
95
 
86
96
  Provide the full git repo URL.
87
97
  """
88
- return self._sync_git_repo
98
+ if self._sync_git_repo is not None:
99
+ return self._sync_git_repo
100
+ elif os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
101
+ return None
102
+ else:
103
+ return setup_settings.instance.git_repo
89
104
 
90
105
  @sync_git_repo.setter
91
106
  def sync_git_repo(self, value) -> None:
92
107
  """Sync transforms with scripts in git repository.
93
108
 
94
- For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
109
+ For example: `ln.settings.sync_git_repo = https://github.com/laminlabs/redun-lamin`
95
110
  """
96
111
  self._sync_git_repo = sanitize_git_repo_url(value)
97
112
  if not self._sync_git_repo.startswith("https://"): # pragma: nocover
@@ -99,28 +114,31 @@ class Settings:
99
114
 
100
115
  @property
101
116
  def storage(self) -> StorageSettings:
102
- """Default storage location.
117
+ """Current default storage location for writes.
103
118
 
104
119
  Examples:
105
120
 
106
- >>> ln.settings.storage
107
- StorageSettings(root='s3://my-bucket', uid='j7MaPxtLxPeE')
121
+ Retrieve the storage settings::
122
+
123
+ ln.settings.storage
124
+ #> StorageSettings(root='s3://my-bucket')
108
125
 
109
- >>> ln.settings.storage.root
110
- UPath('s3://my-bucket')
126
+ Retrieve the storage root::
111
127
 
112
- You can switch the default storage location to another managed storage
113
- location by passing a string:
128
+ ln.settings.storage.root
129
+ #> UPath('s3://my-bucket')
114
130
 
115
- >>> ln.settings.storage = "s3://some-bucket"
131
+ You can write artifacts to other storage locations by switching the current default storage location::
116
132
 
117
- You can also pass additional fsspec kwargs via:
133
+ ln.settings.storage = "s3://some-bucket"
118
134
 
119
- >>> kwargs = dict(
120
- >>> profile="some_profile", # fsspec arg
121
- >>> cache_regions=True # fsspec arg for s3
122
- >>> )
123
- >>> ln.settings.storage = "s3://some-bucket", kwargs
135
+ You can also pass additional fsspec kwargs via::
136
+
137
+ kwargs = dict(
138
+ profile="some_profile", # fsspec arg
139
+ cache_regions=True # fsspec arg for s3
140
+ )
141
+ ln.settings.storage = "s3://some-bucket", kwargs
124
142
  """
125
143
  return self._storage_settings
126
144
 
@@ -174,9 +192,4 @@ class Settings:
174
192
  logger.set_verbosity(verbosity_int)
175
193
 
176
194
 
177
- if os.environ.get("LAMINDB_MULTI_INSTANCE") == "true":
178
- git_repo = None
179
- else:
180
- git_repo = setup_settings.instance.git_repo
181
-
182
- settings = Settings(git_repo=git_repo)
195
+ settings = Settings()
lamindb/core/datasets/__init__.py CHANGED
@@ -1,12 +1,17 @@
1
1
  """Test datasets.
2
2
 
3
+ The mini immuno dataset.
4
+
5
+ .. autosummary::
6
+ :toctree: .
7
+
8
+ mini_immuno
9
+
3
10
  Small in-memory datasets.
4
11
 
5
12
  .. autosummary::
6
13
  :toctree: .
7
14
 
8
- small_dataset1
9
- small_dataset2
10
15
  anndata_with_obs
11
16
 
12
17
  Files.
@@ -59,6 +64,7 @@ Other.
59
64
  fake_bio_notebook_titles
60
65
  """
61
66
 
67
+ from . import mini_immuno
62
68
  from ._core import (
63
69
  anndata_file_pbmc68k_test,
64
70
  anndata_human_immune_cells,
@@ -88,7 +94,8 @@ from ._core import (
88
94
  from ._fake import fake_bio_notebook_titles
89
95
  from ._small import (
90
96
  anndata_with_obs,
91
- small_dataset1,
92
- small_dataset2,
93
97
  small_dataset3_cellxgene,
94
98
  )
99
+
100
+ small_dataset1 = mini_immuno.get_dataset1 # backward compat
101
+ small_dataset2 = mini_immuno.get_dataset2 # backward compat