lamindb 0.75.1__py3-none-any.whl → 0.76.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -21,9 +21,8 @@ Key functionality:
21
21
  .. autosummary::
22
22
  :toctree: .
23
23
 
24
+ context
24
25
  connect
25
- track
26
- finish
27
26
  Curate
28
27
  view
29
28
  save
@@ -42,7 +41,7 @@ Modules & settings:
42
41
  """
43
42
 
44
43
  # denote a release candidate for 0.1.0 with 0.1rc1, 0.1a1, 0.1b1, etc.
45
- __version__ = "0.75.1"
44
+ __version__ = "0.76.1"
46
45
 
47
46
  import os as _os
48
47
 
@@ -93,10 +92,9 @@ if _check_instance_setup(from_lamindb=True):
93
92
  integrations,
94
93
  )
95
94
  from ._curate import Curate
96
- from ._finish import finish
97
95
  from ._save import save
98
96
  from ._view import view
99
- from .core._run_context import run_context as _run_context
97
+ from .core._context import context
100
98
  from .core._settings import settings
101
99
 
102
100
  # schema modules
@@ -107,8 +105,8 @@ if _check_instance_setup(from_lamindb=True):
107
105
 
108
106
  _reload_schema_modules(_lamindb_setup.settings.instance)
109
107
 
110
- track = _run_context._track
108
+ track = context.track # backward compat
109
+ finish = context.finish # backward compat
111
110
  settings.__doc__ = """Global :class:`~lamindb.core.Settings`."""
111
+ context.__doc__ = """Global :class:`~lamindb.core.Context`."""
112
112
  from django.db.models import Q
113
-
114
- Annotate = Curate # backward compat
lamindb/_artifact.py CHANGED
@@ -107,7 +107,7 @@ def process_pathlike(
107
107
  new_root = list(filepath.parents)[-1]
108
108
  # do not register remote storage locations on hub if the current instance
109
109
  # is not managed on the hub
110
- storage_settings = init_storage(
110
+ storage_settings, _ = init_storage(
111
111
  new_root, prevent_register_hub=not setup_settings.instance.is_on_hub
112
112
  )
113
113
  storage_record = register_storage_in_instance(storage_settings)
@@ -594,6 +594,7 @@ def __init__(artifact: Artifact, *args, **kwargs):
594
594
  kwargs["description"] = description
595
595
  kwargs["visibility"] = visibility
596
596
  kwargs["_accessor"] = accessor
597
+ kwargs["is_new_version_of"] = is_new_version_of
597
598
  # this check needs to come down here because key might be populated from an
598
599
  # existing file path during get_artifact_kwargs_from_data()
599
600
  if (
lamindb/_collection.py CHANGED
@@ -158,6 +158,7 @@ def __init__(
158
158
  run=run,
159
159
  version=version,
160
160
  visibility=visibility,
161
+ is_new_version_of=is_new_version_of,
161
162
  **kwargs,
162
163
  )
163
164
  collection._artifacts = artifacts
lamindb/_curate.py CHANGED
@@ -313,11 +313,11 @@ class DataFrameCurator:
313
313
 
314
314
  def clean_up_failed_runs(self):
315
315
  """Clean up previous failed runs that don't save any outputs."""
316
- from lamindb.core._run_context import run_context
316
+ from lamindb.core._context import context
317
317
 
318
- if run_context.transform is not None:
319
- Run.filter(transform=run_context.transform, output_artifacts=None).exclude(
320
- uid=run_context.run.uid
318
+ if context.run is not None:
319
+ Run.filter(transform=context.run.transform, output_artifacts=None).exclude(
320
+ uid=context.run.uid
321
321
  ).delete()
322
322
 
323
323
 
lamindb/_feature.py CHANGED
@@ -42,7 +42,7 @@ def __init__(self, *args, **kwargs):
42
42
  return None
43
43
  # now we proceed with the user-facing constructor
44
44
  if len(args) != 0:
45
- raise ValueError("Only non-keyword args allowed")
45
+ raise ValueError("Only keyword args allowed")
46
46
  dtype: type | str = kwargs.pop("dtype") if "dtype" in kwargs else None
47
47
  # cast type
48
48
  if dtype is None:
lamindb/_finish.py CHANGED
@@ -9,9 +9,6 @@ from typing import TYPE_CHECKING
9
9
  import lamindb_setup as ln_setup
10
10
  from lamin_utils import logger
11
11
  from lamindb_setup.core.hashing import hash_file
12
- from lnschema_core.types import TransformType
13
-
14
- from .core._run_context import is_run_from_ipython, run_context
15
12
 
16
13
  if TYPE_CHECKING:
17
14
  from pathlib import Path
@@ -21,51 +18,7 @@ if TYPE_CHECKING:
21
18
  from ._query_set import QuerySet
22
19
 
23
20
 
24
- class TrackNotCalled(SystemExit):
25
- pass
26
-
27
-
28
- class NotebookNotSaved(SystemExit):
29
- pass
30
-
31
-
32
- def get_seconds_since_modified(filepath) -> float:
33
- return datetime.now().timestamp() - filepath.stat().st_mtime
34
-
35
-
36
- def finish() -> None:
37
- """Mark a tracked run as finished.
38
-
39
- Saves source code and, for notebooks, a run report to your default storage location.
40
- """
41
- if run_context.run is None:
42
- raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
43
- if run_context.path is None:
44
- if run_context.transform.type in {"script", "notebook"}:
45
- raise ValueError(
46
- f"Transform type is not allowed to be 'script' or 'notebook' but is {run_context.transform.type}."
47
- )
48
- run_context.run.finished_at = datetime.now(timezone.utc)
49
- run_context.run.save()
50
- # nothing else to do
51
- return None
52
- if is_run_from_ipython: # notebooks
53
- if (
54
- get_seconds_since_modified(run_context.path) > 3
55
- and os.getenv("LAMIN_TESTING") is None
56
- ):
57
- raise NotebookNotSaved(
58
- "Please save the notebook in your editor right before running `ln.finish()`"
59
- )
60
- save_run_context_core(
61
- run=run_context.run,
62
- transform=run_context.transform,
63
- filepath=run_context.path,
64
- finished_at=True,
65
- )
66
-
67
-
68
- def save_run_context_core(
21
+ def save_context_core(
69
22
  *,
70
23
  run: Run,
71
24
  transform: Transform,
@@ -76,6 +29,8 @@ def save_run_context_core(
76
29
  ) -> str | None:
77
30
  import lamindb as ln
78
31
 
32
+ from .core._context import context, is_run_from_ipython
33
+
79
34
  ln.settings.verbosity = "success"
80
35
 
81
36
  # for scripts, things are easy
@@ -182,7 +137,9 @@ def save_run_context_core(
182
137
  f"replaced transform._source_code_artifact: {transform._source_code_artifact}"
183
138
  )
184
139
  else:
185
- logger.warning("Please re-run `ln.track()` to make a new version")
140
+ logger.warning(
141
+ "Please re-run `ln.context.track()` to make a new version"
142
+ )
186
143
  return "rerun-the-notebook"
187
144
  else:
188
145
  logger.important("source code is already saved")
@@ -282,7 +239,7 @@ def save_run_context_core(
282
239
  logger.important(
283
240
  f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
284
241
  )
285
- # because run & transform changed, update the global run_context
286
- run_context.run = run
287
- run_context.transform = transform
242
+ # because run & transform changed, update the global context
243
+ context._run = run
244
+ context._transform = transform
288
245
  return None
lamindb/_from_values.py CHANGED
@@ -47,15 +47,35 @@ def get_or_create_records(
47
47
 
48
48
  # new records to be created based on new values
49
49
  if len(nonexist_values) > 0:
50
- if source:
51
- from_source = not source.in_db
52
- elif (
53
- records
54
- and hasattr(records[0], "source_id")
55
- and records[0].source_id
56
- and records[0].source.in_db
57
- ):
50
+ source_record = None
51
+ if from_source:
52
+ if isinstance(source, Record):
53
+ source_record = source
54
+ elif (
55
+ len(records) > 0
56
+ and hasattr(records[0], "source_id")
57
+ and records[0].source_id
58
+ ):
59
+ source_record = records[0].source
60
+ if not source_record and hasattr(Record, "public"):
61
+ from bionty._bionty import get_source_record
62
+
63
+ source_record = get_source_record(Record.public(organism=organism))
64
+ if source_record:
65
+ from bionty.core._add_ontology import check_source_in_db
66
+
67
+ check_source_in_db(
68
+ registry=Record,
69
+ source=source_record,
70
+ update=True,
71
+ )
72
+
73
+ from_source = not source_record.in_db
74
+ elif hasattr(Record, "source_id"):
75
+ from_source = True
76
+ else:
58
77
  from_source = False
78
+
59
79
  if from_source:
60
80
  records_bionty, unmapped_values = create_records_from_source(
61
81
  iterable_idx=nonexist_values,
@@ -211,10 +231,6 @@ def create_records_from_source(
211
231
  return records, iterable_idx
212
232
  # add source record to the kwargs
213
233
  source_record = get_source_record(public_ontology)
214
- if source_record is not None and source_record.in_db:
215
- # skips the creation of records from public if the source is already in the db
216
- return records, iterable_idx
217
-
218
234
  kwargs.update({"source": source_record})
219
235
 
220
236
  # filter the columns in bionty df based on fields
lamindb/_query_manager.py CHANGED
@@ -40,11 +40,11 @@ class QueryManager(models.Manager):
40
40
  self.source_field_name == "collection"
41
41
  and self.target_field_name == "artifact"
42
42
  ):
43
+ from lamindb.core._context import context
43
44
  from lamindb.core._data import WARNING_RUN_TRANSFORM, _track_run_input
44
- from lamindb.core._run_context import run_context
45
45
 
46
46
  if (
47
- run_context.run is None
47
+ context.run is None
48
48
  and not settings.creation.artifact_silence_missing_run_warning
49
49
  ):
50
50
  logger.warning(WARNING_RUN_TRANSFORM)
lamindb/_query_set.py CHANGED
@@ -17,14 +17,12 @@ from lnschema_core.models import (
17
17
  Transform,
18
18
  )
19
19
 
20
+ from lamindb.core.exceptions import DoesNotExist
21
+
20
22
  if TYPE_CHECKING:
21
23
  from lnschema_core.types import ListLike, StrField
22
24
 
23
25
 
24
- class NoResultFound(Exception):
25
- pass
26
-
27
-
28
26
  class MultipleResultsFound(Exception):
29
27
  pass
30
28
 
@@ -59,7 +57,7 @@ def get_keys_from_df(data: list, registry: Record) -> list[str]:
59
57
 
60
58
  def one_helper(self):
61
59
  if len(self) == 0:
62
- raise NoResultFound
60
+ raise DoesNotExist
63
61
  elif len(self) > 1:
64
62
  raise MultipleResultsFound(self)
65
63
  else:
@@ -243,10 +241,10 @@ class QuerySet(models.QuerySet, CanValidate):
243
241
  else:
244
242
  raise MultipleResultsFound(self.all())
245
243
 
246
- def latest_version(self) -> RecordsList:
244
+ def latest_version(self) -> QuerySet:
247
245
  """Filter every version family by latest version."""
248
246
  if issubclass(self.model, IsVersioned):
249
- return filter_query_set_by_latest_version(self)
247
+ return self.filter(is_latest=True)
250
248
  else:
251
249
  raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
252
250
 
@@ -288,29 +286,6 @@ class QuerySet(models.QuerySet, CanValidate):
288
286
  return _standardize(cls=self, values=values, field=field, **kwargs)
289
287
 
290
288
 
291
- def filter_query_set_by_latest_version(ordered_query_set: QuerySet) -> RecordsList:
292
- # evaluating length can be very costly, hence, the try-except block
293
- try:
294
- first_record = ordered_query_set[0]
295
- except IndexError:
296
- return ordered_query_set
297
- records_in_view = {}
298
- records_in_view[first_record.stem_uid] = first_record
299
- for record in ordered_query_set:
300
- # this overwrites user-provided ordering (relevant records ordered by a
301
- # certain field will not show if they are not the latest version)
302
- if record.stem_uid not in records_in_view:
303
- records_in_view[record.stem_uid] = record
304
- else:
305
- if record.created_at > records_in_view[record.stem_uid].created_at:
306
- # deleting the entry is needed to preserve the integrity of
307
- # user-provided ordering
308
- del records_in_view[record.stem_uid]
309
- records_in_view[record.stem_uid] = record
310
- list_records_in_view = RecordsList(records_in_view.values())
311
- return list_records_in_view
312
-
313
-
314
289
  models.QuerySet.df = QuerySet.df
315
290
  models.QuerySet.list = QuerySet.list
316
291
  models.QuerySet.first = QuerySet.first
lamindb/_record.py CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, List, NamedTuple
5
5
 
6
6
  import dj_database_url
7
7
  import lamindb_setup as ln_setup
8
- from django.db import connections
8
+ from django.db import connections, transaction
9
9
  from django.db.models import IntegerField, Manager, Q, QuerySet, Value
10
10
  from lamin_utils import logger
11
11
  from lamin_utils._lookup import Lookup
@@ -123,24 +123,38 @@ def filter(cls, **expressions) -> QuerySet:
123
123
 
124
124
  @classmethod # type:ignore
125
125
  @doc_args(Record.get.__doc__)
126
- def get(cls, idlike: int | str) -> Record:
126
+ def get(
127
+ cls,
128
+ idlike: int | str | None = None,
129
+ **expressions,
130
+ ) -> Record:
127
131
  """{}""" # noqa: D415
128
132
  from lamindb._filter import filter
129
133
 
130
134
  if isinstance(idlike, int):
131
135
  return filter(cls, id=idlike).one()
132
- else:
136
+ elif isinstance(idlike, str):
133
137
  qs = filter(cls, uid__startswith=idlike)
134
138
  if issubclass(cls, IsVersioned):
135
- return qs.latest_version().one()
139
+ if len(idlike) <= cls._len_stem_uid:
140
+ return qs.latest_version().one()
141
+ else:
142
+ return qs.one()
136
143
  else:
137
144
  return qs.one()
145
+ else:
146
+ assert idlike is None # noqa: S101
147
+ # below behaves exactly like `.one()`
148
+ return cls.objects.get(**expressions)
138
149
 
139
150
 
140
151
  @classmethod # type:ignore
141
152
  @doc_args(Record.df.__doc__)
142
153
  def df(
143
- cls, include: str | list[str] | None = None, join: str = "inner"
154
+ cls,
155
+ include: str | list[str] | None = None,
156
+ join: str = "inner",
157
+ limit: int = 100,
144
158
  ) -> pd.DataFrame:
145
159
  """{}""" # noqa: D415
146
160
  from lamindb._filter import filter
@@ -148,7 +162,7 @@ def df(
148
162
  query_set = filter(cls)
149
163
  if hasattr(cls, "updated_at"):
150
164
  query_set = query_set.order_by("-updated_at")
151
- return query_set.df(include=include, join=join)
165
+ return query_set[:limit].df(include=include, join=join)
152
166
 
153
167
 
154
168
  # from_values doesn't apply for QuerySet or Manager
@@ -165,9 +179,7 @@ def from_values(
165
179
  ) -> list[Record]:
166
180
  """{}""" # noqa: D415
167
181
  from_source = True if cls.__module__.startswith("bionty.") else False
168
- # if records from source is already saved in db, skip from_source
169
- if isinstance(source, Record) and source.in_db:
170
- from_source = False
182
+
171
183
  field_str = get_name_field(cls, field=field)
172
184
  return get_or_create_records(
173
185
  iterable=values,
@@ -472,8 +484,8 @@ def transfer_to_default_db(
472
484
  return record_on_default
473
485
  if not mute:
474
486
  logger.hint(f"saving from instance {db} to default instance: {record}")
487
+ from lamindb.core._context import context
475
488
  from lamindb.core._data import WARNING_RUN_TRANSFORM
476
- from lamindb.core._run_context import run_context
477
489
 
478
490
  if hasattr(record, "created_by_id"):
479
491
  # this line is needed to point created_by to default db
@@ -481,16 +493,16 @@ def transfer_to_default_db(
481
493
  record.created_by_id = ln_setup.settings.user.id
482
494
  if hasattr(record, "run_id"):
483
495
  record.run = None
484
- if run_context.run is not None:
485
- record.run_id = run_context.run.id
496
+ if context.run is not None:
497
+ record.run_id = context.run.id
486
498
  else:
487
499
  if not settings.creation.artifact_silence_missing_run_warning:
488
500
  logger.warning(WARNING_RUN_TRANSFORM)
489
501
  record.run_id = None
490
502
  if hasattr(record, "transform_id") and record._meta.model_name != "run":
491
503
  record.transform = None
492
- if run_context.transform is not None:
493
- record.transform_id = run_context.transform.id
504
+ if context.run is not None:
505
+ record.transform_id = context.run.transform_id
494
506
  else:
495
507
  record.transform_id = None
496
508
  # transfer other foreign key fields
@@ -528,7 +540,28 @@ def save(self, *args, **kwargs) -> Record:
528
540
  if result is not None:
529
541
  init_self_from_db(self, result)
530
542
  else:
531
- super(Record, self).save(*args, **kwargs)
543
+ # save versioned record
544
+ if isinstance(self, IsVersioned) and self._is_new_version_of is not None:
545
+ if self._is_new_version_of.is_latest:
546
+ is_new_version_of = self._is_new_version_of
547
+ else:
548
+ # need one additional request
549
+ is_new_version_of = self.__class__.objects.get(
550
+ is_latest=True, uid__startswith=self.stem_uid
551
+ )
552
+ logger.warning(
553
+ f"didn't pass the latest version in `is_new_version_of`, retrieved it: {is_new_version_of}"
554
+ )
555
+ is_new_version_of.is_latest = False
556
+ with transaction.atomic():
557
+ is_new_version_of._is_new_version_of = (
558
+ None # ensure we don't start a recursion
559
+ )
560
+ is_new_version_of.save()
561
+ super(Record, self).save(*args, **kwargs)
562
+ # save unversioned record
563
+ else:
564
+ super(Record, self).save(*args, **kwargs)
532
565
  # perform transfer of many-to-many fields
533
566
  # only supported for Artifact and Collection records
534
567
  if db is not None and db != "default" and using_key is None:
@@ -553,6 +586,30 @@ def save(self, *args, **kwargs) -> Record:
553
586
  return self
554
587
 
555
588
 
589
+ def delete(self) -> None:
590
+ """Delete the record."""
591
+ # note that the logic below does not fire if a record is moved to the trash
592
+ # the idea is that moving a record to the trash should move its entire version family
593
+ # to the trash, whereas permanently deleting should default to only deleting a single record
594
+ # of a version family
595
+ # we can consider making it easy to permanently delete entire version families as well,
596
+ # but that's for another time
597
+ if isinstance(self, IsVersioned) and self.is_latest:
598
+ new_latest = (
599
+ self.__class__.filter(is_latest=False, uid__startswith=self.stem_uid)
600
+ .order_by("-created_at")
601
+ .first()
602
+ )
603
+ if new_latest is not None:
604
+ new_latest.is_latest = True
605
+ with transaction.atomic():
606
+ new_latest.save()
607
+ super(Record, self).delete()
608
+ logger.warning(f"new latest version is {new_latest}")
609
+ return None
610
+ super(Record, self).delete()
611
+
612
+
556
613
  METHOD_NAMES = [
557
614
  "__init__",
558
615
  "filter",
@@ -561,6 +618,7 @@ METHOD_NAMES = [
561
618
  "search",
562
619
  "lookup",
563
620
  "save",
621
+ "delete",
564
622
  "from_values",
565
623
  "using",
566
624
  ]
lamindb/_run.py CHANGED
@@ -42,7 +42,7 @@ def delete_run_artifacts(run: Run) -> None:
42
42
  run.save()
43
43
  if environment is not None:
44
44
  # only delete if there are no other runs attached to this environment
45
- if environment.environment_of.count() == 0:
45
+ if environment._environment_of.count() == 0:
46
46
  environment.delete(permanent=True)
47
47
  if report is not None:
48
48
  report.delete(permanent=True)
lamindb/_transform.py CHANGED
@@ -22,7 +22,6 @@ def __init__(transform: Transform, *args, **kwargs):
22
22
  is_new_version_of: Transform | None = (
23
23
  kwargs.pop("is_new_version_of") if "is_new_version_of" in kwargs else None
24
24
  )
25
- (kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None)
26
25
  version: str | None = kwargs.pop("version") if "version" in kwargs else None
27
26
  type: TransformType | None = kwargs.pop("type") if "type" in kwargs else "pipeline"
28
27
  reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
@@ -55,6 +54,7 @@ def __init__(transform: Transform, *args, **kwargs):
55
54
  reference=reference,
56
55
  reference_type=reference_type,
57
56
  _has_consciously_provided_uid=has_consciously_provided_uid,
57
+ is_new_version_of=is_new_version_of,
58
58
  )
59
59
 
60
60
 
lamindb/core/__init__.py CHANGED
@@ -35,14 +35,20 @@ Curators:
35
35
  MuDataCurator
36
36
  CurateLookup
37
37
 
38
- Other:
38
+ Settings & context:
39
39
 
40
40
  .. autosummary::
41
41
  :toctree: .
42
42
 
43
43
  Settings
44
+ Context
45
+
46
+ Data loaders:
47
+
48
+ .. autosummary::
49
+ :toctree: .
50
+
44
51
  MappedCollection
45
- run_context
46
52
 
47
53
  Modules:
48
54
 
@@ -84,6 +90,6 @@ from lamindb.core._feature_manager import FeatureManager, ParamManager
84
90
  from lamindb.core._label_manager import LabelManager
85
91
 
86
92
  from . import _data, datasets, exceptions, fields, subsettings, types
93
+ from ._context import Context
87
94
  from ._mapped_collection import MappedCollection
88
- from ._run_context import run_context
89
95
  from ._settings import Settings