lamindb 0.77.0__py3-none-any.whl → 0.77.2__py3-none-any.whl

lamindb/core/__init__.py CHANGED
@@ -9,7 +9,7 @@ Registries:
    Registry
    QuerySet
    QueryManager
-   RecordsList
+   RecordList
    FeatureManager
    ParamManager
    LabelManager
@@ -88,7 +88,7 @@ from lamindb._curate import (
     MuDataCurator,
 )
 from lamindb._query_manager import QueryManager
-from lamindb._query_set import QuerySet, RecordsList
+from lamindb._query_set import QuerySet, RecordList
 from lamindb.core._feature_manager import FeatureManager, ParamManager
 from lamindb.core._label_manager import LabelManager
 
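
Downstream code that imported the old name needs the one-word change sketched below; only the import path shown in the hunks above is confirmed by this diff, the commented-out line is its 0.77.0 counterpart.

    # 0.77.0
    # from lamindb._query_set import QuerySet, RecordsList
    # 0.77.2, per the hunk above
    from lamindb._query_set import QuerySet, RecordList
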
lamindb/core/_context.py CHANGED
@@ -90,7 +90,7 @@ def raise_missing_context(transform_type: str, key: str) -> bool:
             f"you already have a transform with key '{key}': Transform('{transform.uid[:8]}')\n"
             f' (1) to make a revision, run: ln.track("{new_uid}")\n (2) to create a new transform, rename your {transform_type} file and re-run: ln.track()'
         )
-    if transform_type == "notebook":
+    if is_run_from_ipython:
         print(f"→ {message}")
         response = input("→ Ready to re-run? (y/n)")
         if response == "y":
@@ -343,7 +343,7 @@ class Context:
         )
         if run is not None:  # loaded latest run
             run.started_at = datetime.now(timezone.utc)  # update run time
-            self._logging_message_track += f", started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
+            self._logging_message_track += f", re-started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
 
         if run is None:  # create new run
             run = Run(
@@ -433,7 +433,7 @@ class Context:
             nb = nbproject.dev.read_notebook(path_str)
             self._logging_message_imports += (
                 "notebook imports:"
-                f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}\n"
+                f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
             )
         except Exception:
             logger.debug("inferring imported packages failed")
@@ -579,15 +579,11 @@ class Context:
         `lamin save script.py` or `lamin save notebook.ipynb` → `docs </cli#lamin-save>`__
 
         """
-        from lamindb._finish import save_context_core
-
-        def get_seconds_since_modified(filepath) -> float:
-            return datetime.now().timestamp() - filepath.stat().st_mtime
-
-        def get_shortcut() -> str:
-            import platform
-
-            return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"
+        from lamindb._finish import (
+            get_seconds_since_modified,
+            get_shortcut,
+            save_context_core,
+        )
 
         if self.run is None:
             raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
@@ -609,7 +605,7 @@ class Context:
             self.transform.save()
         if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
             raise NotebookNotSaved(
-                f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) right before calling `ln.finish()`"
+                f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `ln.finish()`"
             )
         save_context_core(
             run=self.run,
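
For reference, the two helpers that `finish()` previously defined inline are now imported from `lamindb._finish`. The sketch below simply reproduces the removed 0.77.0 bodies; whether the 0.77.2 versions in `lamindb._finish` are byte-identical is not shown in this diff.

    import platform
    from datetime import datetime
    from pathlib import Path

    def get_seconds_since_modified(filepath: Path) -> float:
        # seconds elapsed since the notebook/script file was last written to disk
        return datetime.now().timestamp() - filepath.stat().st_mtime

    def get_shortcut() -> str:
        # save-shortcut hint used in the NotebookNotSaved message
        return "CMD + s" if platform.system() == "Darwin" else "CTRL + s"

Combined with the threshold in the last hunk, `ln.finish()` now refuses to proceed unless the notebook file on disk was saved within the last 2 seconds (outside of testing).
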
lamindb/core/_data.py CHANGED
@@ -18,21 +18,16 @@ from lnschema_core.models import (
     record_repr,
 )
 
-from lamindb._parents import view_lineage
 from lamindb._query_set import QuerySet
-from lamindb._record import get_name_field
 from lamindb.core._settings import settings
 
 from ._context import context
 from ._django import get_artifact_with_related, get_related_model
 from ._feature_manager import (
     add_label_feature_links,
-    get_feature_set_links,
     get_host_id_field,
     get_label_links,
-    print_features,
 )
-from ._label_manager import print_labels
 from .exceptions import ValidationError
 from .schema import (
     dict_related_model_to_related_name,
@@ -129,21 +124,10 @@ def format_input_of_runs(self, print_types):
     return ""
 
 
-def format_labels_and_features(self, related_data, print_types):
-    msg = print_labels(
-        self, m2m_data=related_data.get("m2m", {}), print_types=print_types
-    )
-    if isinstance(self, Artifact):
-        msg += print_features(  # type: ignore
-            self,
-            related_data=related_data,
-            print_types=print_types,
-            print_params=hasattr(self, "type") and self.type == "model",
-        )
-    return msg
-
-
 def _describe_postgres(self: Artifact | Collection, print_types: bool = False):
+    from ._describe import describe_general
+    from ._feature_manager import describe_features
+
     model_name = self.__class__.__name__
     msg = f"{colors.green(model_name)}{record_repr(self, include_foreign_keys=False).lstrip(model_name)}\n"
     if self._state.db is not None and self._state.db != "default":
@@ -161,30 +145,21 @@ def _describe_postgres(self: Artifact | Collection, print_types: bool = False):
     else:
         result = get_artifact_with_related(self, include_fk=True, include_m2m=True)
     related_data = result.get("related_data", {})
-    fk_data = related_data.get("fk", {})
-
-    # Provenance
-    prov_msg = format_provenance(self, fk_data, print_types)
-    if prov_msg:
-        msg += f" {colors.italic('Provenance')}\n{prov_msg}"
-
-    # Input of runs
-    input_of_message = format_input_of_runs(self, print_types)
-    if input_of_message:
-        msg += f" {colors.italic('Usage')}\n{input_of_message}"
-
-    # Labels and features
-    msg += format_labels_and_features(self, related_data, print_types)
+    # TODO: fk_data = related_data.get("fk", {})
+
+    tree = describe_general(self)
+    return describe_features(
+        self,
+        tree=tree,
+        related_data=related_data,
+        with_labels=True,
+        print_params=hasattr(self, "type") and self.type == "model",
+    )
 
-    # Print entire message
-    logger.print(msg)
 
-
-@doc_args(Artifact.describe.__doc__)
-def describe(self: Artifact | Collection, print_types: bool = False):
-    """{}"""  # noqa: D415
-    if not self._state.adding and connections[self._state.db].vendor == "postgresql":
-        return _describe_postgres(self, print_types=print_types)
+def _describe_sqlite(self: Artifact | Collection, print_types: bool = False):
+    from ._describe import describe_general
+    from ._feature_manager import describe_features
 
     model_name = self.__class__.__name__
     msg = f"{colors.green(model_name)}{record_repr(self, include_foreign_keys=False).lstrip(model_name)}\n"
@@ -192,7 +167,6 @@ def describe(self: Artifact | Collection, print_types: bool = False):
         msg += f" {colors.italic('Database instance')}\n"
         msg += f" slug: {self._state.db}\n"
 
-    prov_msg = ""
     fields = self._meta.fields
     direct_fields = []
     foreign_key_fields = []
@@ -219,35 +193,26 @@
         .prefetch_related(*many_to_many_fields)
         .get(id=self.id)
     )
+    tree = describe_general(self)
+    return describe_features(
+        self,
+        tree=tree,
+        with_labels=True,
+        print_params=hasattr(self, "type") and self.type == "model",
+    )
 
-    # provenance
-    if len(foreign_key_fields) > 0:  # always True for Artifact and Collection
-        fields_values = [(field, getattr(self, field)) for field in foreign_key_fields]
-        type_str = lambda attr: (
-            f": {attr.__class__.__get_name_with_schema__()}" if print_types else ""
-        )
-        related_msg = "".join(
-            [
-                f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_name_field(attr)))}\n"
-                for (field_name, attr) in fields_values
-                if attr is not None
-            ]
-        )
-        prov_msg += related_msg
-    if prov_msg:
-        msg += f" {colors.italic('Provenance')}\n"
-        msg += prov_msg
 
-    # Input of runs
-    input_of_message = format_input_of_runs(self, print_types)
-    if input_of_message:
-        msg += f" {colors.italic('Usage')}\n{input_of_message}"
+@doc_args(Artifact.describe.__doc__)
+def describe(self: Artifact | Collection, print_types: bool = False):
+    """{}"""  # noqa: D415
+    from ._describe import print_rich_tree
 
-    # Labels and features
-    msg += format_labels_and_features(self, {}, print_types)
+    if not self._state.adding and connections[self._state.db].vendor == "postgresql":
+        tree = _describe_postgres(self, print_types=print_types)
+    else:
+        tree = _describe_sqlite(self, print_types=print_types)
 
-    # Print entire message
-    logger.print(msg)
+    print_rich_tree(tree)
 
 
 def validate_feature(feature: Feature, records: list[Record]) -> None:
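
With this rework, both `_describe_postgres` and the new `_describe_sqlite` return a rich `Tree` (started by `describe_general`, filled in by `describe_features`), and `describe()` only picks the backend path before handing the tree to `print_rich_tree`. A hedged usage sketch; the artifact lookup is illustrative and assumes an initialized instance:

    import lamindb as ln

    artifact = ln.Artifact.filter().first()  # any existing artifact; illustrative
    if artifact is not None:
        # 0.77.2: builds a rich Tree internally and prints it (terminal or IPython display)
        artifact.describe()
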
@@ -314,6 +279,7 @@ def add_labels(
     field: StrField | None = None,
     feature_ref_is_name: bool | None = None,
     label_ref_is_name: bool | None = None,
+    from_curator: bool = False,
 ) -> None:
     """{}"""  # noqa: D415
     if self._state.adding:
@@ -374,11 +340,36 @@ def add_labels(
     else:
         validate_feature(feature, records)  # type:ignore
         records_by_registry = defaultdict(list)
+        feature_sets = self.feature_sets.filter(registry="Feature").all()
+        internal_features = set()  # type: ignore
+        if len(feature_sets) > 0:
+            for feature_set in feature_sets:
+                internal_features = internal_features.union(
+                    set(feature_set.members.values_list("name", flat=True))
+                )  # type: ignore
         for record in records:
             records_by_registry[record.__class__.__get_name_with_schema__()].append(
                 record
             )
         for registry_name, records in records_by_registry.items():
+            if not from_curator and feature.name in internal_features:
+                raise ValidationError(
+                    "Cannot manually annotate internal feature with label. Please use ln.Curator"
+                )
+            if registry_name not in feature.dtype:
+                if not feature.dtype.startswith("cat"):
+                    raise ValidationError(
+                        f"Feature {feature.name} needs dtype='cat' for label annotation, currently has dtype='{feature.dtype}'"
+                    )
+                if feature.dtype == "cat":
+                    feature.dtype = f"cat[{registry_name}]"
+                    feature.save()
+                elif registry_name not in feature.dtype:
+                    new_dtype = feature.dtype.rstrip("]") + f"|{registry_name}]"
+                    raise ValidationError(
+                        f"Label type {registry_name} is not valid for Feature(name='{feature.name}', dtype='{feature.dtype}'), consider updating to dtype='{new_dtype}'"
+                    )
+
             if registry_name not in self.features._accessor_by_registry:
                 logger.warning(f"skipping {registry_name}")
                 continue
@@ -393,27 +384,6 @@ def add_labels(
                 feature_ref_is_name=feature_ref_is_name,
                 label_ref_is_name=label_ref_is_name,
             )
-    links_feature_set = get_feature_set_links(self)
-    feature_set_ids = [link.featureset_id for link in links_feature_set.all()]
-    # get all linked features of type Feature
-    feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
-    {
-        links_feature_set.filter(featureset_id=feature_set.id)
-        .one()
-        .slot: feature_set.features.all()
-        for feature_set in feature_sets
-        if "Feature" == feature_set.registry
-    }
-    for registry_name, _ in records_by_registry.items():
-        if registry_name not in feature.dtype:
-            logger.debug(
-                f"updated categorical feature '{feature.name}' type with registry '{registry_name}'"
-            )
-            if not feature.dtype.startswith("cat["):
-                feature.dtype = f"cat[{registry_name}]"
-            elif registry_name not in feature.dtype:
-                feature.dtype = feature.dtype.rstrip("]") + f"|{registry_name}]"
-            feature.save()
 
 
 def _track_run_input(
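
The `add_labels` changes above replace the old silent dtype rewrite with explicit validation: only categorical features can carry labels, features that are members of a `Feature`-typed feature set are reserved for `ln.Curator`, a plain 'cat' dtype is narrowed to 'cat[<Registry>]' on first use, and a mismatched registry now raises instead of being appended. A hedged sketch of the resulting behavior (record names, the local file, and an initialized instance are assumptions):

    import lamindb as ln
    from lamindb.core.exceptions import ValidationError

    medium = ln.Feature(name="cell_medium", dtype="cat").save()  # plain categorical feature
    dmso = ln.ULabel(name="DMSO").save()
    artifact = ln.Artifact("data.parquet", key="data.parquet").save()  # hypothetical local file

    # narrows medium.dtype from "cat" to "cat[ULabel]" and links the label
    artifact.labels.add(dmso, feature=medium)

    # non-categorical features are rejected instead of silently rewritten
    temperature = ln.Feature(name="temperature", dtype="float").save()
    try:
        artifact.labels.add(dmso, feature=temperature)
    except ValidationError as err:
        print(err)  # Feature temperature needs dtype='cat' for label annotation, ...
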
@@ -0,0 +1,139 @@
+from __future__ import annotations
+
+import datetime
+from typing import TYPE_CHECKING
+
+from lamin_utils import logger
+from rich.text import Text
+from rich.tree import Tree
+
+if TYPE_CHECKING:
+    from lnschema_core.models import Artifact, Collection, Run
+
+
+def highlight_time(iso: str):
+    tz = datetime.datetime.now().astimezone().tzinfo
+    res = (
+        datetime.datetime.fromisoformat(iso)
+        .replace(tzinfo=datetime.timezone.utc)
+        .astimezone(tz)
+        .strftime("%Y-%m-%d %H:%M:%S")
+    )
+    return Text(res, style="dim")
+
+
+# Define consistent column widths
+NAME_WIDTH = 25
+TYPE_WIDTH = 25
+VALUES_WIDTH = 40
+
+
+def print_rich_tree(tree: Tree, fallback=str):
+    from rich.console import Console
+
+    console = Console(force_terminal=True)
+
+    if tree.children:
+        try:
+            from IPython import get_ipython
+            from IPython.core.interactiveshell import InteractiveShell
+            from IPython.display import display
+
+            shell = get_ipython()
+            if isinstance(shell, InteractiveShell):  # Covers all interactive shells
+                display(tree)
+                return ""
+            else:
+                with console.capture() as capture:
+                    console.print(tree)
+                return capture.get()
+        except (ImportError, NameError):
+            with console.capture() as capture:
+                console.print(tree)
+            return capture.get()
+    else:
+        return fallback
+
+
+def describe_header(self: Artifact | Collection | Run) -> Tree:
+    if hasattr(self, "is_latest") and not self.is_latest:
+        logger.warning(
+            f"This is not the latest version of the {self.__class__.__name__}."
+        )
+    if hasattr(self, "visibility"):
+        if self.visibility == 0:
+            logger.warning("This artifact is hidden.")
+        elif self.visibility == -1:
+            logger.warning("This artifact is the trash.")
+    # initialize tree
+    suffix = self.suffix if hasattr(self, "suffix") and self.suffix else ""
+    accessor = self._accessor if hasattr(self, "_accessor") and self._accessor else ""
+    suffix_accessor = (
+        f"{suffix}/{accessor}" if suffix and accessor else suffix or accessor or ""
+    )
+
+    tree = Tree(
+        Text.assemble(
+            (self.__class__.__name__, "bold"), (f" {suffix_accessor}", "bold dim")
+        ),
+        guide_style="dim",  # dim the connecting lines
+    )
+    return tree
+
+
+def describe_general(self: Artifact | Collection, tree: Tree | None = None) -> Tree:
+    if tree is None:
+        tree = describe_header(self)
+
+    # add general information (order is the same as in API docs)
+    general = tree.add(Text("General", style="bold bright_cyan"))
+    general.add(f".uid = '{self.uid}'")
+    if hasattr(self, "key") and self.key:
+        general.add(
+            f".key = '{self.key}'" if self._key_is_virtual else f".key = {self.key}"
+        )
+    if hasattr(self, "size") and self.size:
+        general.add(f".size = {self.size}")
+    if hasattr(self, "hash") and self.hash:
+        general.add(f".hash = '{self.hash}'")
+    if hasattr(self, "n_objects") and self.n_objects:
+        general.add(f".n_objects = {self.n_objects}")
+    if hasattr(self, "n_observations") and self.n_observations:
+        general.add(Text(f".n_observations = {self.n_observations}"))
+    if hasattr(self, "version") and self.version:
+        general.add(Text(f".version = '{self.version}'"))
+
+    if hasattr(self, "storage"):
+        storage_root = self.storage.root
+        # general.add(f".storage = {storage_root}")
+        general.add(
+            Text.assemble(
+                ".path = ",
+                (storage_root, "dim"),
+                f"{str(self.path).removeprefix(storage_root)}",
+            )
+        )
+    if hasattr(self, "created_by") and self.created_by:
+        general.add(
+            Text.assemble(
+                ".created_by = ",
+                (
+                    self.created_by.handle
+                    if self.created_by.name is None
+                    else f"{self.created_by.handle} ({self.created_by.name})"
+                ),
+            )
+        )
+    if hasattr(self, "created_at") and self.created_at:
+        general.add(
+            Text.assemble(".created_at = ", highlight_time(str(self.created_at)))
+        )
+    if hasattr(self, "transform") and self.transform:
+        general.add(
+            Text(
+                f".transform = '{self.transform.name}'",
+                style="cyan3",
+            )
+        )
+
+    return tree
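
The added module above is not named in this rendering; given that `_data.py` now imports `describe_general`, `describe_features`, and `print_rich_tree` from `._describe`, it is presumably `lamindb/core/_describe.py`. A minimal sketch of how its pieces compose outside of `describe()` (the module path and the artifact lookup are assumptions):

    import lamindb as ln
    from lamindb.core._describe import describe_general, print_rich_tree  # assumed path

    artifact = ln.Artifact.filter().first()  # any existing artifact; illustrative
    if artifact is not None:
        tree = describe_general(artifact)  # Tree with a "General" branch: .uid, .key, .size, ...
        print_rich_tree(tree)  # rich output in a terminal, IPython display in notebooks
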
lamindb/core/_django.py CHANGED
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from django.contrib.postgres.aggregates import ArrayAgg
 from django.db import connection
 from django.db.models import F, OuterRef, Q, Subquery
@@ -35,7 +37,7 @@ def get_artifact_with_related(
     """Fetch an artifact with its related data."""
     from lamindb._can_curate import get_name_field
 
-    from ._label_manager import LABELS_EXCLUDE_SET
+    from ._label_manager import EXCLUDE_LABELS
 
     model = artifact.__class__
     schema_modules = get_schemas_modules(artifact._state.db)
@@ -54,7 +56,7 @@ def get_artifact_with_related(
             for v in dict_related_model_to_related_name(
                 model, instance=artifact._state.db
             ).values()
-            if not v.startswith("_") and v not in LABELS_EXCLUDE_SET
+            if not v.startswith("_") and v not in EXCLUDE_LABELS
         ]
     )
     link_tables = (
@@ -179,14 +181,11 @@ def get_featureset_m2m_relations(
 
         # Get the correct field names for the through table
         through_model = getattr(FeatureSet, name).through
-        related_field = (
-            through_model.__name__.replace("FeatureSet", "").lower().replace("_", "")
-        )
 
         # Subquery to get limited related records
         limited_related = Subquery(
             through_model.objects.filter(featureset=OuterRef("pk")).values(
-                related_field
+                related_model.__name__.lower()
             )[:limit]
         )
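
The last hunk stops deriving the through-table column from the through model's class name and instead selects `related_model.__name__.lower()`, which states directly which foreign key the subquery pulls. Both expressions agree for link tables whose FK field is named after the related registry, as this illustrative check shows (model names are made up, not lamindb's schema):

    # Illustrative only: the 0.77.0 vs 0.77.2 way of picking the FK column
    # on a FeatureSet link table such as "FeatureSetGene" pointing at a "Gene" registry.
    through_model_name = "FeatureSetGene"  # hypothetical through model
    related_model_name = "Gene"  # hypothetical related registry

    old_field = through_model_name.replace("FeatureSet", "").lower().replace("_", "")  # -> "gene"
    new_field = related_model_name.lower()  # -> "gene"
    assert old_field == new_field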