lamindb 0.74.3__py3-none-any.whl → 0.75.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_record.py CHANGED
@@ -36,9 +36,9 @@ def init_self_from_db(self: Record, existing_record: Record):
36
36
  self._state.db = "default"
37
37
 
38
38
 
39
- def validate_required_fields(orm: Record, kwargs):
39
+ def validate_required_fields(record: Record, kwargs):
40
40
  required_fields = {
41
- k.name for k in orm._meta.fields if not k.null and k.default is None
41
+ k.name for k in record._meta.fields if not k.null and k.default is None
42
42
  }
43
43
  required_fields_not_passed = {k: None for k in required_fields if k not in kwargs}
44
44
  kwargs.update(required_fields_not_passed)
@@ -77,9 +77,9 @@ def suggest_records_with_similar_names(record: Record, kwargs) -> bool:
77
77
  return False
78
78
 
79
79
 
80
- def __init__(orm: Record, *args, **kwargs):
80
+ def __init__(record: Record, *args, **kwargs):
81
81
  if not args:
82
- validate_required_fields(orm, kwargs)
82
+ validate_required_fields(record, kwargs)
83
83
 
84
84
  # do not search for names if an id is passed; this is important
85
85
  # e.g. when synching ids from the notebook store to lamindb
@@ -87,29 +87,29 @@ def __init__(orm: Record, *args, **kwargs):
87
87
  if "_has_consciously_provided_uid" in kwargs:
88
88
  has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
89
89
  if settings.creation.search_names and not has_consciously_provided_uid:
90
- match = suggest_records_with_similar_names(orm, kwargs)
90
+ match = suggest_records_with_similar_names(record, kwargs)
91
91
  if match:
92
92
  if "version" in kwargs:
93
93
  version_comment = " and version"
94
- existing_record = orm.__class__.filter(
94
+ existing_record = record.__class__.filter(
95
95
  name=kwargs["name"], version=kwargs["version"]
96
96
  ).one_or_none()
97
97
  else:
98
98
  version_comment = ""
99
- existing_record = orm.__class__.filter(name=kwargs["name"]).one()
99
+ existing_record = record.__class__.filter(name=kwargs["name"]).one()
100
100
  if existing_record is not None:
101
101
  logger.important(
102
- f"returning existing {orm.__class__.__name__} record with same"
102
+ f"returning existing {record.__class__.__name__} record with same"
103
103
  f" name{version_comment}: '{kwargs['name']}'"
104
104
  )
105
- init_self_from_db(orm, existing_record)
105
+ init_self_from_db(record, existing_record)
106
106
  return None
107
- super(Record, orm).__init__(**kwargs)
108
- elif len(args) != len(orm._meta.concrete_fields):
107
+ super(Record, record).__init__(**kwargs)
108
+ elif len(args) != len(record._meta.concrete_fields):
109
109
  raise ValueError("please provide keyword arguments, not plain arguments")
110
110
  else:
111
111
  # object is loaded from DB (**kwargs could be omitted below, I believe)
112
- super(Record, orm).__init__(*args, **kwargs)
112
+ super(Record, record).__init__(*args, **kwargs)
113
113
 
114
114
 
115
115
  @classmethod # type:ignore
@@ -160,19 +160,22 @@ def from_values(
160
160
  field: StrField | None = None,
161
161
  create: bool = False,
162
162
  organism: Record | str | None = None,
163
- public_source: Record | None = None,
163
+ source: Record | None = None,
164
164
  mute: bool = False,
165
165
  ) -> list[Record]:
166
166
  """{}""" # noqa: D415
167
- from_public = True if cls.__module__.startswith("lnschema_bionty.") else False
168
- field_str = get_default_str_field(cls, field=field)
167
+ from_source = True if cls.__module__.startswith("bionty.") else False
168
+ # if records from source is already saved in db, skip from_source
169
+ if isinstance(source, Record) and source.in_db:
170
+ from_source = False
171
+ field_str = get_name_field(cls, field=field)
169
172
  return get_or_create_records(
170
173
  iterable=values,
171
174
  field=getattr(cls, field_str),
172
175
  create=create,
173
- from_public=from_public,
176
+ from_source=from_source,
174
177
  organism=organism,
175
- public_source=public_source,
178
+ source=source,
176
179
  mute=mute,
177
180
  )
178
181
 
@@ -188,11 +191,11 @@ def _search(
188
191
  truncate_words: bool = False,
189
192
  ) -> QuerySet:
190
193
  input_queryset = _queryset(cls, using_key=using_key)
191
- orm = input_queryset.model
194
+ registry = input_queryset.model
192
195
  if field is None:
193
196
  fields = [
194
197
  field.name
195
- for field in orm._meta.fields
198
+ for field in registry._meta.fields
196
199
  if field.get_internal_type() in {"CharField", "TextField"}
197
200
  ]
198
201
  else:
@@ -284,7 +287,7 @@ def _lookup(
284
287
  ) -> NamedTuple:
285
288
  """{}""" # noqa: D415
286
289
  queryset = _queryset(cls, using_key=using_key)
287
- field = get_default_str_field(orm=queryset.model, field=field)
290
+ field = get_name_field(registry=queryset.model, field=field)
288
291
 
289
292
  return Lookup(
290
293
  records=queryset,
@@ -293,7 +296,7 @@ def _lookup(
293
296
  prefix="ln",
294
297
  ).lookup(
295
298
  return_field=(
296
- get_default_str_field(orm=queryset.model, field=return_field)
299
+ get_name_field(registry=queryset.model, field=return_field)
297
300
  if return_field is not None
298
301
  else None
299
302
  )
@@ -311,35 +314,32 @@ def lookup(
311
314
  return _lookup(cls=cls, field=field, return_field=return_field)
312
315
 
313
316
 
314
- def get_default_str_field(
315
- orm: Record | QuerySet | Manager,
317
+ def get_name_field(
318
+ registry: type[Record] | QuerySet | Manager,
316
319
  *,
317
320
  field: str | StrField | None = None,
318
321
  ) -> str:
319
- """Get the 1st char or text field from the orm."""
320
- if isinstance(orm, (QuerySet, Manager)):
321
- orm = orm.model
322
- model_field_names = [i.name for i in orm._meta.fields]
322
+ """Get the 1st char or text field from the registry."""
323
+ if isinstance(registry, (QuerySet, Manager)):
324
+ registry = registry.model
325
+ model_field_names = [i.name for i in registry._meta.fields]
323
326
 
324
- # set default field
327
+ # set to default name field
325
328
  if field is None:
326
- if orm._meta.model.__name__ == "Run":
327
- field = orm._meta.get_field("created_at")
328
- elif orm._meta.model.__name__ == "User":
329
- field = orm._meta.get_field("handle")
329
+ if hasattr(registry, "_name_field"):
330
+ field = registry._meta.get_field(registry._name_field)
330
331
  elif "name" in model_field_names:
331
- # by default use the name field
332
- field = orm._meta.get_field("name")
332
+ field = registry._meta.get_field("name")
333
333
  else:
334
334
  # first char or text field that doesn't contain "id"
335
- for i in orm._meta.fields:
335
+ for i in registry._meta.fields:
336
336
  if "id" in i.name:
337
337
  continue
338
338
  if i.get_internal_type() in {"CharField", "TextField"}:
339
339
  field = i
340
340
  break
341
341
 
342
- # no default field can be found
342
+ # no default name field can be found
343
343
  if field is None:
344
344
  raise ValueError(
345
345
  "please pass a Record string field, e.g., `CellType.name`!"
@@ -360,7 +360,7 @@ def get_default_str_field(
360
360
  def _queryset(cls: Record | QuerySet | Manager, using_key: str) -> QuerySet:
361
361
  if isinstance(cls, (QuerySet, Manager)):
362
362
  return cls.all()
363
- elif using_key is None:
363
+ elif using_key is None or using_key == "default":
364
364
  return cls.objects.all()
365
365
  else:
366
366
  # using must be called on cls, otherwise the connection isn't found
@@ -443,9 +443,8 @@ def update_fk_to_default_db(
443
443
 
444
444
  FKBULK = [
445
445
  "organism",
446
- "public_source",
447
- "latest_report", # Transform
448
- "source_code", # Transform
446
+ "source",
447
+ "_source_code_artifact", # Transform
449
448
  "report", # Run
450
449
  ]
451
450
 
@@ -523,7 +522,7 @@ def save(self, *args, **kwargs) -> Record:
523
522
  artifacts: list = []
524
523
  if self.__class__.__name__ == "Collection" and self.id is not None:
525
524
  # when creating a new collection without being able to access artifacts
526
- artifacts = self.artifacts.list()
525
+ artifacts = self.ordered_artifacts.list()
527
526
  # transfer of the record to the default db with fk fields
528
527
  result = transfer_to_default_db(self, using_key)
529
528
  if result is not None:
@@ -538,7 +537,7 @@ def save(self, *args, **kwargs) -> Record:
538
537
  logger.info("transfer artifacts")
539
538
  for artifact in artifacts:
540
539
  artifact.save()
541
- self.unordered_artifacts.add(*artifacts)
540
+ self.artifacts.add(*artifacts)
542
541
  if hasattr(self, "labels"):
543
542
  from copy import copy
544
543
 
lamindb/_save.py CHANGED
@@ -85,9 +85,9 @@ def save(records: Iterable[Record], ignore_conflicts: bool | None = False) -> No
85
85
  r for r in non_artifacts_new if hasattr(r, "_parents")
86
86
  ]
87
87
  if len(non_artifacts_with_parents) > 0:
88
- # this can only happen within lnschema_bionty right now!!
88
+ # this can only happen within bionty right now!!
89
89
  # we might extend to core lamindb later
90
- from lnschema_bionty.core import add_ontology
90
+ from bionty.core import add_ontology
91
91
 
92
92
  add_ontology(non_artifacts_with_parents)
93
93
 
@@ -108,21 +108,21 @@ def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False
108
108
  records_by_orm = defaultdict(list)
109
109
  for record in records:
110
110
  records_by_orm[record.__class__].append(record)
111
- for orm, records in records_by_orm.items():
112
- orm.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
111
+ for registry, records in records_by_orm.items():
112
+ registry.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
113
113
 
114
114
 
115
115
  def bulk_update(records: Iterable[Record], ignore_conflicts: bool | None = False):
116
116
  records_by_orm = defaultdict(list)
117
117
  for record in records:
118
118
  records_by_orm[record.__class__].append(record)
119
- for orm, records in records_by_orm.items():
119
+ for registry, records in records_by_orm.items():
120
120
  field_names = [
121
121
  field.name
122
- for field in orm._meta.fields
122
+ for field in registry._meta.fields
123
123
  if (field.name != "created_at" and field.name != "id")
124
124
  ]
125
- orm.objects.bulk_update(records, field_names)
125
+ registry.objects.bulk_update(records, field_names)
126
126
 
127
127
 
128
128
  # This is also used within Artifact.save()
lamindb/_transform.py CHANGED
@@ -1,11 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from typing import TYPE_CHECKING
4
+
5
+ from lamindb_setup.core._docs import doc_args
3
6
  from lnschema_core.models import Run, Transform
4
- from lnschema_core.types import TransformType
5
7
 
8
+ from ._parents import _view_parents
6
9
  from ._run import delete_run_artifacts
7
10
  from .core.versioning import process_is_new_version_of
8
11
 
12
+ if TYPE_CHECKING:
13
+ from lnschema_core.types import TransformType
14
+
9
15
 
10
16
  def __init__(transform: Transform, *args, **kwargs):
11
17
  if len(args) == len(transform._meta.concrete_fields):
@@ -18,9 +24,7 @@ def __init__(transform: Transform, *args, **kwargs):
18
24
  )
19
25
  (kwargs.pop("initial_version_id") if "initial_version_id" in kwargs else None)
20
26
  version: str | None = kwargs.pop("version") if "version" in kwargs else None
21
- type: TransformType | None = (
22
- kwargs.pop("type") if "type" in kwargs else TransformType.pipeline
23
- )
27
+ type: TransformType | None = kwargs.pop("type") if "type" in kwargs else "pipeline"
24
28
  reference: str | None = kwargs.pop("reference") if "reference" in kwargs else None
25
29
  reference_type: str | None = (
26
30
  kwargs.pop("reference_type") if "reference_type" in kwargs else None
@@ -55,19 +59,13 @@ def __init__(transform: Transform, *args, **kwargs):
55
59
 
56
60
 
57
61
  def delete(self) -> None:
58
- # set latest_report to None, it's tracked through the latest run
59
- latest_report = None
60
- if self.latest_report is not None:
61
- latest_report = self.latest_report
62
- self.latest_report = None
63
- source_code = None
64
- if self.source_code is not None:
65
- source_code = self.source_code
66
- self.source_code = None
67
- if latest_report is not None or source_code is not None:
62
+ _source_code_artifact = None
63
+ if self._source_code_artifact is not None:
64
+ _source_code_artifact = self._source_code_artifact
65
+ self._source_code_artifact = None
68
66
  self.save()
69
- if source_code is not None:
70
- source_code.delete(permanent=True)
67
+ if _source_code_artifact is not None:
68
+ _source_code_artifact.delete(permanent=True)
71
69
  # query all runs and delete their artifacts
72
70
  runs = Run.filter(transform=self)
73
71
  for run in runs:
@@ -78,10 +76,23 @@ def delete(self) -> None:
78
76
 
79
77
 
80
78
  @property # type: ignore
79
+ @doc_args(Transform.latest_run.__doc__)
81
80
  def latest_run(self) -> Run:
81
+ """{}""" # noqa: D415
82
82
  return self.runs.order_by("-started_at").first()
83
83
 
84
84
 
85
+ def view_lineage(self, with_successors: bool = False, distance: int = 5):
86
+ return _view_parents(
87
+ record=self,
88
+ field="name",
89
+ with_children=with_successors,
90
+ distance=distance,
91
+ attr_name="predecessors",
92
+ )
93
+
94
+
85
95
  Transform.__init__ = __init__
86
96
  Transform.delete = delete
87
97
  Transform.latest_run = latest_run
98
+ Transform.view_lineage = view_lineage
lamindb/_view.py CHANGED
@@ -41,15 +41,17 @@ def view(
41
41
  schema_module = importlib.import_module(get_schema_module_name(schema_name))
42
42
 
43
43
  all_registries = {
44
- orm
45
- for orm in schema_module.__dict__.values()
46
- if inspect.isclass(orm)
47
- and issubclass(orm, Record)
48
- and orm.__name__ != "Record"
44
+ registry
45
+ for registry in schema_module.__dict__.values()
46
+ if inspect.isclass(registry)
47
+ and issubclass(registry, Record)
48
+ and registry is not Record
49
49
  }
50
50
  if registries is not None:
51
51
  filtered_registries = {
52
- orm for orm in all_registries if orm.__name__ in registries
52
+ registry
53
+ for registry in all_registries
54
+ if registry.__name__ in registries
53
55
  }
54
56
  else:
55
57
  filtered_registries = all_registries
@@ -59,12 +61,12 @@ def view(
59
61
  logger.print("*" * len(section_no_color))
60
62
  logger.print(section)
61
63
  logger.print("*" * len(section_no_color))
62
- for orm in sorted(filtered_registries, key=lambda x: x.__name__):
63
- if hasattr(orm, "updated_at"):
64
- df = orm.filter().order_by("-updated_at")[:n].df()
64
+ for registry in sorted(filtered_registries, key=lambda x: x.__name__):
65
+ if hasattr(registry, "updated_at"):
66
+ df = registry.filter().order_by("-updated_at")[:n].df()
65
67
  else:
66
68
  # need to adjust in the future
67
- df = orm.df().iloc[-n:]
69
+ df = registry.df().iloc[-n:]
68
70
  if df.shape[0] > 0:
69
- logger.print(colors.blue(colors.bold(orm.__name__)))
71
+ logger.print(colors.blue(colors.bold(registry.__name__)))
70
72
  show(df)
lamindb/core/__init__.py CHANGED
@@ -6,6 +6,7 @@ Registries:
6
6
  :toctree: .
7
7
 
8
8
  Record
9
+ Registry
9
10
  QuerySet
10
11
  QueryManager
11
12
  RecordsList
@@ -66,6 +67,7 @@ from lnschema_core.models import (
66
67
  IsVersioned,
67
68
  ParamValue,
68
69
  Record,
70
+ Registry,
69
71
  TracksRun,
70
72
  TracksUpdates,
71
73
  )
lamindb/core/_data.py CHANGED
@@ -14,13 +14,13 @@ from lnschema_core.models import (
14
14
  Record,
15
15
  Run,
16
16
  ULabel,
17
- __repr__,
18
17
  format_field_value,
18
+ record_repr,
19
19
  )
20
20
 
21
21
  from lamindb._parents import view_lineage
22
22
  from lamindb._query_set import QuerySet
23
- from lamindb._record import get_default_str_field
23
+ from lamindb._record import get_name_field
24
24
  from lamindb.core._settings import settings
25
25
 
26
26
  from ._feature_manager import (
@@ -108,7 +108,7 @@ def describe(self: HasFeatures, print_types: bool = False):
108
108
  # )
109
109
 
110
110
  model_name = self.__class__.__name__
111
- msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}\n"
111
+ msg = f"{colors.green(model_name)}{record_repr(self, include_foreign_keys=False).lstrip(model_name)}\n"
112
112
  prov_msg = ""
113
113
 
114
114
  fields = self._meta.fields
@@ -129,31 +129,29 @@ def describe(self: HasFeatures, print_types: bool = False):
129
129
  # prefetch m-2-m relationships
130
130
  self = (
131
131
  self.__class__.objects.using(self._state.db)
132
- .prefetch_related("feature_sets", "input_of")
132
+ .prefetch_related("feature_sets", "input_of_runs")
133
133
  .get(id=self.id)
134
134
  )
135
135
 
136
136
  # provenance
137
137
  if len(foreign_key_fields) > 0: # always True for Artifact and Collection
138
138
  fields_values = [(field, getattr(self, field)) for field in foreign_key_fields]
139
- type_str = (
140
- lambda attr: f": {attr.__class__.__get_name_with_schema__()}"
141
- if print_types
142
- else ""
139
+ type_str = lambda attr: (
140
+ f": {attr.__class__.__get_name_with_schema__()}" if print_types else ""
143
141
  )
144
142
  related_msg = "".join(
145
143
  [
146
- f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_default_str_field(attr)))}\n"
144
+ f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_name_field(attr)))}\n"
147
145
  for (field_name, attr) in fields_values
148
146
  if attr is not None
149
147
  ]
150
148
  )
151
149
  prov_msg += related_msg
152
150
  # input of
153
- if self.id is not None and self.input_of.exists():
154
- values = [format_field_value(i.started_at) for i in self.input_of.all()]
151
+ if self.id is not None and self.input_of_runs.exists():
152
+ values = [format_field_value(i.started_at) for i in self.input_of_runs.all()]
155
153
  type_str = ": Run" if print_types else "" # type: ignore
156
- prov_msg += f" .input_of{type_str} = {values}\n"
154
+ prov_msg += f" .input_of_runs{type_str} = {values}\n"
157
155
  if prov_msg:
158
156
  msg += f" {colors.italic('Provenance')}\n"
159
157
  msg += prov_msg
@@ -210,11 +208,11 @@ def get_labels(
210
208
  ).all()
211
209
  if flat_names:
212
210
  # returns a flat list of names
213
- from lamindb._record import get_default_str_field
211
+ from lamindb._record import get_name_field
214
212
 
215
213
  values = []
216
214
  for v in qs_by_registry.values():
217
- values += v.list(get_default_str_field(v))
215
+ values += v.list(get_name_field(v))
218
216
  return values
219
217
  if len(registries_to_check) == 1 and registry in qs_by_registry:
220
218
  return qs_by_registry[registry]
@@ -253,8 +251,8 @@ def add_labels(
253
251
  if feature.dtype.startswith("cat["):
254
252
  orm_dict = dict_schema_name_to_model_name(Artifact)
255
253
  for reg in feature.dtype.replace("cat[", "").rstrip("]").split("|"):
256
- orm = orm_dict.get(reg)
257
- records_validated += orm.from_values(records, field=field)
254
+ registry = orm_dict.get(reg)
255
+ records_validated += registry.from_values(records, field=field)
258
256
 
259
257
  # feature doesn't have registries and therefore can't create records from values
260
258
  # ask users to pass records
@@ -304,12 +302,12 @@ def add_labels(
304
302
  if len(linked_labels) > 0:
305
303
  labels_accessor.remove(*linked_labels)
306
304
  labels_accessor.add(*records, through_defaults={"feature_id": feature.id})
307
- feature_set_links = get_feature_set_links(self)
308
- feature_set_ids = [link.featureset_id for link in feature_set_links.all()]
305
+ links_feature_set = get_feature_set_links(self)
306
+ feature_set_ids = [link.featureset_id for link in links_feature_set.all()]
309
307
  # get all linked features of type Feature
310
308
  feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
311
309
  {
312
- feature_set_links.filter(featureset_id=feature_set.id)
310
+ links_feature_set.filter(featureset_id=feature_set.id)
313
311
  .one()
314
312
  .slot: feature_set.features.all()
315
313
  for feature_set in feature_sets
@@ -415,7 +413,7 @@ def _track_run_input(
415
413
  # generalize below for more than one data batch
416
414
  if len(input_data) == 1:
417
415
  if input_data[0].transform is not None:
418
- run.transform.parents.add(input_data[0].transform)
416
+ run.transform.predecessors.add(input_data[0].transform)
419
417
 
420
418
 
421
419
  HasFeatures.describe = describe