lamindb 0.72.1__py3-none-any.whl → 0.73.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_annotate.py +30 -20
- lamindb/_artifact.py +12 -11
- lamindb/_collection.py +21 -17
- lamindb/_feature_set.py +1 -1
- lamindb/_filter.py +6 -2
- lamindb/_finish.py +64 -33
- lamindb/_from_values.py +9 -3
- lamindb/_parents.py +11 -5
- lamindb/_query_manager.py +2 -2
- lamindb/_query_set.py +24 -3
- lamindb/_registry.py +77 -68
- lamindb/_save.py +14 -3
- lamindb/_ulabel.py +0 -14
- lamindb/core/__init__.py +4 -2
- lamindb/core/_data.py +38 -112
- lamindb/core/_feature_manager.py +535 -342
- lamindb/core/_label_manager.py +86 -85
- lamindb/core/_run_context.py +55 -31
- lamindb/core/_sync_git.py +4 -3
- lamindb/core/datasets/_core.py +1 -1
- lamindb/core/exceptions.py +34 -1
- lamindb/core/schema.py +17 -14
- lamindb/core/storage/paths.py +14 -4
- lamindb/core/versioning.py +49 -9
- lamindb/integrations/_vitessce.py +7 -3
- {lamindb-0.72.1.dist-info → lamindb-0.73.1.dist-info}/METADATA +5 -4
- lamindb-0.73.1.dist-info/RECORD +55 -0
- lamindb-0.72.1.dist-info/RECORD +0 -55
- {lamindb-0.72.1.dist-info → lamindb-0.73.1.dist-info}/LICENSE +0 -0
- {lamindb-0.72.1.dist-info → lamindb-0.73.1.dist-info}/WHEEL +0 -0
lamindb/_query_set.py
CHANGED
@@ -99,9 +99,30 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
99
99
|
@doc_args(Registry.df.__doc__)
|
100
100
|
def df(self, include: str | list[str] | None = None) -> pd.DataFrame:
|
101
101
|
"""{}."""
|
102
|
-
|
103
|
-
|
104
|
-
|
102
|
+
# re-order the columns
|
103
|
+
exclude_field_names = ["created_at"]
|
104
|
+
field_names = [
|
105
|
+
field.name
|
106
|
+
for field in self.model._meta.fields
|
107
|
+
if (
|
108
|
+
not isinstance(field, models.ForeignKey)
|
109
|
+
and field.name not in exclude_field_names
|
110
|
+
)
|
111
|
+
]
|
112
|
+
field_names += [
|
113
|
+
f"{field.name}_id"
|
114
|
+
for field in self.model._meta.fields
|
115
|
+
if isinstance(field, models.ForeignKey)
|
116
|
+
]
|
117
|
+
for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
|
118
|
+
if field_name in field_names:
|
119
|
+
field_names.remove(field_name)
|
120
|
+
field_names.append(field_name)
|
121
|
+
if field_names[0] != "uid" and "uid" in field_names:
|
122
|
+
field_names.remove("uid")
|
123
|
+
field_names.insert(0, "uid")
|
124
|
+
# create the dataframe
|
125
|
+
df = pd.DataFrame(self.values(), columns=field_names)
|
105
126
|
# if len(df) > 0 and "updated_at" in df:
|
106
127
|
# df.updated_at = format_and_convert_to_local_time(df.updated_at)
|
107
128
|
# if len(df) > 0 and "started_at" in df:
|
lamindb/_registry.py
CHANGED
@@ -5,27 +5,21 @@ from typing import TYPE_CHECKING, Iterable, List, NamedTuple
|
|
5
5
|
|
6
6
|
import dj_database_url
|
7
7
|
import lamindb_setup as ln_setup
|
8
|
-
from django.core.exceptions import FieldDoesNotExist
|
9
8
|
from django.db import connections
|
10
|
-
from django.db.models import Manager, Q, QuerySet
|
9
|
+
from django.db.models import IntegerField, Manager, Q, QuerySet, Value
|
11
10
|
from lamin_utils import logger
|
12
11
|
from lamin_utils._lookup import Lookup
|
13
|
-
from lamin_utils._search import search as base_search
|
14
12
|
from lamindb_setup._connect_instance import get_owner_name_from_identifier
|
15
|
-
from lamindb_setup._init_instance import InstanceSettings
|
16
13
|
from lamindb_setup.core._docs import doc_args
|
17
14
|
from lamindb_setup.core._hub_core import connect_instance
|
18
|
-
from lamindb_setup.core._settings_storage import StorageSettings
|
19
15
|
from lnschema_core import Registry
|
20
16
|
|
21
17
|
from lamindb._utils import attach_func_to_class_method
|
22
18
|
from lamindb.core._settings import settings
|
23
|
-
from lamindb.core.exceptions import ValidationError
|
24
19
|
|
25
20
|
from ._from_values import get_or_create_records
|
26
21
|
|
27
22
|
if TYPE_CHECKING:
|
28
|
-
import pandas as pd
|
29
23
|
from lnschema_core.types import ListLike, StrField
|
30
24
|
|
31
25
|
IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
@@ -53,31 +47,30 @@ def validate_required_fields(orm: Registry, kwargs):
|
|
53
47
|
raise TypeError(f"{missing_fields} are required.")
|
54
48
|
|
55
49
|
|
56
|
-
def
|
50
|
+
def suggest_records_with_similar_names(record: Registry, kwargs) -> bool:
|
51
|
+
"""Returns True if found exact match, otherwise False.
|
52
|
+
|
53
|
+
Logs similar matches if found.
|
54
|
+
"""
|
57
55
|
if kwargs.get("name") is None:
|
58
|
-
return
|
56
|
+
return False
|
57
|
+
queryset = _search(record.__class__, kwargs["name"], truncate_words=True, limit=5)
|
58
|
+
if not queryset.exists(): # empty queryset
|
59
|
+
return False
|
60
|
+
for alternative_record in queryset:
|
61
|
+
if alternative_record.name == kwargs["name"]:
|
62
|
+
return True
|
63
|
+
s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
|
64
|
+
msg = f"record{s} with similar name{s} exist{nots}! did you mean to load {it}?"
|
65
|
+
if IPYTHON:
|
66
|
+
from IPython.display import display
|
67
|
+
|
68
|
+
logger.warning(f"{msg}")
|
69
|
+
if settings._verbosity_int >= 1:
|
70
|
+
display(queryset.df())
|
59
71
|
else:
|
60
|
-
|
61
|
-
|
62
|
-
return None
|
63
|
-
else:
|
64
|
-
for record in queryset:
|
65
|
-
if record.name == kwargs["name"]:
|
66
|
-
return "object-with-same-name-exists"
|
67
|
-
else:
|
68
|
-
s, it = ("", "it") if len(queryset) == 1 else ("s", "one of them")
|
69
|
-
msg = (
|
70
|
-
f"record{s} with similar name{s} exist! did you mean to load {it}?"
|
71
|
-
)
|
72
|
-
if IPYTHON:
|
73
|
-
from IPython.display import display
|
74
|
-
|
75
|
-
logger.warning(f"{msg}")
|
76
|
-
if settings._verbosity_int >= 1:
|
77
|
-
display(queryset.df())
|
78
|
-
else:
|
79
|
-
logger.warning(f"{msg}\n{queryset}")
|
80
|
-
return None
|
72
|
+
logger.warning(f"{msg}\n{queryset}")
|
73
|
+
return False
|
81
74
|
|
82
75
|
|
83
76
|
def __init__(orm: Registry, *args, **kwargs):
|
@@ -90,8 +83,8 @@ def __init__(orm: Registry, *args, **kwargs):
|
|
90
83
|
if "_has_consciously_provided_uid" in kwargs:
|
91
84
|
has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
|
92
85
|
if settings.upon_create_search_names and not has_consciously_provided_uid:
|
93
|
-
|
94
|
-
if
|
86
|
+
match = suggest_records_with_similar_names(orm, kwargs)
|
87
|
+
if match:
|
95
88
|
if "version" in kwargs:
|
96
89
|
version_comment = " and version"
|
97
90
|
existing_record = orm.filter(
|
@@ -101,10 +94,9 @@ def __init__(orm: Registry, *args, **kwargs):
|
|
101
94
|
version_comment = ""
|
102
95
|
existing_record = orm.filter(name=kwargs["name"]).one()
|
103
96
|
if existing_record is not None:
|
104
|
-
logger.
|
105
|
-
f"
|
106
|
-
f" name{version_comment}: '{kwargs['name']}'
|
107
|
-
"(disable via `ln.settings.upon_create_search_names`)"
|
97
|
+
logger.important(
|
98
|
+
f"returning existing {orm.__class__.__name__} record with same"
|
99
|
+
f" name{version_comment}: '{kwargs['name']}'"
|
108
100
|
)
|
109
101
|
init_self_from_db(orm, existing_record)
|
110
102
|
return None
|
@@ -123,6 +115,7 @@ def from_values(
|
|
123
115
|
cls,
|
124
116
|
values: ListLike,
|
125
117
|
field: StrField | None = None,
|
118
|
+
create: bool = False,
|
126
119
|
organism: Registry | str | None = None,
|
127
120
|
public_source: Registry | None = None,
|
128
121
|
mute: bool = False,
|
@@ -133,6 +126,7 @@ def from_values(
|
|
133
126
|
return get_or_create_records(
|
134
127
|
iterable=values,
|
135
128
|
field=getattr(cls, field_str),
|
129
|
+
create=create,
|
136
130
|
from_public=from_public,
|
137
131
|
organism=organism,
|
138
132
|
public_source=public_source,
|
@@ -156,6 +150,7 @@ def _search(
|
|
156
150
|
limit: int | None = 20,
|
157
151
|
case_sensitive: bool = False,
|
158
152
|
using_key: str | None = None,
|
153
|
+
truncate_words: bool = False,
|
159
154
|
) -> QuerySet:
|
160
155
|
input_queryset = _queryset(cls, using_key=using_key)
|
161
156
|
orm = input_queryset.model
|
@@ -181,14 +176,49 @@ def _search(
|
|
181
176
|
) from error
|
182
177
|
else:
|
183
178
|
fields.append(field)
|
179
|
+
|
180
|
+
# decompose search string
|
181
|
+
def truncate_word(word) -> str:
|
182
|
+
if len(word) > 5:
|
183
|
+
n_80_pct = int(len(word) * 0.8)
|
184
|
+
return word[:n_80_pct]
|
185
|
+
elif len(word) > 3:
|
186
|
+
return word[:3]
|
187
|
+
else:
|
188
|
+
return word
|
189
|
+
|
190
|
+
decomposed_string = string.split()
|
191
|
+
# add the entire string back
|
192
|
+
decomposed_string += [string]
|
193
|
+
for word in decomposed_string:
|
194
|
+
# will not search against words with 3 or fewer characters
|
195
|
+
if len(word) <= 3:
|
196
|
+
decomposed_string.remove(word)
|
197
|
+
if truncate_words:
|
198
|
+
decomposed_string = [truncate_word(word) for word in decomposed_string]
|
199
|
+
# construct the query
|
184
200
|
expression = Q()
|
185
201
|
case_sensitive_i = "" if case_sensitive else "i"
|
186
202
|
for field in fields:
|
187
|
-
|
203
|
+
for word in decomposed_string:
|
204
|
+
query = {f"{field}__{case_sensitive_i}contains": word}
|
205
|
+
expression |= Q(**query)
|
206
|
+
output_queryset = input_queryset.filter(expression)
|
207
|
+
# ensure exact matches are at the top
|
208
|
+
narrow_expression = Q()
|
209
|
+
for field in fields:
|
188
210
|
query = {f"{field}__{case_sensitive_i}contains": string}
|
189
|
-
|
190
|
-
|
191
|
-
|
211
|
+
narrow_expression |= Q(**query)
|
212
|
+
refined_output_queryset = output_queryset.filter(narrow_expression).annotate(
|
213
|
+
ordering=Value(1, output_field=IntegerField())
|
214
|
+
)
|
215
|
+
remaining_output_queryset = output_queryset.exclude(narrow_expression).annotate(
|
216
|
+
ordering=Value(2, output_field=IntegerField())
|
217
|
+
)
|
218
|
+
combined_queryset = refined_output_queryset.union(
|
219
|
+
remaining_output_queryset
|
220
|
+
).order_by("ordering")[:limit]
|
221
|
+
return combined_queryset
|
192
222
|
|
193
223
|
|
194
224
|
@classmethod # type: ignore
|
@@ -376,11 +406,9 @@ def update_fk_to_default_db(
|
|
376
406
|
FKBULK = [
|
377
407
|
"organism",
|
378
408
|
"public_source",
|
379
|
-
"initial_version",
|
380
409
|
"latest_report", # Transform
|
381
410
|
"source_code", # Transform
|
382
411
|
"report", # Run
|
383
|
-
"file", # Collection
|
384
412
|
]
|
385
413
|
|
386
414
|
|
@@ -401,8 +429,8 @@ def transfer_to_default_db(
|
|
401
429
|
registry = record.__class__
|
402
430
|
record_on_default = registry.objects.filter(uid=record.uid).one_or_none()
|
403
431
|
if record_on_default is not None:
|
404
|
-
logger.
|
405
|
-
f"
|
432
|
+
logger.important(
|
433
|
+
f"returning existing {record.__class__.__name__}(uid='{record.uid}') on default database"
|
406
434
|
)
|
407
435
|
return record_on_default
|
408
436
|
if not mute:
|
@@ -481,9 +509,13 @@ def save(self, *args, **kwargs) -> Registry:
|
|
481
509
|
if hasattr(self, "labels"):
|
482
510
|
from copy import copy
|
483
511
|
|
512
|
+
from lnschema_core.models import FeatureManager
|
513
|
+
|
514
|
+
# here we go back to original record on the source database
|
484
515
|
self_on_db = copy(self)
|
485
516
|
self_on_db._state.db = db
|
486
|
-
self_on_db.pk = pk_on_db
|
517
|
+
self_on_db.pk = pk_on_db # manually set the primary key
|
518
|
+
self_on_db.features = FeatureManager(self_on_db)
|
487
519
|
# by default, transfer parents of the labels to maintain ontological hierarchy
|
488
520
|
try:
|
489
521
|
import bionty as bt
|
@@ -492,9 +524,7 @@ def save(self, *args, **kwargs) -> Registry:
|
|
492
524
|
except ImportError:
|
493
525
|
parents = kwargs.get("parents", True)
|
494
526
|
add_from_kwargs = {"parents": parents}
|
495
|
-
logger.info("transfer features")
|
496
527
|
self.features._add_from(self_on_db, **add_from_kwargs)
|
497
|
-
logger.info("transfer labels")
|
498
528
|
self.labels.add_from(self_on_db, **add_from_kwargs)
|
499
529
|
return self
|
500
530
|
|
@@ -519,24 +549,3 @@ if ln_setup._TESTING: # type: ignore
|
|
519
549
|
|
520
550
|
for name in METHOD_NAMES:
|
521
551
|
attach_func_to_class_method(name, Registry, globals())
|
522
|
-
|
523
|
-
|
524
|
-
@classmethod # type: ignore
|
525
|
-
def __get_schema_name__(cls) -> str:
|
526
|
-
schema_module_name = cls.__module__.split(".")[0]
|
527
|
-
schema_name = schema_module_name.replace("lnschema_", "")
|
528
|
-
return schema_name
|
529
|
-
|
530
|
-
|
531
|
-
@classmethod # type: ignore
|
532
|
-
def __get_name_with_schema__(cls) -> str:
|
533
|
-
schema_name = cls.__get_schema_name__()
|
534
|
-
if schema_name == "core":
|
535
|
-
schema_prefix = ""
|
536
|
-
else:
|
537
|
-
schema_prefix = f"{schema_name}."
|
538
|
-
return f"{schema_prefix}{cls.__name__}"
|
539
|
-
|
540
|
-
|
541
|
-
Registry.__get_schema_name__ = __get_schema_name__
|
542
|
-
Registry.__get_name_with_schema__ = __get_name_with_schema__
|
lamindb/_save.py
CHANGED
@@ -148,13 +148,17 @@ def check_and_attempt_upload(
|
|
148
148
|
artifact: Artifact,
|
149
149
|
using_key: str | None = None,
|
150
150
|
access_token: str | None = None,
|
151
|
+
print_progress: bool = True,
|
151
152
|
) -> Exception | None:
|
152
153
|
# if Artifact object is either newly instantiated or replace() was called on
|
153
154
|
# a local env it will have a _local_filepath and needs to be uploaded
|
154
155
|
if hasattr(artifact, "_local_filepath"):
|
155
156
|
try:
|
156
157
|
storage_path = upload_artifact(
|
157
|
-
artifact,
|
158
|
+
artifact,
|
159
|
+
using_key,
|
160
|
+
access_token=access_token,
|
161
|
+
print_progress=print_progress,
|
158
162
|
)
|
159
163
|
except Exception as exception:
|
160
164
|
logger.warning(f"could not upload artifact: {artifact}")
|
@@ -264,6 +268,8 @@ def store_artifacts(
|
|
264
268
|
if artifact not in stored_artifacts:
|
265
269
|
artifact._delete_skip_storage()
|
266
270
|
error_message = prepare_error_message(artifacts, stored_artifacts, exception)
|
271
|
+
# this is bad because we're losing the original traceback
|
272
|
+
# needs to be refactored - also, the orginal error should be raised
|
267
273
|
raise RuntimeError(error_message)
|
268
274
|
return None
|
269
275
|
|
@@ -289,7 +295,10 @@ def prepare_error_message(records, stored_artifacts, exception) -> str:
|
|
289
295
|
|
290
296
|
|
291
297
|
def upload_artifact(
|
292
|
-
artifact,
|
298
|
+
artifact,
|
299
|
+
using_key: str | None = None,
|
300
|
+
access_token: str | None = None,
|
301
|
+
print_progress: bool = True,
|
293
302
|
) -> UPath:
|
294
303
|
"""Store and add file and its linked entries."""
|
295
304
|
# can't currently use filepath_from_artifact here because it resolves to ._local_filepath
|
@@ -299,5 +308,7 @@ def upload_artifact(
|
|
299
308
|
)
|
300
309
|
if hasattr(artifact, "_to_store") and artifact._to_store:
|
301
310
|
logger.save(f"storing artifact '{artifact.uid}' at '{storage_path}'")
|
302
|
-
store_file_or_folder(
|
311
|
+
store_file_or_folder(
|
312
|
+
artifact._local_filepath, storage_path, print_progress=print_progress
|
313
|
+
)
|
303
314
|
return storage_path
|
lamindb/_ulabel.py
CHANGED
@@ -8,8 +8,6 @@ from lnschema_core import ULabel
|
|
8
8
|
|
9
9
|
from lamindb._utils import attach_func_to_class_method
|
10
10
|
|
11
|
-
from ._from_values import get_or_create_records
|
12
|
-
|
13
11
|
if TYPE_CHECKING:
|
14
12
|
from lnschema_core.types import ListLike
|
15
13
|
|
@@ -41,20 +39,8 @@ def __init__(self, *args, **kwargs):
|
|
41
39
|
)
|
42
40
|
|
43
41
|
|
44
|
-
@classmethod # type:ignore
|
45
|
-
@doc_args(ULabel.from_values.__doc__)
|
46
|
-
def from_values(cls, values: ListLike, **kwargs) -> list[ULabel]:
|
47
|
-
"""{}."""
|
48
|
-
records = get_or_create_records(
|
49
|
-
iterable=values,
|
50
|
-
field=ULabel.name,
|
51
|
-
)
|
52
|
-
return records
|
53
|
-
|
54
|
-
|
55
42
|
METHOD_NAMES = [
|
56
43
|
"__init__",
|
57
|
-
"from_values",
|
58
44
|
]
|
59
45
|
|
60
46
|
if ln_setup._TESTING:
|
lamindb/core/__init__.py
CHANGED
@@ -9,7 +9,7 @@ Registries:
|
|
9
9
|
QuerySet
|
10
10
|
QueryManager
|
11
11
|
RecordsList
|
12
|
-
|
12
|
+
HasFeatures
|
13
13
|
FeatureManager
|
14
14
|
LabelManager
|
15
15
|
IsVersioned
|
@@ -36,6 +36,7 @@ Classes:
|
|
36
36
|
:toctree: .
|
37
37
|
|
38
38
|
Settings
|
39
|
+
TransformSettings
|
39
40
|
MappedCollection
|
40
41
|
run_context
|
41
42
|
|
@@ -54,7 +55,7 @@ Modules:
|
|
54
55
|
from lamin_utils._inspect import InspectResult
|
55
56
|
from lnschema_core.models import (
|
56
57
|
CanValidate,
|
57
|
-
|
58
|
+
HasFeatures,
|
58
59
|
HasParents,
|
59
60
|
IsVersioned,
|
60
61
|
Registry,
|
@@ -77,3 +78,4 @@ from . import _data, datasets, exceptions, fields, types
|
|
77
78
|
from ._mapped_collection import MappedCollection
|
78
79
|
from ._run_context import run_context
|
79
80
|
from ._settings import Settings
|
81
|
+
from ._transform_settings import TransformSettings
|
lamindb/core/_data.py
CHANGED
@@ -8,9 +8,9 @@ from lamindb_setup.core._docs import doc_args
|
|
8
8
|
from lnschema_core.models import (
|
9
9
|
Artifact,
|
10
10
|
Collection,
|
11
|
-
Data,
|
12
11
|
Feature,
|
13
12
|
FeatureSet,
|
13
|
+
HasFeatures,
|
14
14
|
Registry,
|
15
15
|
Run,
|
16
16
|
ULabel,
|
@@ -20,6 +20,7 @@ from lnschema_core.models import (
|
|
20
20
|
|
21
21
|
from lamindb._parents import view_lineage
|
22
22
|
from lamindb._query_set import QuerySet
|
23
|
+
from lamindb._registry import get_default_str_field
|
23
24
|
from lamindb.core._settings import settings
|
24
25
|
|
25
26
|
from ._feature_manager import (
|
@@ -94,25 +95,8 @@ def save_feature_set_links(self: Artifact | Collection) -> None:
|
|
94
95
|
bulk_create(links, ignore_conflicts=True)
|
95
96
|
|
96
97
|
|
97
|
-
|
98
|
-
|
99
|
-
exclude = [exclude]
|
100
|
-
exclude_fields = set() if exclude is None else set(exclude)
|
101
|
-
exclude_fields.update(["created_at", "updated_at"])
|
102
|
-
|
103
|
-
fields = [
|
104
|
-
f
|
105
|
-
for f in value.__repr__(include_foreign_keys=False).split(", ")
|
106
|
-
if not any(f"{excluded_field}=" in f for excluded_field in exclude_fields)
|
107
|
-
]
|
108
|
-
repr = ", ".join(fields)
|
109
|
-
if not repr.endswith(")"):
|
110
|
-
repr += ")"
|
111
|
-
return repr
|
112
|
-
|
113
|
-
|
114
|
-
@doc_args(Data.describe.__doc__)
|
115
|
-
def describe(self: Data):
|
98
|
+
@doc_args(HasFeatures.describe.__doc__)
|
99
|
+
def describe(self: HasFeatures, print_types: bool = False):
|
116
100
|
"""{}."""
|
117
101
|
# prefetch all many-to-many relationships
|
118
102
|
# doesn't work for describing using artifact
|
@@ -125,7 +109,8 @@ def describe(self: Data):
|
|
125
109
|
# )
|
126
110
|
|
127
111
|
model_name = self.__class__.__name__
|
128
|
-
msg = ""
|
112
|
+
msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}\n"
|
113
|
+
prov_msg = ""
|
129
114
|
|
130
115
|
fields = self._meta.fields
|
131
116
|
direct_fields = []
|
@@ -151,26 +136,30 @@ def describe(self: Data):
|
|
151
136
|
|
152
137
|
# provenance
|
153
138
|
if len(foreign_key_fields) > 0: # always True for Artifact and Collection
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
139
|
+
fields_values = [(field, getattr(self, field)) for field in foreign_key_fields]
|
140
|
+
type_str = (
|
141
|
+
lambda attr: f": {attr.__class__.__get_name_with_schema__()}"
|
142
|
+
if print_types
|
143
|
+
else ""
|
144
|
+
)
|
158
145
|
related_msg = "".join(
|
159
146
|
[
|
160
|
-
f"
|
161
|
-
for
|
162
|
-
if
|
147
|
+
f" .{field_name}{type_str(attr)} = {format_field_value(getattr(attr, get_default_str_field(attr)))}\n"
|
148
|
+
for (field_name, attr) in fields_values
|
149
|
+
if attr is not None
|
163
150
|
]
|
164
151
|
)
|
165
|
-
|
152
|
+
prov_msg += related_msg
|
166
153
|
# input of
|
167
154
|
if self.id is not None and self.input_of.exists():
|
168
155
|
values = [format_field_value(i.started_at) for i in self.input_of.all()]
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
156
|
+
type_str = ": Run" if print_types else "" # type: ignore
|
157
|
+
prov_msg += f" .input_of{type_str} = {values}\n"
|
158
|
+
if prov_msg:
|
159
|
+
msg += f" {colors.italic('Provenance')}\n"
|
160
|
+
msg += prov_msg
|
161
|
+
msg += print_labels(self, print_types=print_types)
|
162
|
+
msg += print_features(self, print_types=print_types) # type: ignore
|
174
163
|
logger.print(msg)
|
175
164
|
|
176
165
|
|
@@ -212,9 +201,9 @@ def get_labels(
|
|
212
201
|
qs_by_registry[registry] = ULabel.objects.using(self._state.db).filter(
|
213
202
|
id__in=label_ids
|
214
203
|
)
|
215
|
-
elif registry in self.features.
|
204
|
+
elif registry in self.features._accessor_by_registry:
|
216
205
|
qs_by_registry[registry] = getattr(
|
217
|
-
self, self.features.
|
206
|
+
self, self.features._accessor_by_registry[registry]
|
218
207
|
).all()
|
219
208
|
if flat_names:
|
220
209
|
# returns a flat list of names
|
@@ -301,11 +290,11 @@ def add_labels(
|
|
301
290
|
record
|
302
291
|
)
|
303
292
|
for registry_name, records in records_by_registry.items():
|
304
|
-
if registry_name not in self.features.
|
293
|
+
if registry_name not in self.features._accessor_by_registry:
|
305
294
|
logger.warning(f"skipping {registry_name}")
|
306
295
|
continue
|
307
296
|
labels_accessor = getattr(
|
308
|
-
self, self.features.
|
297
|
+
self, self.features._accessor_by_registry[registry_name]
|
309
298
|
)
|
310
299
|
# remove labels that are already linked as add doesn't perform update
|
311
300
|
linked_labels = [r for r in records if r in labels_accessor.filter()]
|
@@ -316,7 +305,7 @@ def add_labels(
|
|
316
305
|
feature_set_ids = [link.featureset_id for link in feature_set_links.all()]
|
317
306
|
# get all linked features of type Feature
|
318
307
|
feature_sets = FeatureSet.filter(id__in=feature_set_ids).all()
|
319
|
-
|
308
|
+
{
|
320
309
|
feature_set_links.filter(featureset_id=feature_set.id)
|
321
310
|
.one()
|
322
311
|
.slot: feature_set.features.all()
|
@@ -324,62 +313,19 @@ def add_labels(
|
|
324
313
|
if "Feature" == feature_set.registry
|
325
314
|
}
|
326
315
|
for registry_name, _ in records_by_registry.items():
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
):
|
332
|
-
if len(msg) > 0:
|
333
|
-
msg += ", "
|
334
|
-
msg += f"linked feature '{feature.name}' to registry '{registry_name}'"
|
316
|
+
if registry_name not in feature.dtype:
|
317
|
+
logger.debug(
|
318
|
+
f"updated categorical feature '{feature.name}' type with registry '{registry_name}'"
|
319
|
+
)
|
335
320
|
if not feature.dtype.startswith("cat["):
|
336
321
|
feature.dtype = f"cat[{registry_name}]"
|
337
322
|
elif registry_name not in feature.dtype:
|
338
323
|
feature.dtype = feature.dtype.rstrip("]") + f"|{registry_name}]"
|
339
324
|
feature.save()
|
340
|
-
if len(msg) > 0:
|
341
|
-
logger.save(msg)
|
342
|
-
# check whether we have to update the feature set that manages labels
|
343
|
-
# (Feature) to account for a new feature
|
344
|
-
found_feature = False
|
345
|
-
for _, linked_features in linked_features_by_slot.items():
|
346
|
-
if feature in linked_features:
|
347
|
-
found_feature = True
|
348
|
-
if not found_feature:
|
349
|
-
if "external" in linked_features_by_slot:
|
350
|
-
feature_set = self.features.feature_set_by_slot["external"]
|
351
|
-
features_list = feature_set.features.list()
|
352
|
-
else:
|
353
|
-
features_list = []
|
354
|
-
features_list.append(feature)
|
355
|
-
feature_set = FeatureSet(features_list)
|
356
|
-
feature_set.save()
|
357
|
-
if "external" in linked_features_by_slot:
|
358
|
-
old_feature_set_link = feature_set_links.filter(
|
359
|
-
slot="external"
|
360
|
-
).one()
|
361
|
-
old_feature_set_link.delete()
|
362
|
-
remaining_links = self.feature_sets.through.objects.filter(
|
363
|
-
featureset_id=feature_set.id
|
364
|
-
).all()
|
365
|
-
if len(remaining_links) == 0:
|
366
|
-
old_feature_set = FeatureSet.filter(
|
367
|
-
id=old_feature_set_link.featureset_id
|
368
|
-
).one()
|
369
|
-
logger.info(
|
370
|
-
"nothing links to it anymore, deleting feature set"
|
371
|
-
f" {old_feature_set}"
|
372
|
-
)
|
373
|
-
old_feature_set.delete()
|
374
|
-
self.features.add_feature_set(feature_set, slot="external")
|
375
|
-
logger.save(
|
376
|
-
f"linked new feature '{feature.name}' together with new feature set"
|
377
|
-
f" {feature_set}"
|
378
|
-
)
|
379
325
|
|
380
326
|
|
381
327
|
def _track_run_input(
|
382
|
-
data:
|
328
|
+
data: HasFeatures | Iterable[HasFeatures],
|
383
329
|
is_run_input: bool | None = None,
|
384
330
|
run: Run | None = None,
|
385
331
|
):
|
@@ -391,12 +337,12 @@ def _track_run_input(
|
|
391
337
|
elif run is None:
|
392
338
|
run = run_context.run
|
393
339
|
# consider that data is an iterable of Data
|
394
|
-
data_iter: Iterable[
|
340
|
+
data_iter: Iterable[HasFeatures] = [data] if isinstance(data, HasFeatures) else data
|
395
341
|
track_run_input = False
|
396
342
|
input_data = []
|
397
343
|
if run is not None:
|
398
344
|
# avoid cycles: data can't be both input and output
|
399
|
-
def is_valid_input(data:
|
345
|
+
def is_valid_input(data: HasFeatures):
|
400
346
|
return (
|
401
347
|
data.run_id != run.id
|
402
348
|
and not data._state.adding
|
@@ -432,7 +378,7 @@ def _track_run_input(
|
|
432
378
|
f" {input_data[0].transform.id}"
|
433
379
|
)
|
434
380
|
logger.info(
|
435
|
-
f"adding {data_class_name} {input_data_ids} as
|
381
|
+
f"adding {data_class_name} ids {input_data_ids} as inputs for run"
|
436
382
|
f" {run.id}{transform_note}"
|
437
383
|
)
|
438
384
|
track_run_input = True
|
@@ -469,25 +415,5 @@ def _track_run_input(
|
|
469
415
|
run.transform.parents.add(input_data[0].transform)
|
470
416
|
|
471
417
|
|
472
|
-
|
473
|
-
|
474
|
-
def features(self) -> FeatureManager:
|
475
|
-
"""{}."""
|
476
|
-
from lamindb.core._feature_manager import FeatureManager
|
477
|
-
|
478
|
-
return FeatureManager(self)
|
479
|
-
|
480
|
-
|
481
|
-
@property # type: ignore
|
482
|
-
@doc_args(Data.labels.__doc__)
|
483
|
-
def labels(self) -> LabelManager:
|
484
|
-
"""{}."""
|
485
|
-
from lamindb.core._label_manager import LabelManager
|
486
|
-
|
487
|
-
return LabelManager(self)
|
488
|
-
|
489
|
-
|
490
|
-
Data.features = features
|
491
|
-
Data.labels = labels
|
492
|
-
Data.describe = describe
|
493
|
-
Data.view_lineage = view_lineage
|
418
|
+
HasFeatures.describe = describe
|
419
|
+
HasFeatures.view_lineage = view_lineage
|