lamindb 0.76.15__py3-none-any.whl → 0.77.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +2 -2
- lamindb/_artifact.py +9 -4
- lamindb/{_can_validate.py → _can_curate.py} +32 -23
- lamindb/_curate.py +24 -13
- lamindb/_finish.py +12 -7
- lamindb/_parents.py +8 -1
- lamindb/_query_set.py +58 -13
- lamindb/_record.py +192 -62
- lamindb/core/__init__.py +4 -2
- lamindb/core/_context.py +59 -32
- lamindb/core/_django.py +2 -2
- lamindb/core/_label_manager.py +3 -3
- lamindb/core/loaders.py +15 -5
- lamindb/core/storage/_anndata_accessor.py +7 -4
- lamindb/core/storage/_zarr.py +8 -1
- {lamindb-0.76.15.dist-info → lamindb-0.77.0.dist-info}/METADATA +18 -6
- {lamindb-0.76.15.dist-info → lamindb-0.77.0.dist-info}/RECORD +19 -20
- lamindb/_filter.py +0 -21
- {lamindb-0.76.15.dist-info → lamindb-0.77.0.dist-info}/LICENSE +0 -0
- {lamindb-0.76.15.dist-info → lamindb-0.77.0.dist-info}/WHEEL +0 -0
lamindb/_record.py
CHANGED
@@ -1,12 +1,26 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import builtins
|
4
|
+
from functools import reduce
|
4
5
|
from typing import TYPE_CHECKING, NamedTuple
|
5
6
|
|
6
7
|
import dj_database_url
|
7
8
|
import lamindb_setup as ln_setup
|
9
|
+
from django.core.exceptions import FieldDoesNotExist
|
10
|
+
from django.core.exceptions import ValidationError as DjangoValidationError
|
8
11
|
from django.db import connections, transaction
|
9
|
-
from django.db.models import IntegerField, Manager, Q, QuerySet, Value
|
12
|
+
from django.db.models import F, IntegerField, Manager, Q, QuerySet, TextField, Value
|
13
|
+
from django.db.models.functions import Cast, Coalesce
|
14
|
+
from django.db.models.lookups import (
|
15
|
+
Contains,
|
16
|
+
Exact,
|
17
|
+
IContains,
|
18
|
+
IExact,
|
19
|
+
IRegex,
|
20
|
+
IStartsWith,
|
21
|
+
Regex,
|
22
|
+
StartsWith,
|
23
|
+
)
|
10
24
|
from lamin_utils import colors, logger
|
11
25
|
from lamin_utils._lookup import Lookup
|
12
26
|
from lamindb_setup._connect_instance import (
|
@@ -17,11 +31,24 @@ from lamindb_setup._connect_instance import (
|
|
17
31
|
from lamindb_setup.core._docs import doc_args
|
18
32
|
from lamindb_setup.core._hub_core import connect_instance_hub
|
19
33
|
from lamindb_setup.core._settings_store import instance_settings_file
|
20
|
-
from lnschema_core.models import
|
34
|
+
from lnschema_core.models import (
|
35
|
+
Artifact,
|
36
|
+
Collection,
|
37
|
+
Feature,
|
38
|
+
FeatureSet,
|
39
|
+
IsVersioned,
|
40
|
+
Param,
|
41
|
+
Record,
|
42
|
+
Run,
|
43
|
+
Transform,
|
44
|
+
ULabel,
|
45
|
+
ValidateFields,
|
46
|
+
)
|
47
|
+
from lnschema_core.validation import FieldValidationError
|
21
48
|
|
22
49
|
from ._utils import attach_func_to_class_method
|
23
50
|
from .core._settings import settings
|
24
|
-
from .core.exceptions import RecordNameChangeIntegrityError
|
51
|
+
from .core.exceptions import RecordNameChangeIntegrityError, ValidationError
|
25
52
|
|
26
53
|
if TYPE_CHECKING:
|
27
54
|
import pandas as pd
|
@@ -47,7 +74,11 @@ def update_attributes(record: Record, attributes: dict[str, str]):
|
|
47
74
|
setattr(record, key, value)
|
48
75
|
|
49
76
|
|
50
|
-
def
|
77
|
+
def validate_fields(record: Record, kwargs):
|
78
|
+
from lnschema_core.validation import validate_literal_fields
|
79
|
+
|
80
|
+
# validate required fields
|
81
|
+
# a "required field" is a Django field that has `null=False, default=None`
|
51
82
|
required_fields = {
|
52
83
|
k.name for k in record._meta.fields if not k.null and k.default is None
|
53
84
|
}
|
@@ -58,25 +89,49 @@ def validate_required_fields(record: Record, kwargs):
|
|
58
89
|
]
|
59
90
|
if missing_fields:
|
60
91
|
raise TypeError(f"{missing_fields} are required.")
|
92
|
+
# ensure the exact length of the internal uid for core entities
|
93
|
+
if "uid" in kwargs and record.__class__ in {
|
94
|
+
Artifact,
|
95
|
+
Collection,
|
96
|
+
Transform,
|
97
|
+
Run,
|
98
|
+
ULabel,
|
99
|
+
Feature,
|
100
|
+
FeatureSet,
|
101
|
+
Param,
|
102
|
+
}:
|
103
|
+
uid_max_length = record.__class__._meta.get_field(
|
104
|
+
"uid"
|
105
|
+
).max_length # triggers FieldDoesNotExist
|
106
|
+
if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
|
107
|
+
raise ValidationError(
|
108
|
+
f'`uid` must be exactly {uid_max_length} characters long, got {len(kwargs["uid"])}.'
|
109
|
+
)
|
110
|
+
# validate literals
|
111
|
+
validate_literal_fields(record, kwargs)
|
61
112
|
|
62
113
|
|
63
|
-
def suggest_records_with_similar_names(record: Record, kwargs) -> bool:
|
114
|
+
def suggest_records_with_similar_names(record: Record, name_field: str, kwargs) -> bool:
|
64
115
|
"""Returns True if found exact match, otherwise False.
|
65
116
|
|
66
117
|
Logs similar matches if found.
|
67
118
|
"""
|
68
|
-
if kwargs.get(
|
119
|
+
if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
|
69
120
|
return False
|
70
121
|
queryset = _search(
|
71
|
-
record.__class__,
|
122
|
+
record.__class__,
|
123
|
+
kwargs[name_field],
|
124
|
+
field=name_field,
|
125
|
+
truncate_string=True,
|
126
|
+
limit=3,
|
72
127
|
)
|
73
128
|
if not queryset.exists(): # empty queryset
|
74
129
|
return False
|
75
130
|
for alternative_record in queryset:
|
76
|
-
if alternative_record
|
131
|
+
if getattr(alternative_record, name_field) == kwargs[name_field]:
|
77
132
|
return True
|
78
133
|
s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
|
79
|
-
msg = f"record{s} with similar
|
134
|
+
msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
|
80
135
|
if IPYTHON:
|
81
136
|
from IPython.display import display
|
82
137
|
|
@@ -90,7 +145,7 @@ def suggest_records_with_similar_names(record: Record, kwargs) -> bool:
|
|
90
145
|
|
91
146
|
def __init__(record: Record, *args, **kwargs):
|
92
147
|
if not args:
|
93
|
-
|
148
|
+
validate_fields(record, kwargs)
|
94
149
|
|
95
150
|
# do not search for names if an id is passed; this is important
|
96
151
|
# e.g. when synching ids from the notebook store to lamindb
|
@@ -98,13 +153,17 @@ def __init__(record: Record, *args, **kwargs):
|
|
98
153
|
if "_has_consciously_provided_uid" in kwargs:
|
99
154
|
has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
|
100
155
|
if settings.creation.search_names and not has_consciously_provided_uid:
|
101
|
-
|
156
|
+
name_field = getattr(record, "_name_field", "name")
|
157
|
+
match = suggest_records_with_similar_names(record, name_field, kwargs)
|
102
158
|
if match:
|
103
159
|
if "version" in kwargs:
|
104
160
|
if kwargs["version"] is not None:
|
105
161
|
version_comment = " and version"
|
106
162
|
existing_record = record.__class__.filter(
|
107
|
-
|
163
|
+
**{
|
164
|
+
name_field: kwargs[name_field],
|
165
|
+
"version": kwargs["version"],
|
166
|
+
}
|
108
167
|
).one_or_none()
|
109
168
|
else:
|
110
169
|
# for a versioned record, an exact name match is not a
|
@@ -115,16 +174,26 @@ def __init__(record: Record, *args, **kwargs):
|
|
115
174
|
else:
|
116
175
|
version_comment = ""
|
117
176
|
existing_record = record.__class__.filter(
|
118
|
-
|
177
|
+
**{name_field: kwargs[name_field]}
|
119
178
|
).one_or_none()
|
120
179
|
if existing_record is not None:
|
121
180
|
logger.important(
|
122
181
|
f"returning existing {record.__class__.__name__} record with same"
|
123
|
-
f"
|
182
|
+
f" {name_field}{version_comment}: '{kwargs[name_field]}'"
|
124
183
|
)
|
125
184
|
init_self_from_db(record, existing_record)
|
126
185
|
return None
|
127
186
|
super(Record, record).__init__(**kwargs)
|
187
|
+
if isinstance(record, ValidateFields):
|
188
|
+
# this will trigger validation against django validators
|
189
|
+
try:
|
190
|
+
if hasattr(record, "clean_fields"):
|
191
|
+
record.clean_fields()
|
192
|
+
else:
|
193
|
+
record._Model__clean_fields()
|
194
|
+
except DjangoValidationError as e:
|
195
|
+
message = _format_django_validation_error(record, e)
|
196
|
+
raise FieldValidationError(message) from e
|
128
197
|
elif len(args) != len(record._meta.concrete_fields):
|
129
198
|
raise ValueError("please provide keyword arguments, not plain arguments")
|
130
199
|
else:
|
@@ -133,13 +202,48 @@ def __init__(record: Record, *args, **kwargs):
|
|
133
202
|
_store_record_old_name(record)
|
134
203
|
|
135
204
|
|
205
|
+
def _format_django_validation_error(record: Record, e: DjangoValidationError):
|
206
|
+
"""Pretty print Django validation errors."""
|
207
|
+
errors = {}
|
208
|
+
if hasattr(e, "error_dict"):
|
209
|
+
error_dict = e.error_dict
|
210
|
+
else:
|
211
|
+
error_dict = {"__all__": e.error_list}
|
212
|
+
|
213
|
+
for field_name, error_list in error_dict.items():
|
214
|
+
for error in error_list:
|
215
|
+
if hasattr(error, "message"):
|
216
|
+
msg = error.message
|
217
|
+
else:
|
218
|
+
msg = str(error)
|
219
|
+
|
220
|
+
if field_name == "__all__":
|
221
|
+
errors[field_name] = f"{colors.yellow(msg)}"
|
222
|
+
else:
|
223
|
+
current_value = getattr(record, field_name, None)
|
224
|
+
errors[field_name] = (
|
225
|
+
f"{field_name}: {colors.yellow(current_value)} is not valid\n → {msg}"
|
226
|
+
)
|
227
|
+
|
228
|
+
if errors:
|
229
|
+
message = "\n "
|
230
|
+
for _, error in errors.items():
|
231
|
+
message += error + "\n "
|
232
|
+
|
233
|
+
return message
|
234
|
+
|
235
|
+
|
136
236
|
@classmethod # type:ignore
|
137
237
|
@doc_args(Record.filter.__doc__)
|
138
238
|
def filter(cls, *queries, **expressions) -> QuerySet:
|
139
239
|
"""{}""" # noqa: D415
|
140
|
-
from lamindb.
|
240
|
+
from lamindb._query_set import QuerySet
|
241
|
+
|
242
|
+
_using_key = None
|
243
|
+
if "_using_key" in expressions:
|
244
|
+
_using_key = expressions.pop("_using_key")
|
141
245
|
|
142
|
-
return
|
246
|
+
return QuerySet(model=cls, using=_using_key).filter(*queries, **expressions)
|
143
247
|
|
144
248
|
|
145
249
|
@classmethod # type:ignore
|
@@ -150,8 +254,6 @@ def get(
|
|
150
254
|
**expressions,
|
151
255
|
) -> Record:
|
152
256
|
"""{}""" # noqa: D415
|
153
|
-
# this is the only place in which we need the lamindb queryset
|
154
|
-
# in this file; everywhere else it should be Django's
|
155
257
|
from lamindb._query_set import QuerySet
|
156
258
|
|
157
259
|
return QuerySet(model=cls).get(idlike, **expressions)
|
@@ -166,9 +268,7 @@ def df(
|
|
166
268
|
limit: int = 100,
|
167
269
|
) -> pd.DataFrame:
|
168
270
|
"""{}""" # noqa: D415
|
169
|
-
|
170
|
-
|
171
|
-
query_set = filter(cls)
|
271
|
+
query_set = cls.filter()
|
172
272
|
if hasattr(cls, "updated_at"):
|
173
273
|
query_set = query_set.order_by("-updated_at")
|
174
274
|
return query_set[:limit].df(include=include, join=join)
|
@@ -182,10 +282,11 @@ def _search(
|
|
182
282
|
limit: int | None = 20,
|
183
283
|
case_sensitive: bool = False,
|
184
284
|
using_key: str | None = None,
|
185
|
-
|
285
|
+
truncate_string: bool = False,
|
186
286
|
) -> QuerySet:
|
187
287
|
input_queryset = _queryset(cls, using_key=using_key)
|
188
288
|
registry = input_queryset.model
|
289
|
+
name_field = getattr(registry, "_name_field", "name")
|
189
290
|
if field is None:
|
190
291
|
fields = [
|
191
292
|
field.name
|
@@ -209,48 +310,77 @@ def _search(
|
|
209
310
|
else:
|
210
311
|
fields.append(field)
|
211
312
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
for word in decomposed_string:
|
226
|
-
# will not search against words with 3 or fewer characters
|
227
|
-
if len(word) <= 3:
|
228
|
-
decomposed_string.remove(word)
|
229
|
-
if truncate_words:
|
230
|
-
decomposed_string = [truncate_word(word) for word in decomposed_string]
|
231
|
-
# construct the query
|
232
|
-
expression = Q()
|
233
|
-
case_sensitive_i = "" if case_sensitive else "i"
|
234
|
-
for field in fields:
|
235
|
-
for word in decomposed_string:
|
236
|
-
query = {f"{field}__{case_sensitive_i}contains": word}
|
237
|
-
expression |= Q(**query)
|
238
|
-
output_queryset = input_queryset.filter(expression)
|
239
|
-
# ensure exact matches are at the top
|
240
|
-
narrow_expression = Q()
|
313
|
+
if truncate_string:
|
314
|
+
if (len_string := len(string)) > 5:
|
315
|
+
n_80_pct = int(len_string * 0.8)
|
316
|
+
string = string[:n_80_pct]
|
317
|
+
|
318
|
+
string = string.strip()
|
319
|
+
|
320
|
+
exact_lookup = Exact if case_sensitive else IExact
|
321
|
+
regex_lookup = Regex if case_sensitive else IRegex
|
322
|
+
contains_lookup = Contains if case_sensitive else IContains
|
323
|
+
|
324
|
+
ranks = []
|
325
|
+
contains_filters = []
|
241
326
|
for field in fields:
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
327
|
+
field_expr = Coalesce(
|
328
|
+
Cast(field, output_field=TextField()),
|
329
|
+
Value(""),
|
330
|
+
output_field=TextField(),
|
331
|
+
)
|
332
|
+
# exact rank
|
333
|
+
exact_expr = exact_lookup(field_expr, string)
|
334
|
+
exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
|
335
|
+
ranks.append(exact_rank)
|
336
|
+
# exact synonym
|
337
|
+
synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string}(?:\|.*|$)")
|
338
|
+
synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
|
339
|
+
ranks.append(synonym_rank)
|
340
|
+
# match as sub-phrase
|
341
|
+
sub_expr = regex_lookup(
|
342
|
+
field_expr, rf"(?:^|.*[ \|\.,;:]){string}(?:[ \|\.,;:].*|$)"
|
343
|
+
)
|
344
|
+
sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
|
345
|
+
ranks.append(sub_rank)
|
346
|
+
# startswith and avoid matching string with " " on the right
|
347
|
+
# mostly for truncated
|
348
|
+
startswith_expr = regex_lookup(field_expr, rf"(?:^|\|){string}[^ ]*(\||$)")
|
349
|
+
startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
|
350
|
+
ranks.append(startswith_rank)
|
351
|
+
# match as sub-phrase from the left, mostly for truncated
|
352
|
+
right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string}.*")
|
353
|
+
right_rank = Cast(right_expr, output_field=IntegerField()) * 2
|
354
|
+
ranks.append(right_rank)
|
355
|
+
# match as sub-phrase from the right
|
356
|
+
left_expr = regex_lookup(field_expr, rf".*{string}(?:$|[ \|\.,;:].*)")
|
357
|
+
left_rank = Cast(left_expr, output_field=IntegerField()) * 2
|
358
|
+
ranks.append(left_rank)
|
359
|
+
# simple contains filter
|
360
|
+
contains_expr = contains_lookup(field_expr, string)
|
361
|
+
contains_filter = Q(contains_expr)
|
362
|
+
contains_filters.append(contains_filter)
|
363
|
+
# also rank by contains
|
364
|
+
contains_rank = Cast(contains_expr, output_field=IntegerField())
|
365
|
+
ranks.append(contains_rank)
|
366
|
+
# additional rule for truncated strings
|
367
|
+
# weight matches from the beginning of the string higher
|
368
|
+
# sometimes whole words get truncated and startswith_expr is not enough
|
369
|
+
if truncate_string and field == name_field:
|
370
|
+
startswith_lookup = StartsWith if case_sensitive else IStartsWith
|
371
|
+
name_startswith_expr = startswith_lookup(field_expr, string)
|
372
|
+
name_startswith_rank = (
|
373
|
+
Cast(name_startswith_expr, output_field=IntegerField()) * 2
|
374
|
+
)
|
375
|
+
ranks.append(name_startswith_rank)
|
376
|
+
|
377
|
+
ranked_queryset = (
|
378
|
+
input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
|
379
|
+
.alias(rank=sum(ranks))
|
380
|
+
.order_by("-rank")
|
249
381
|
)
|
250
|
-
|
251
|
-
|
252
|
-
).order_by("ordering")[:limit]
|
253
|
-
return combined_queryset
|
382
|
+
|
383
|
+
return ranked_queryset[:limit]
|
254
384
|
|
255
385
|
|
256
386
|
@classmethod # type: ignore
|
lamindb/core/__init__.py
CHANGED
@@ -14,13 +14,14 @@ Registries:
|
|
14
14
|
ParamManager
|
15
15
|
LabelManager
|
16
16
|
IsVersioned
|
17
|
-
|
17
|
+
CanCurate
|
18
18
|
HasParents
|
19
19
|
TracksRun
|
20
20
|
TracksUpdates
|
21
21
|
ParamValue
|
22
22
|
FeatureValue
|
23
23
|
InspectResult
|
24
|
+
ValidateFields
|
24
25
|
fields
|
25
26
|
|
26
27
|
Curators:
|
@@ -67,7 +68,7 @@ Modules:
|
|
67
68
|
from lamin_utils import logger
|
68
69
|
from lamin_utils._inspect import InspectResult
|
69
70
|
from lnschema_core.models import (
|
70
|
-
|
71
|
+
CanCurate,
|
71
72
|
FeatureValue,
|
72
73
|
HasParents,
|
73
74
|
IsVersioned,
|
@@ -76,6 +77,7 @@ from lnschema_core.models import (
|
|
76
77
|
Registry,
|
77
78
|
TracksRun,
|
78
79
|
TracksUpdates,
|
80
|
+
ValidateFields,
|
79
81
|
)
|
80
82
|
|
81
83
|
from lamindb._curate import (
|
lamindb/core/_context.py
CHANGED
@@ -82,11 +82,14 @@ def raise_missing_context(transform_type: str, key: str) -> bool:
|
|
82
82
|
transform = Transform.filter(key=key).latest_version().first()
|
83
83
|
if transform is None:
|
84
84
|
new_uid = f"{base62_12()}0000"
|
85
|
-
message = f'to track this {transform_type},
|
85
|
+
message = f'to track this {transform_type}, run: ln.track("{new_uid}")'
|
86
86
|
else:
|
87
87
|
uid = transform.uid
|
88
88
|
new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
|
89
|
-
message =
|
89
|
+
message = (
|
90
|
+
f"you already have a transform with key '{key}': Transform('{transform.uid[:8]}')\n"
|
91
|
+
f' (1) to make a revision, run: ln.track("{new_uid}")\n (2) to create a new transform, rename your {transform_type} file and re-run: ln.track()'
|
92
|
+
)
|
90
93
|
if transform_type == "notebook":
|
91
94
|
print(f"→ {message}")
|
92
95
|
response = input("→ Ready to re-run? (y/n)")
|
@@ -118,6 +121,8 @@ class Context:
|
|
118
121
|
Enables convenient data lineage tracking by managing a transform & run
|
119
122
|
upon :meth:`~lamindb.core.Context.track` & :meth:`~lamindb.core.Context.finish`.
|
120
123
|
|
124
|
+
Guide: :doc:`/track`
|
125
|
+
|
121
126
|
Examples:
|
122
127
|
|
123
128
|
Is typically used via the global :class:`~lamindb.context` object via `ln.track()` and `ln.finish()`:
|
@@ -137,7 +142,8 @@ class Context:
|
|
137
142
|
self._run: Run | None = None
|
138
143
|
self._path: Path | None = None
|
139
144
|
"""A local path to the script that's running."""
|
140
|
-
self.
|
145
|
+
self._logging_message_track: str = ""
|
146
|
+
self._logging_message_imports: str = ""
|
141
147
|
|
142
148
|
@property
|
143
149
|
def transform(self) -> Transform | None:
|
@@ -178,12 +184,11 @@ class Context:
|
|
178
184
|
|
179
185
|
def track(
|
180
186
|
self,
|
181
|
-
|
187
|
+
transform: str | Transform | None = None,
|
182
188
|
*,
|
183
189
|
params: dict | None = None,
|
184
190
|
new_run: bool | None = None,
|
185
191
|
path: str | None = None,
|
186
|
-
transform: Transform | None = None,
|
187
192
|
) -> None:
|
188
193
|
"""Initiate a run with tracked data lineage.
|
189
194
|
|
@@ -196,24 +201,31 @@ class Context:
|
|
196
201
|
script-like transform exists in a git repository and links it.
|
197
202
|
|
198
203
|
Args:
|
199
|
-
|
204
|
+
transform: A transform `uid` or record. If `None`, creates a `uid`.
|
200
205
|
params: A dictionary of parameters to track for the run.
|
201
206
|
new_run: If `False`, loads latest run of transform
|
202
207
|
(default notebook), if `True`, creates new run (default pipeline).
|
203
208
|
path: Filepath of notebook or script. Only needed if it can't be
|
204
209
|
automatically detected.
|
205
|
-
transform: Useful to track an abstract pipeline.
|
206
210
|
|
207
211
|
Examples:
|
208
212
|
|
209
|
-
To
|
213
|
+
To create a transform `uid` for tracking a script or notebook, call:
|
210
214
|
|
211
|
-
>>> import lamindb as ln
|
212
215
|
>>> ln.track()
|
213
216
|
|
217
|
+
To track the run of a notebook or script, call:
|
218
|
+
|
219
|
+
>>> ln.track("FPnfDtJz8qbE0000") # replace with your uid
|
220
|
+
|
214
221
|
"""
|
215
|
-
|
222
|
+
self._logging_message_track = ""
|
223
|
+
self._logging_message_imports = ""
|
224
|
+
uid = None
|
225
|
+
if transform is not None and isinstance(transform, str):
|
226
|
+
uid = transform
|
216
227
|
self.uid = uid
|
228
|
+
transform = None
|
217
229
|
self._path = None
|
218
230
|
if transform is None:
|
219
231
|
is_tracked = False
|
@@ -223,17 +235,20 @@ class Context:
|
|
223
235
|
)
|
224
236
|
transform = None
|
225
237
|
stem_uid = None
|
238
|
+
# you can set ln.context.uid and then call ln.track() without passing anythin
|
239
|
+
# that has been the preferred syntax for a while; we'll likely
|
240
|
+
# deprecate it at some point
|
226
241
|
if uid is not None or self.uid is not None:
|
227
242
|
transform = Transform.filter(uid=self.uid).one_or_none()
|
228
243
|
if self.version is not None:
|
229
244
|
# test inconsistent version passed
|
230
245
|
if (
|
231
246
|
transform is not None
|
232
|
-
and transform.version is not None
|
233
|
-
and self.version != transform.version
|
247
|
+
and transform.version is not None # type: ignore
|
248
|
+
and self.version != transform.version # type: ignore
|
234
249
|
):
|
235
250
|
raise SystemExit(
|
236
|
-
f"Please pass consistent version: ln.context.version = '{transform.version}'"
|
251
|
+
f"Please pass consistent version: ln.context.version = '{transform.version}'" # type: ignore
|
237
252
|
)
|
238
253
|
# test whether version was already used for another member of the family
|
239
254
|
suid, vuid = (
|
@@ -265,10 +280,12 @@ class Context:
|
|
265
280
|
transform_ref = None
|
266
281
|
transform_ref_type = None
|
267
282
|
else:
|
268
|
-
|
269
|
-
|
283
|
+
# the below function is typically used for `.py` scripts
|
284
|
+
# it is also used for `.Rmd` and `.qmd` files, which we classify
|
285
|
+
# as "notebook" because they typically come with an .html run report
|
286
|
+
(name, key, transform_type, transform_ref, transform_ref_type) = (
|
287
|
+
self._track_source_code(path=path)
|
270
288
|
)
|
271
|
-
transform_type = "script"
|
272
289
|
if self.uid is not None or transform_settings_are_set:
|
273
290
|
# overwrite whatever is auto-detected in the notebook or script
|
274
291
|
if self.name is not None:
|
@@ -302,10 +319,14 @@ class Context:
|
|
302
319
|
transform_exists = Transform.filter(id=transform.id).first()
|
303
320
|
if transform_exists is None:
|
304
321
|
transform.save()
|
305
|
-
self.
|
322
|
+
self._logging_message_track += (
|
323
|
+
f"created Transform('{transform.uid[:8]}')"
|
324
|
+
)
|
306
325
|
transform_exists = transform
|
307
326
|
else:
|
308
|
-
self.
|
327
|
+
self._logging_message_track += (
|
328
|
+
f"loaded Transform('{transform.uid[:8]}')"
|
329
|
+
)
|
309
330
|
self._transform = transform_exists
|
310
331
|
|
311
332
|
if new_run is None: # for notebooks, default to loading latest runs
|
@@ -322,7 +343,7 @@ class Context:
|
|
322
343
|
)
|
323
344
|
if run is not None: # loaded latest run
|
324
345
|
run.started_at = datetime.now(timezone.utc) # update run time
|
325
|
-
self.
|
346
|
+
self._logging_message_track += f", started Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
|
326
347
|
|
327
348
|
if run is None: # create new run
|
328
349
|
run = Run(
|
@@ -330,7 +351,7 @@ class Context:
|
|
330
351
|
params=params,
|
331
352
|
)
|
332
353
|
run.started_at = datetime.now(timezone.utc)
|
333
|
-
self.
|
354
|
+
self._logging_message_track += f", started new Run('{run.uid[:8]}') at {format_field_value(run.started_at)}"
|
334
355
|
# can only determine at ln.finish() if run was consecutive in
|
335
356
|
# interactive session, otherwise, is consecutive
|
336
357
|
run.is_consecutive = True if is_run_from_ipython else None
|
@@ -338,19 +359,20 @@ class Context:
|
|
338
359
|
run.save()
|
339
360
|
if params is not None:
|
340
361
|
run.params.add_values(params)
|
341
|
-
self.
|
362
|
+
self._logging_message_track += "\n→ params: " + " ".join(
|
342
363
|
f"{key}='{value}'" for key, value in params.items()
|
343
364
|
)
|
344
365
|
self._run = run
|
345
366
|
track_environment(run)
|
346
|
-
logger.important(self.
|
347
|
-
self.
|
367
|
+
logger.important(self._logging_message_track)
|
368
|
+
if self._logging_message_imports:
|
369
|
+
logger.important(self._logging_message_imports)
|
348
370
|
|
349
|
-
def
|
371
|
+
def _track_source_code(
|
350
372
|
self,
|
351
373
|
*,
|
352
374
|
path: UPathStr | None,
|
353
|
-
) -> tuple[str, str, str, str]:
|
375
|
+
) -> tuple[str, str, str, str, str]:
|
354
376
|
if path is None:
|
355
377
|
import inspect
|
356
378
|
|
@@ -359,6 +381,9 @@ class Context:
|
|
359
381
|
self._path = Path(module.__file__)
|
360
382
|
else:
|
361
383
|
self._path = Path(path)
|
384
|
+
transform_type = (
|
385
|
+
"notebook" if self._path.suffix in {".Rmd", ".qmd"} else "script"
|
386
|
+
)
|
362
387
|
name = self._path.name
|
363
388
|
key = name
|
364
389
|
reference = None
|
@@ -366,7 +391,7 @@ class Context:
|
|
366
391
|
if settings.sync_git_repo is not None:
|
367
392
|
reference = get_transform_reference_from_git_repo(self._path)
|
368
393
|
reference_type = "url"
|
369
|
-
return name, key, reference, reference_type
|
394
|
+
return name, key, transform_type, reference, reference_type
|
370
395
|
|
371
396
|
def _track_notebook(
|
372
397
|
self,
|
@@ -406,9 +431,9 @@ class Context:
|
|
406
431
|
from nbproject.dev._pypackage import infer_pypackages
|
407
432
|
|
408
433
|
nb = nbproject.dev.read_notebook(path_str)
|
409
|
-
|
434
|
+
self._logging_message_imports += (
|
410
435
|
"notebook imports:"
|
411
|
-
f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}"
|
436
|
+
f" {pretty_pypackages(infer_pypackages(nb, pin_versions=True))}\n"
|
412
437
|
)
|
413
438
|
except Exception:
|
414
439
|
logger.debug("inferring imported packages failed")
|
@@ -471,7 +496,7 @@ class Context:
|
|
471
496
|
raise_update_context = True
|
472
497
|
if raise_update_context:
|
473
498
|
raise UpdateContext(get_key_clashing_message(revises, key))
|
474
|
-
self.
|
499
|
+
self._logging_message_track += f"created Transform('{transform.uid[:8]}')"
|
475
500
|
else:
|
476
501
|
uid = transform.uid
|
477
502
|
# transform was already saved via `finish()`
|
@@ -485,7 +510,7 @@ class Context:
|
|
485
510
|
elif transform.name != name:
|
486
511
|
transform.name = name
|
487
512
|
transform.save()
|
488
|
-
self.
|
513
|
+
self._logging_message_track += (
|
489
514
|
"updated transform name, " # white space on purpose
|
490
515
|
)
|
491
516
|
elif (
|
@@ -509,7 +534,7 @@ class Context:
|
|
509
534
|
if condition:
|
510
535
|
bump_revision = True
|
511
536
|
else:
|
512
|
-
self.
|
537
|
+
self._logging_message_track += (
|
513
538
|
f"loaded Transform('{transform.uid[:8]}')"
|
514
539
|
)
|
515
540
|
if bump_revision:
|
@@ -523,7 +548,9 @@ class Context:
|
|
523
548
|
f'ln.track("{uid[:-4]}{increment_base62(uid[-4:])}")'
|
524
549
|
)
|
525
550
|
else:
|
526
|
-
self.
|
551
|
+
self._logging_message_track += (
|
552
|
+
f"loaded Transform('{transform.uid[:8]}')"
|
553
|
+
)
|
527
554
|
self._transform = transform
|
528
555
|
|
529
556
|
def finish(self, ignore_non_consecutive: None | bool = None) -> None:
|
lamindb/core/_django.py
CHANGED
@@ -33,7 +33,7 @@ def get_artifact_with_related(
|
|
33
33
|
include_featureset: bool = False,
|
34
34
|
) -> dict:
|
35
35
|
"""Fetch an artifact with its related data."""
|
36
|
-
from lamindb.
|
36
|
+
from lamindb._can_curate import get_name_field
|
37
37
|
|
38
38
|
from ._label_manager import LABELS_EXCLUDE_SET
|
39
39
|
|
@@ -163,7 +163,7 @@ def get_featureset_m2m_relations(
|
|
163
163
|
artifact: Artifact, slot_featureset: dict, limit: int = 20
|
164
164
|
):
|
165
165
|
"""Fetch all many-to-many relationships for given feature sets."""
|
166
|
-
from lamindb.
|
166
|
+
from lamindb._can_curate import get_name_field
|
167
167
|
|
168
168
|
m2m_relations = [
|
169
169
|
v
|