lamindb 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +3 -3
- lamindb/core/_context.py +64 -69
- lamindb/core/datasets/_small.py +2 -2
- lamindb/curators/__init__.py +683 -893
- lamindb/models/__init__.py +8 -1
- lamindb/models/_feature_manager.py +23 -19
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/artifact.py +210 -111
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +6 -4
- lamindb/models/feature.py +27 -30
- lamindb/models/has_parents.py +22 -7
- lamindb/models/project.py +2 -2
- lamindb/models/query_set.py +6 -35
- lamindb/models/record.py +167 -117
- lamindb/models/run.py +56 -2
- lamindb/models/save.py +1 -3
- lamindb/models/schema.py +277 -77
- lamindb/models/transform.py +4 -13
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/METADATA +6 -5
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/RECORD +24 -24
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/WHEEL +0 -0
lamindb/models/feature.py
CHANGED
@@ -332,7 +332,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
332
332
|
|
333
333
|
_name_field: str = "name"
|
334
334
|
_aux_fields: dict[str, tuple[str, type]] = {
|
335
|
-
"0": ("default_value",
|
335
|
+
"0": ("default_value", Any), # type: ignore
|
336
336
|
"1": ("nullable", bool),
|
337
337
|
"2": ("coerce_dtype", bool),
|
338
338
|
}
|
@@ -499,24 +499,11 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
499
499
|
super().save(*args, **kwargs)
|
500
500
|
return self
|
501
501
|
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
"""
|
508
|
-
if self._aux is not None and "af" in self._aux and "2" in self._aux["af"]: # type: ignore
|
509
|
-
return self._aux["af"]["2"] # type: ignore
|
510
|
-
else:
|
511
|
-
return False
|
512
|
-
|
513
|
-
@coerce_dtype.setter
|
514
|
-
def coerce_dtype(self, value: bool) -> None:
|
515
|
-
if self._aux is None: # type: ignore
|
516
|
-
self._aux = {} # type: ignore
|
517
|
-
if "af" not in self._aux:
|
518
|
-
self._aux["af"] = {}
|
519
|
-
self._aux["af"]["2"] = value
|
502
|
+
def with_config(self, optional: bool | None = None) -> tuple[Feature, dict]:
|
503
|
+
"""Pass additional configurations to the schema."""
|
504
|
+
if optional is not None:
|
505
|
+
return self, {"optional": optional}
|
506
|
+
return self, {}
|
520
507
|
|
521
508
|
@property
|
522
509
|
def default_value(self) -> Any:
|
@@ -532,12 +519,9 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
532
519
|
return None
|
533
520
|
|
534
521
|
@default_value.setter
|
535
|
-
def default_value(self, value:
|
536
|
-
|
537
|
-
|
538
|
-
if "af" not in self._aux:
|
539
|
-
self._aux["af"] = {}
|
540
|
-
self._aux["af"]["0"] = value
|
522
|
+
def default_value(self, value: str | None) -> None:
|
523
|
+
self._aux = self._aux or {}
|
524
|
+
self._aux.setdefault("af", {})["0"] = value
|
541
525
|
|
542
526
|
@property
|
543
527
|
def nullable(self) -> bool:
|
@@ -568,11 +552,24 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
568
552
|
@nullable.setter
|
569
553
|
def nullable(self, value: bool) -> None:
|
570
554
|
assert isinstance(value, bool), value # noqa: S101
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
555
|
+
self._aux = self._aux or {}
|
556
|
+
self._aux.setdefault("af", {})["1"] = value
|
557
|
+
|
558
|
+
@property
|
559
|
+
def coerce_dtype(self) -> bool:
|
560
|
+
"""Whether dtypes should be coerced during validation.
|
561
|
+
|
562
|
+
For example, an `object`-dtyped pandas column can be coerced to `categorical` and would pass validation if this is true.
|
563
|
+
"""
|
564
|
+
if self._aux is not None and "af" in self._aux and "2" in self._aux["af"]: # type: ignore
|
565
|
+
return self._aux["af"]["2"] # type: ignore
|
566
|
+
else:
|
567
|
+
return False
|
568
|
+
|
569
|
+
@coerce_dtype.setter
|
570
|
+
def coerce_dtype(self, value: bool) -> None:
|
571
|
+
self._aux = self._aux or {}
|
572
|
+
self._aux.setdefault("af", {})["2"] = value
|
576
573
|
|
577
574
|
|
578
575
|
class FeatureValue(Record, TracksRun):
|
lamindb/models/has_parents.py
CHANGED
@@ -4,12 +4,15 @@ from __future__ import annotations
|
|
4
4
|
import builtins
|
5
5
|
from typing import TYPE_CHECKING, Literal
|
6
6
|
|
7
|
+
import lamindb_setup as ln_setup
|
7
8
|
from lamin_utils import logger
|
8
9
|
|
9
10
|
from .record import format_field_value, get_name_field
|
10
11
|
from .run import Run
|
11
12
|
|
12
13
|
if TYPE_CHECKING:
|
14
|
+
from graphviz import Digraph
|
15
|
+
|
13
16
|
from lamindb.base.types import StrField
|
14
17
|
|
15
18
|
from .artifact import Artifact
|
@@ -78,7 +81,7 @@ class HasParents:
|
|
78
81
|
if not isinstance(field, str):
|
79
82
|
field = field.field.name
|
80
83
|
|
81
|
-
return
|
84
|
+
return view_parents(
|
82
85
|
record=self, # type: ignore
|
83
86
|
field=field,
|
84
87
|
with_children=with_children,
|
@@ -101,7 +104,7 @@ def _transform_emoji(transform: Transform):
|
|
101
104
|
return TRANSFORM_EMOJIS["pipeline"]
|
102
105
|
|
103
106
|
|
104
|
-
def
|
107
|
+
def view_digraph(u: Digraph):
|
105
108
|
from graphviz.backend import ExecutableNotFound
|
106
109
|
|
107
110
|
try:
|
@@ -117,7 +120,7 @@ def _view(u):
|
|
117
120
|
# call to display()
|
118
121
|
display(u._repr_mimebundle_(), raw=True)
|
119
122
|
else:
|
120
|
-
return u
|
123
|
+
return u.view()
|
121
124
|
except (FileNotFoundError, RuntimeError, ExecutableNotFound): # pragma: no cover
|
122
125
|
logger.error(
|
123
126
|
"please install the graphviz executable on your system:\n - Ubuntu: `sudo"
|
@@ -126,7 +129,9 @@ def _view(u):
|
|
126
129
|
)
|
127
130
|
|
128
131
|
|
129
|
-
def view_lineage(
|
132
|
+
def view_lineage(
|
133
|
+
data: Artifact | Collection, with_children: bool = True, return_graph: bool = False
|
134
|
+
) -> Digraph | None:
|
130
135
|
"""Graph of data flow.
|
131
136
|
|
132
137
|
Notes:
|
@@ -136,6 +141,13 @@ def view_lineage(data: Artifact | Collection, with_children: bool = True) -> Non
|
|
136
141
|
>>> collection.view_lineage()
|
137
142
|
>>> artifact.view_lineage()
|
138
143
|
"""
|
144
|
+
if ln_setup.settings.instance.is_on_hub:
|
145
|
+
instance_slug = ln_setup.settings.instance.slug
|
146
|
+
entity_slug = data.__class__.__name__.lower()
|
147
|
+
logger.important(
|
148
|
+
f"explore at: https://lamin.ai/{instance_slug}/{entity_slug}/{data.uid}"
|
149
|
+
)
|
150
|
+
|
139
151
|
import graphviz
|
140
152
|
|
141
153
|
df_values = _get_all_parent_runs(data)
|
@@ -189,10 +201,13 @@ def view_lineage(data: Artifact | Collection, with_children: bool = True) -> Non
|
|
189
201
|
shape="box",
|
190
202
|
)
|
191
203
|
|
192
|
-
|
204
|
+
if return_graph:
|
205
|
+
return u
|
206
|
+
else:
|
207
|
+
return view_digraph(u)
|
193
208
|
|
194
209
|
|
195
|
-
def
|
210
|
+
def view_parents(
|
196
211
|
record: Record,
|
197
212
|
field: str,
|
198
213
|
with_children: bool = False,
|
@@ -258,7 +273,7 @@ def _view_parents(
|
|
258
273
|
u.node(row["target"], label=row["target_label"])
|
259
274
|
u.edge(row["source"], row["target"], color="dimgrey")
|
260
275
|
|
261
|
-
|
276
|
+
view_digraph(u)
|
262
277
|
|
263
278
|
|
264
279
|
def _get_parents(
|
lamindb/models/project.py
CHANGED
@@ -366,7 +366,7 @@ class CollectionProject(BasicRecord, LinkORM, TracksRun):
|
|
366
366
|
|
367
367
|
class ULabelProject(BasicRecord, LinkORM, TracksRun):
|
368
368
|
id: int = models.BigAutoField(primary_key=True)
|
369
|
-
ulabel:
|
369
|
+
ulabel: ULabel = ForeignKey(ULabel, CASCADE, related_name="links_project")
|
370
370
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_ulabel")
|
371
371
|
|
372
372
|
class Meta:
|
@@ -375,7 +375,7 @@ class ULabelProject(BasicRecord, LinkORM, TracksRun):
|
|
375
375
|
|
376
376
|
class PersonProject(BasicRecord, LinkORM, TracksRun):
|
377
377
|
id: int = models.BigAutoField(primary_key=True)
|
378
|
-
person:
|
378
|
+
person: Person = ForeignKey(Person, CASCADE, related_name="links_project")
|
379
379
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_person")
|
380
380
|
role: str | None = CharField(null=True, default=None)
|
381
381
|
|
lamindb/models/query_set.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import re
|
4
|
-
import warnings
|
5
4
|
from collections import UserList
|
6
5
|
from collections.abc import Iterable
|
7
6
|
from collections.abc import Iterable as IterableType
|
@@ -75,49 +74,28 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
|
|
75
74
|
from lamindb.models import (
|
76
75
|
Artifact,
|
77
76
|
Collection,
|
78
|
-
Schema,
|
79
77
|
Transform,
|
80
78
|
)
|
81
79
|
|
82
80
|
if queryset.model in {Collection, Transform}:
|
83
81
|
name_mappings = {
|
84
|
-
"
|
85
|
-
"visibility": "_branch_code", # for convenience (and backward compat <1.0)
|
82
|
+
"visibility": "_branch_code",
|
86
83
|
}
|
87
84
|
elif queryset.model == Artifact:
|
88
85
|
name_mappings = {
|
89
|
-
"
|
90
|
-
"
|
91
|
-
"transform": "run__transform", # for convenience (and backward compat <1.0)
|
92
|
-
"type": "kind",
|
93
|
-
"_accessor": "otype",
|
94
|
-
}
|
95
|
-
elif queryset.model == Schema:
|
96
|
-
name_mappings = {
|
97
|
-
"registry": "itype",
|
86
|
+
"visibility": "_branch_code",
|
87
|
+
"transform": "run__transform",
|
98
88
|
}
|
99
89
|
else:
|
100
90
|
return expressions
|
101
91
|
was_list = False
|
102
92
|
if isinstance(expressions, list):
|
103
|
-
# make a dummy dictionary
|
104
93
|
was_list = True
|
105
94
|
expressions = {field: True for field in expressions}
|
106
95
|
mapped = {}
|
107
96
|
for field, value in expressions.items():
|
108
97
|
parts = field.split("__")
|
109
98
|
if parts[0] in name_mappings:
|
110
|
-
if parts[0] not in {
|
111
|
-
"transform",
|
112
|
-
"visibility",
|
113
|
-
"schemas",
|
114
|
-
"artifacts",
|
115
|
-
}:
|
116
|
-
warnings.warn(
|
117
|
-
f"{name_mappings[parts[0]]} is deprecated, please query for {parts[0]} instead",
|
118
|
-
DeprecationWarning,
|
119
|
-
stacklevel=2,
|
120
|
-
)
|
121
99
|
new_field = name_mappings[parts[0]] + (
|
122
100
|
"__" + "__".join(parts[1:]) if len(parts) > 1 else ""
|
123
101
|
)
|
@@ -631,15 +609,7 @@ class QuerySet(models.QuerySet):
|
|
631
609
|
"""Suggest available fields if an unknown field was passed."""
|
632
610
|
if "Cannot resolve keyword" in str(error):
|
633
611
|
field = str(error).split("'")[1]
|
634
|
-
fields = ", ".join(
|
635
|
-
sorted(
|
636
|
-
f.name
|
637
|
-
for f in self.model._meta.get_fields()
|
638
|
-
if not f.name.startswith("_")
|
639
|
-
and not f.name.startswith("links_")
|
640
|
-
and not f.name.endswith("_id")
|
641
|
-
)
|
642
|
-
)
|
612
|
+
fields = ", ".join(sorted(self.model.__get_available_fields__()))
|
643
613
|
raise FieldError(
|
644
614
|
f"Unknown field '{field}'. Available fields: {fields}"
|
645
615
|
) from None
|
@@ -680,7 +650,8 @@ class QuerySet(models.QuerySet):
|
|
680
650
|
)
|
681
651
|
|
682
652
|
expressions = process_expressions(self, expressions)
|
683
|
-
if
|
653
|
+
# need to run a query if queries or expressions are not empty
|
654
|
+
if queries or expressions:
|
684
655
|
try:
|
685
656
|
return super().filter(*queries, **expressions)
|
686
657
|
except FieldError as e:
|