lamindb 1.11a1__py3-none-any.whl → 1.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_finish.py +3 -3
- lamindb/core/_context.py +4 -2
- lamindb/curators/core.py +13 -5
- lamindb/errors.py +6 -0
- lamindb/examples/cellxgene/_cellxgene.py +1 -1
- lamindb/examples/croissant/__init__.py +20 -4
- lamindb/examples/datasets/_core.py +8 -1
- lamindb/examples/datasets/mini_immuno.py +0 -1
- lamindb/examples/fixtures/sheets.py +8 -2
- lamindb/integrations/_croissant.py +34 -11
- lamindb/migrations/0121_recorduser.py +7 -0
- lamindb/models/__init__.py +1 -0
- lamindb/models/_feature_manager.py +78 -18
- lamindb/models/artifact.py +71 -65
- lamindb/models/artifact_set.py +12 -3
- lamindb/models/query_set.py +170 -74
- lamindb/models/record.py +5 -1
- lamindb/models/run.py +2 -27
- lamindb/models/save.py +18 -10
- lamindb/models/sqlrecord.py +47 -33
- lamindb-1.11.2.dist-info/METADATA +180 -0
- {lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/RECORD +25 -25
- lamindb-1.11a1.dist-info/METADATA +0 -144
- {lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/LICENSE +0 -0
- {lamindb-1.11a1.dist-info → lamindb-1.11.2.dist-info}/WHEEL +0 -0
lamindb/__init__.py
CHANGED
lamindb/_finish.py
CHANGED
@@ -173,7 +173,7 @@ def notebook_to_script( # type: ignore
|
|
173
173
|
if script_path is None:
|
174
174
|
return py_content
|
175
175
|
else:
|
176
|
-
script_path.write_text(py_content)
|
176
|
+
script_path.write_text(py_content, encoding="utf-8")
|
177
177
|
|
178
178
|
|
179
179
|
def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
|
@@ -202,7 +202,7 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
|
|
202
202
|
) # RStudio might insert a newline
|
203
203
|
cleaned_content = cleaned_content.replace(orig_error_message, "")
|
204
204
|
cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}")
|
205
|
-
cleaned_path.write_text(cleaned_content)
|
205
|
+
cleaned_path.write_text(cleaned_content, encoding="utf-8")
|
206
206
|
return title_text, cleaned_path
|
207
207
|
|
208
208
|
|
@@ -474,7 +474,7 @@ def save_context_core(
|
|
474
474
|
# this can happen in interactively executed notebooks with a pro-active version bump in case it turns out that the user didn't make a change to the notebook
|
475
475
|
run.transform = transform
|
476
476
|
run.save()
|
477
|
-
ln.Transform.get(transform_id_prior_to_save).delete()
|
477
|
+
ln.Transform.get(transform_id_prior_to_save).delete(permanent=True)
|
478
478
|
|
479
479
|
# finalize
|
480
480
|
if finished_at and not from_cli and run is not None:
|
lamindb/core/_context.py
CHANGED
@@ -600,9 +600,11 @@ class Context:
|
|
600
600
|
if pypackages is None:
|
601
601
|
pypackages = True
|
602
602
|
description = None
|
603
|
+
if path.suffix == ".ipynb" and path.stem.startswith("Untitled"):
|
604
|
+
raise RuntimeError(
|
605
|
+
"Your notebook is untitled, please rename it before tracking"
|
606
|
+
)
|
603
607
|
path_str = path.as_posix()
|
604
|
-
if path_str.endswith("Untitled.ipynb"):
|
605
|
-
raise RuntimeError("Please rename your notebook before tracking it")
|
606
608
|
if path_str.startswith("/fileId="):
|
607
609
|
logger.warning("tracking on Google Colab is experimental")
|
608
610
|
path_str = get_notebook_key_colab()
|
lamindb/curators/core.py
CHANGED
@@ -411,7 +411,7 @@ class ComponentCurator(Curator):
|
|
411
411
|
"""Curator for `DataFrame`.
|
412
412
|
|
413
413
|
Provides all key functionality to validate Pandas DataFrames.
|
414
|
-
This class is not user facing unlike :class:`~lamindb.DataFrameCurator` which extends this
|
414
|
+
This class is not user facing unlike :class:`~lamindb.curators.DataFrameCurator` which extends this
|
415
415
|
class with functionality to validate the `attrs` slot.
|
416
416
|
|
417
417
|
Args:
|
@@ -671,7 +671,7 @@ class DataFrameCurator(SlotsCurator):
|
|
671
671
|
|
672
672
|
Examples:
|
673
673
|
|
674
|
-
For a simple example using a flexible schema, see :meth:`~lamindb.Artifact.
|
674
|
+
For a simple example using a flexible schema, see :meth:`~lamindb.Artifact.from_dataframe`.
|
675
675
|
|
676
676
|
Here is an example that enforces a minimal set of columns in the dataframe.
|
677
677
|
|
@@ -688,7 +688,7 @@ class DataFrameCurator(SlotsCurator):
|
|
688
688
|
.. literalinclude:: scripts/define_mini_immuno_features_labels.py
|
689
689
|
:language: python
|
690
690
|
|
691
|
-
|
691
|
+
It is also possible to curate the `attrs` slot.
|
692
692
|
|
693
693
|
.. literalinclude:: scripts/curate_dataframe_attrs.py
|
694
694
|
:language: python
|
@@ -885,12 +885,20 @@ class AnnDataCurator(SlotsCurator):
|
|
885
885
|
dataset: The AnnData-like object to validate & annotate.
|
886
886
|
schema: A :class:`~lamindb.Schema` object that defines the validation constraints.
|
887
887
|
|
888
|
-
|
888
|
+
Examples:
|
889
|
+
|
890
|
+
Curate Ensembl gene IDs and valid features in obs:
|
889
891
|
|
890
892
|
.. literalinclude:: scripts/curate_anndata_flexible.py
|
891
893
|
:language: python
|
892
894
|
:caption: curate_anndata_flexible.py
|
893
895
|
|
896
|
+
Curate `uns` dictionary:
|
897
|
+
|
898
|
+
.. literalinclude:: scripts/curate_anndata_uns.py
|
899
|
+
:language: python
|
900
|
+
:caption: curate_anndata_uns.py
|
901
|
+
|
894
902
|
See Also:
|
895
903
|
:meth:`~lamindb.Artifact.from_anndata`.
|
896
904
|
"""
|
@@ -903,7 +911,7 @@ class AnnDataCurator(SlotsCurator):
|
|
903
911
|
super().__init__(dataset=dataset, schema=schema)
|
904
912
|
if not data_is_scversedatastructure(self._dataset, "AnnData"):
|
905
913
|
raise InvalidArgument("dataset must be AnnData-like.")
|
906
|
-
if schema.otype != "AnnData":
|
914
|
+
if schema.otype and schema.otype != "AnnData":
|
907
915
|
raise InvalidArgument("Schema otype must be 'AnnData'.")
|
908
916
|
|
909
917
|
for slot, slot_schema in schema.slots.items():
|
lamindb/errors.py
CHANGED
lamindb/examples/cellxgene/_cellxgene.py
CHANGED
lamindb/examples/croissant/__init__.py
CHANGED
@@ -11,35 +11,51 @@ import json
|
|
11
11
|
from pathlib import Path
|
12
12
|
|
13
13
|
|
14
|
-
def mini_immuno(
|
14
|
+
def mini_immuno(
|
15
|
+
n_files: int = 1, filepath_prefix: str = "", strip_version: bool = False
|
16
|
+
) -> list[Path]:
|
15
17
|
"""Return paths to the mini immuno dataset and its metadata as a Croissant file.
|
16
18
|
|
17
19
|
Args:
|
18
20
|
n_files: Number of files inside the croissant file. Default is 1.
|
21
|
+
filepath_prefix: Move the dataset and references to it in a specific directory.
|
19
22
|
|
20
23
|
Example
|
21
24
|
|
22
25
|
::
|
23
26
|
|
24
27
|
croissant_path, dataset1_path = ln.examples.croissant.mini_immuno()
|
28
|
+
croissant_path, dataset1_path, dataset2_path = ln.examples.croissant.mini_immuno(n_files=2)
|
25
29
|
"""
|
26
30
|
from ..datasets import file_mini_csv
|
27
31
|
from ..datasets.mini_immuno import get_dataset1
|
28
32
|
|
29
33
|
adata = get_dataset1(otype="AnnData")
|
30
|
-
|
34
|
+
if filepath_prefix:
|
35
|
+
dataset1_path = Path(filepath_prefix) / "mini_immuno.anndata.zarr"
|
36
|
+
else:
|
37
|
+
dataset1_path = Path("mini_immuno.anndata.zarr")
|
31
38
|
adata.write_zarr(dataset1_path)
|
32
39
|
orig_croissant_path = (
|
33
40
|
Path(__file__).parent / "mini_immuno.anndata.zarr_metadata.json"
|
34
41
|
)
|
35
42
|
with open(orig_croissant_path, encoding="utf-8") as f:
|
36
43
|
data = json.load(f)
|
44
|
+
if filepath_prefix:
|
45
|
+
assert data["distribution"][0]["@id"] == "mini_immuno.anndata.zarr" # noqa: S101
|
46
|
+
data["distribution"][0]["@id"] = str(Path(filepath_prefix) / dataset1_path.name)
|
47
|
+
if strip_version:
|
48
|
+
data.pop("version", None)
|
37
49
|
if n_files == 2:
|
38
|
-
|
50
|
+
file_mini_csv()
|
51
|
+
if filepath_prefix:
|
52
|
+
dataset2_path = Path(filepath_prefix) / "mini.csv"
|
53
|
+
else:
|
54
|
+
dataset2_path = Path("mini.csv")
|
39
55
|
data["distribution"].append(
|
40
56
|
{
|
41
57
|
"@type": "sc:FileObject",
|
42
|
-
"@id":
|
58
|
+
"@id": dataset2_path.as_posix(),
|
43
59
|
"name": "mini.csv",
|
44
60
|
"encodingFormat": "text/csv",
|
45
61
|
}
|
lamindb/examples/datasets/_core.py
CHANGED
@@ -353,7 +353,7 @@ def anndata_suo22_Visium10X(): # pragma: no cover
|
|
353
353
|
return ad.read_h5ad(filepath)
|
354
354
|
|
355
355
|
|
356
|
-
def mudata_papalexi21_subset() -> MuData: # pragma: no cover
|
356
|
+
def mudata_papalexi21_subset(with_uns: bool = False) -> MuData: # pragma: no cover
|
357
357
|
"""A subsetted mudata from papalexi21.
|
358
358
|
|
359
359
|
To reproduce the subsetting:
|
@@ -415,6 +415,13 @@ def mudata_papalexi21_subset() -> MuData: # pragma: no cover
|
|
415
415
|
mdata["hto"].obs["technique"] = mdata["hto"].obs["technique"].astype("category")
|
416
416
|
mdata.pull_obs(["technique"], mods="hto")
|
417
417
|
|
418
|
+
if with_uns:
|
419
|
+
mdata.uns["study_metadata"] = {
|
420
|
+
"temperature": 21.6,
|
421
|
+
"experiment": "Experiment 1",
|
422
|
+
}
|
423
|
+
mdata["rna"].uns["site_metadata"] = {"pos": 99.9, "site_id": "SITE001"}
|
424
|
+
|
418
425
|
return mdata
|
419
426
|
|
420
427
|
|
lamindb/examples/datasets/mini_immuno.py
CHANGED
@@ -78,7 +78,6 @@ def get_dataset1(
|
|
78
78
|
with_outdated_gene: bool = False,
|
79
79
|
with_wrong_subtype: bool = False,
|
80
80
|
with_index_type_mismatch: bool = False,
|
81
|
-
with_nested_uns: bool = False,
|
82
81
|
) -> pd.DataFrame | ad.AnnData:
|
83
82
|
"""A small tabular dataset measuring expression & metadata."""
|
84
83
|
# define the data in the dataset
|
lamindb/examples/fixtures/sheets.py
CHANGED
@@ -46,6 +46,8 @@ def populate_sheets_compound_treatment():
|
|
46
46
|
|
47
47
|
# Samples ---------------------------
|
48
48
|
|
49
|
+
project = ln.Feature(name="project", dtype=ln.Project).save()
|
50
|
+
project1 = ln.Project(name="Project 1").save()
|
49
51
|
sample_type = ln.Record(name="BioSample", is_type=True).save()
|
50
52
|
treatment = ln.Feature(name="treatment", dtype=treatment_type).save()
|
51
53
|
cell_line = ln.Feature(name="cell_line", dtype=bt.CellLine).save()
|
@@ -54,7 +56,7 @@ def populate_sheets_compound_treatment():
|
|
54
56
|
cell_line.save()
|
55
57
|
schema1 = ln.Schema(
|
56
58
|
name="My samples schema 2025-06",
|
57
|
-
features=[treatment, cell_line, preparation_date],
|
59
|
+
features=[treatment, cell_line, preparation_date, project],
|
58
60
|
).save()
|
59
61
|
sample_sheet1 = ln.Record(
|
60
62
|
name="My samples 2025-06", schema=schema1, type=sample_type
|
@@ -69,6 +71,7 @@ def populate_sheets_compound_treatment():
|
|
69
71
|
ln.models.RecordJson(
|
70
72
|
record=sample1, feature=preparation_date, value="2025-06-01T05:00:00"
|
71
73
|
).save()
|
74
|
+
ln.models.RecordProject(record=sample1, feature=project, value=project1).save()
|
72
75
|
# populate sample2
|
73
76
|
sample2 = ln.Record(name="sample2", type=sample_sheet1).save()
|
74
77
|
ln.models.RecordRecord(record=sample2, feature=treatment, value=treatment2).save()
|
@@ -76,12 +79,13 @@ def populate_sheets_compound_treatment():
|
|
76
79
|
ln.models.RecordJson(
|
77
80
|
record=sample2, feature=preparation_date, value="2025-06-01T06:00:00"
|
78
81
|
).save()
|
82
|
+
ln.models.RecordProject(record=sample2, feature=project, value=project1).save()
|
79
83
|
|
80
84
|
# another sheet for samples
|
81
85
|
sample_note = ln.Feature(name="sample_note", dtype="str").save()
|
82
86
|
schema2 = ln.Schema(
|
83
87
|
name="My samples schema 2025-07",
|
84
|
-
features=[treatment, cell_line, sample_note],
|
88
|
+
features=[treatment, cell_line, sample_note, project],
|
85
89
|
).save()
|
86
90
|
# the sheet
|
87
91
|
sample_sheet2 = ln.Record(
|
@@ -94,6 +98,7 @@ def populate_sheets_compound_treatment():
|
|
94
98
|
ln.models.RecordJson(
|
95
99
|
record=sample3, feature=preparation_date, value="2025-06-02T05:00:00Z"
|
96
100
|
).save()
|
101
|
+
ln.models.RecordProject(record=sample3, feature=project, value=project1).save()
|
97
102
|
# populate sample4
|
98
103
|
sample4 = ln.Record(type=sample_sheet2).save()
|
99
104
|
ln.models.RecordRecord(record=sample4, feature=treatment, value=treatment2).save()
|
@@ -101,6 +106,7 @@ def populate_sheets_compound_treatment():
|
|
101
106
|
ln.models.RecordJson(
|
102
107
|
record=sample4, feature=preparation_date, value="2025-06-02T06:00:00Z"
|
103
108
|
).save()
|
109
|
+
ln.models.RecordProject(record=sample4, feature=project, value=project1).save()
|
104
110
|
|
105
111
|
yield treatments_sheet, sample_sheet1
|
106
112
|
|
lamindb/integrations/_croissant.py
CHANGED
@@ -4,6 +4,10 @@ import json
|
|
4
4
|
from pathlib import Path
|
5
5
|
from typing import TYPE_CHECKING, Any
|
6
6
|
|
7
|
+
import lamindb_setup as ln_setup
|
8
|
+
from lamin_utils import logger
|
9
|
+
from lamindb_setup.core.upath import UPath
|
10
|
+
|
7
11
|
if TYPE_CHECKING:
|
8
12
|
import lamindb as ln
|
9
13
|
|
@@ -27,6 +31,8 @@ def curate_from_croissant(
|
|
27
31
|
"""
|
28
32
|
import lamindb as ln
|
29
33
|
|
34
|
+
from ..models.artifact import check_path_in_existing_storage
|
35
|
+
|
30
36
|
# Load CroissantML data
|
31
37
|
if isinstance(croissant_data, (str, Path)):
|
32
38
|
if not Path(croissant_data).exists():
|
@@ -49,10 +55,10 @@ def curate_from_croissant(
|
|
49
55
|
|
50
56
|
# Extract basic metadata
|
51
57
|
dataset_name = data["name"]
|
52
|
-
description = data.get("description",
|
53
|
-
version = data.get("version",
|
54
|
-
license_info = data.get("license",
|
55
|
-
project_name = data.get("cr:projectName",
|
58
|
+
description = data.get("description", None)
|
59
|
+
version = data.get("version", None)
|
60
|
+
license_info = data.get("license", None)
|
61
|
+
project_name = data.get("cr:projectName", None)
|
56
62
|
|
57
63
|
# Create license feature and label if license info exists
|
58
64
|
license_label = None
|
@@ -86,18 +92,35 @@ def curate_from_croissant(
|
|
86
92
|
content_url = dist.get("contentUrl", "")
|
87
93
|
file_path = content_url or data.get("url", "")
|
88
94
|
if not file_path:
|
89
|
-
raise ValueError(
|
90
|
-
|
95
|
+
raise ValueError(f"No file path found in croissant distribution: {dist}")
|
96
|
+
if not UPath(file_path).exists():
|
97
|
+
raise ValueError(f"Inferred file path does not exist: {file_path}")
|
98
|
+
result = check_path_in_existing_storage(
|
99
|
+
file_path, check_hub_register_storage=ln_setup.settings.instance.is_on_hub
|
100
|
+
)
|
101
|
+
if isinstance(result, ln.Storage):
|
102
|
+
key = None # will automatically use existing storage key
|
103
|
+
else:
|
104
|
+
current_storage_location = (
|
105
|
+
ln.settings.storage
|
106
|
+
if not ln.setup.settings.instance.keep_artifacts_local
|
107
|
+
else ln.settings.local_storage
|
108
|
+
)
|
109
|
+
logger.warning(
|
110
|
+
f"file path {file_path} is not part of a known storage location, will be duplicated to: {current_storage_location}"
|
91
111
|
)
|
112
|
+
key = file_id
|
92
113
|
if len(file_distributions) == 1:
|
93
|
-
|
94
|
-
if
|
95
|
-
|
96
|
-
|
114
|
+
# it doesn't make sense to have the dataset name on the individual
|
115
|
+
# artifact if it's part of a collection
|
116
|
+
artifact_description = dataset_name
|
117
|
+
if description is not None:
|
118
|
+
artifact_description += f" - {description}"
|
97
119
|
else:
|
98
|
-
artifact_description =
|
120
|
+
artifact_description = None
|
99
121
|
artifact = ln.Artifact( # type: ignore
|
100
122
|
file_path,
|
123
|
+
key=key,
|
101
124
|
description=artifact_description,
|
102
125
|
version=version,
|
103
126
|
kind="dataset",
|
lamindb/migrations/0121_recorduser.py
CHANGED
@@ -50,4 +50,11 @@ class Migration(migrations.Migration):
|
|
50
50
|
},
|
51
51
|
bases=(models.Model, lamindb.models.sqlrecord.IsLink),
|
52
52
|
),
|
53
|
+
migrations.AddField(
|
54
|
+
model_name="record",
|
55
|
+
name="linked_users",
|
56
|
+
field=models.ManyToManyField(
|
57
|
+
related_name="records", through="lamindb.RecordUser", to="lamindb.user"
|
58
|
+
),
|
59
|
+
),
|
53
60
|
]
|
lamindb/models/__init__.py
CHANGED
lamindb/models/_feature_manager.py
CHANGED
@@ -23,7 +23,7 @@ from rich.table import Column, Table
|
|
23
23
|
from rich.text import Text
|
24
24
|
|
25
25
|
from lamindb.core.storage import LocalPathClasses
|
26
|
-
from lamindb.errors import DoesNotExist, ValidationError
|
26
|
+
from lamindb.errors import DoesNotExist, InvalidArgument, ValidationError
|
27
27
|
from lamindb.models._from_values import _format_values
|
28
28
|
from lamindb.models.feature import (
|
29
29
|
serialize_pandas_dtype,
|
@@ -33,7 +33,6 @@ from lamindb.models.save import save
|
|
33
33
|
from lamindb.models.schema import DICT_KEYS_TYPE, Schema
|
34
34
|
from lamindb.models.sqlrecord import (
|
35
35
|
REGISTRY_UNIQUE_FIELD,
|
36
|
-
Registry,
|
37
36
|
get_name_field,
|
38
37
|
transfer_fk_to_default_db_bulk,
|
39
38
|
transfer_to_default_db,
|
@@ -65,7 +64,7 @@ if TYPE_CHECKING:
|
|
65
64
|
Collection,
|
66
65
|
IsLink,
|
67
66
|
)
|
68
|
-
from lamindb.models.query_set import
|
67
|
+
from lamindb.models.query_set import BasicQuerySet
|
69
68
|
|
70
69
|
from .run import Run
|
71
70
|
|
@@ -100,7 +99,7 @@ def get_schema_by_slot_(host: Artifact) -> dict[str, Schema]:
|
|
100
99
|
|
101
100
|
def get_label_links(
|
102
101
|
host: Artifact | Collection, registry: str, feature: Feature
|
103
|
-
) ->
|
102
|
+
) -> BasicQuerySet:
|
104
103
|
kwargs = {"artifact_id": host.id, "feature_id": feature.id}
|
105
104
|
link_records = (
|
106
105
|
getattr(host, host.features._accessor_by_registry[registry]) # type: ignore
|
@@ -110,7 +109,7 @@ def get_label_links(
|
|
110
109
|
return link_records
|
111
110
|
|
112
111
|
|
113
|
-
def get_schema_links(host: Artifact | Collection) ->
|
112
|
+
def get_schema_links(host: Artifact | Collection) -> BasicQuerySet:
|
114
113
|
kwargs = {"artifact_id": host.id}
|
115
114
|
links_schema = host.feature_sets.through.objects.filter(**kwargs)
|
116
115
|
return links_schema
|
@@ -562,21 +561,29 @@ def infer_feature_type_convert_json(
|
|
562
561
|
|
563
562
|
|
564
563
|
def filter_base(
|
565
|
-
|
566
|
-
|
567
|
-
|
564
|
+
queryset: BasicQuerySet,
|
565
|
+
_skip_validation: bool = True,
|
566
|
+
**expression,
|
567
|
+
) -> BasicQuerySet:
|
568
|
+
from lamindb.models import Artifact, BasicQuerySet, QuerySet
|
569
|
+
|
570
|
+
# not QuerySet but only BasicQuerySet
|
571
|
+
assert isinstance(queryset, BasicQuerySet) and not isinstance(queryset, QuerySet) # noqa: S101
|
572
|
+
|
573
|
+
registry = queryset.model
|
574
|
+
db = queryset.db
|
568
575
|
|
569
576
|
model = Feature
|
570
577
|
value_model = FeatureValue
|
571
578
|
keys_normalized = [key.split("__")[0] for key in expression]
|
572
579
|
if not _skip_validation:
|
573
|
-
validated = model.validate(keys_normalized, field="name", mute=True)
|
580
|
+
validated = model.using(db).validate(keys_normalized, field="name", mute=True)
|
574
581
|
if sum(validated) != len(keys_normalized):
|
575
582
|
raise ValidationError(
|
576
583
|
f"Some keys in the filter expression are not registered as features: {np.array(keys_normalized)[~validated]}"
|
577
584
|
)
|
578
585
|
new_expression = {}
|
579
|
-
features = model.filter(name__in=keys_normalized).all().distinct()
|
586
|
+
features = model.using(db).filter(name__in=keys_normalized).all().distinct()
|
580
587
|
feature_param = "feature"
|
581
588
|
for key, value in expression.items():
|
582
589
|
split_key = key.split("__")
|
@@ -594,7 +601,7 @@ def filter_base(
|
|
594
601
|
from .artifact import ArtifactFeatureValue
|
595
602
|
|
596
603
|
if value: # True
|
597
|
-
return
|
604
|
+
return queryset.exclude(
|
598
605
|
id__in=Subquery(
|
599
606
|
ArtifactFeatureValue.objects.filter(
|
600
607
|
featurevalue__feature=feature
|
@@ -602,7 +609,7 @@ def filter_base(
|
|
602
609
|
)
|
603
610
|
)
|
604
611
|
else:
|
605
|
-
return
|
612
|
+
return queryset.exclude(
|
606
613
|
id__in=Subquery(
|
607
614
|
ArtifactFeatureValue.objects.filter(
|
608
615
|
featurevalue__feature=feature
|
@@ -626,9 +633,9 @@ def filter_base(
|
|
626
633
|
f"links_{result['registry'].__name__.lower()}__feature": feature
|
627
634
|
}
|
628
635
|
if value: # True
|
629
|
-
return
|
636
|
+
return queryset.exclude(**kwargs)
|
630
637
|
else:
|
631
|
-
return
|
638
|
+
return queryset.filter(**kwargs)
|
632
639
|
else:
|
633
640
|
# because SQL is sensitive to whether querying with __in or not
|
634
641
|
# and might return multiple equivalent records for the latter
|
@@ -642,7 +649,7 @@ def filter_base(
|
|
642
649
|
# we need the comparator here because users might query like so
|
643
650
|
# ln.Artifact.filter(experiment__contains="Experi")
|
644
651
|
expression = {f"{field_name}{comparator}": value}
|
645
|
-
labels = result["registry"].filter(**expression).all()
|
652
|
+
labels = result["registry"].using(db).filter(**expression).all()
|
646
653
|
if len(labels) == 0:
|
647
654
|
raise DoesNotExist(
|
648
655
|
f"Did not find a {label_registry.__name__} matching `{field_name}{comparator}={value}`"
|
@@ -668,9 +675,62 @@ def filter_base(
|
|
668
675
|
# find artifacts that are annotated by all of them at the same
|
669
676
|
# time; hence, we don't want the __in construct that we use to match strings
|
670
677
|
# https://laminlabs.slack.com/archives/C04FPE8V01W/p1688328084810609
|
671
|
-
if not
|
678
|
+
if not new_expression:
|
672
679
|
raise NotImplementedError
|
673
|
-
return
|
680
|
+
return queryset.filter(**new_expression)
|
681
|
+
|
682
|
+
|
683
|
+
def filter_with_features(
|
684
|
+
queryset: BasicQuerySet, *queries, **expressions
|
685
|
+
) -> BasicQuerySet:
|
686
|
+
from lamindb.models import Artifact, BasicQuerySet, QuerySet
|
687
|
+
|
688
|
+
if isinstance(queryset, QuerySet):
|
689
|
+
# need to avoid infinite recursion because
|
690
|
+
# filter_with_features is called in queryset.filter otherwise
|
691
|
+
filter_kwargs = {"_skip_filter_with_features": True}
|
692
|
+
else:
|
693
|
+
filter_kwargs = {}
|
694
|
+
|
695
|
+
registry = queryset.model
|
696
|
+
|
697
|
+
if registry is Artifact and not any(e.startswith("kind") for e in expressions):
|
698
|
+
exclude_kwargs = {"kind": "__lamindb_run__"}
|
699
|
+
else:
|
700
|
+
exclude_kwargs = {}
|
701
|
+
|
702
|
+
if expressions:
|
703
|
+
keys_normalized = [key.split("__")[0] for key in expressions]
|
704
|
+
field_or_feature_or_param = keys_normalized[0].split("__")[0]
|
705
|
+
if field_or_feature_or_param in registry.__get_available_fields__():
|
706
|
+
qs = queryset.filter(*queries, **expressions, **filter_kwargs)
|
707
|
+
elif all(
|
708
|
+
features_validated := Feature.objects.using(queryset.db).validate(
|
709
|
+
keys_normalized, field="name", mute=True
|
710
|
+
)
|
711
|
+
):
|
712
|
+
# filter_base requires qs to be BasicQuerySet
|
713
|
+
qs = filter_base(
|
714
|
+
queryset._to_class(BasicQuerySet, copy=True),
|
715
|
+
_skip_validation=True,
|
716
|
+
**expressions,
|
717
|
+
)._to_class(type(queryset), copy=False)
|
718
|
+
qs = qs.filter(*queries, **filter_kwargs)
|
719
|
+
else:
|
720
|
+
features = ", ".join(sorted(np.array(keys_normalized)[~features_validated]))
|
721
|
+
message = f"feature names: {features}"
|
722
|
+
avail_fields = registry.__get_available_fields__()
|
723
|
+
if "_branch_code" in avail_fields:
|
724
|
+
avail_fields.remove("_branch_code") # backward compat
|
725
|
+
fields = ", ".join(sorted(avail_fields))
|
726
|
+
raise InvalidArgument(
|
727
|
+
f"You can query either by available fields: {fields}\n"
|
728
|
+
f"Or fix invalid {message}"
|
729
|
+
)
|
730
|
+
else:
|
731
|
+
qs = queryset.filter(*queries, **filter_kwargs)
|
732
|
+
|
733
|
+
return qs.exclude(**exclude_kwargs) if exclude_kwargs else qs
|
674
734
|
|
675
735
|
|
676
736
|
# for deprecated functionality
|
@@ -765,7 +825,7 @@ class FeatureManager:
|
|
765
825
|
return describe_features(self._host, to_dict=True) # type: ignore
|
766
826
|
|
767
827
|
@deprecated("slots[slot].members")
|
768
|
-
def __getitem__(self, slot) ->
|
828
|
+
def __getitem__(self, slot) -> BasicQuerySet:
|
769
829
|
if slot not in self.slots:
|
770
830
|
raise ValueError(
|
771
831
|
f"No linked feature set for slot: {slot}\nDid you get validation"
|