lamindb 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +25 -6
- lamindb/_finish.py +5 -5
- lamindb/_tracked.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/_context.py +32 -6
- lamindb/core/_settings.py +1 -1
- lamindb/core/datasets/mini_immuno.py +8 -0
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +9 -9
- lamindb/core/storage/_valid_suffixes.py +1 -0
- lamindb/core/storage/_zarr.py +32 -107
- lamindb/curators/__init__.py +19 -2
- lamindb/curators/_cellxgene_schemas/__init__.py +3 -3
- lamindb/curators/_legacy.py +15 -19
- lamindb/curators/core.py +247 -80
- lamindb/errors.py +2 -2
- lamindb/migrations/0069_squashed.py +8 -8
- lamindb/migrations/0071_lamindbv1_migrate_schema.py +3 -3
- lamindb/migrations/0073_merge_ourprojects.py +7 -7
- lamindb/migrations/0075_lamindbv1_part5.py +1 -1
- lamindb/migrations/0077_lamindbv1_part6b.py +3 -3
- lamindb/migrations/0080_polish_lamindbv1.py +2 -2
- lamindb/migrations/0088_schema_components.py +1 -1
- lamindb/migrations/0090_runproject_project_runs.py +2 -2
- lamindb/migrations/0091_alter_featurevalue_options_alter_space_options_and_more.py +1 -1
- lamindb/migrations/0094_writeloglock_writelogmigrationstate_and_more.py +84 -0
- lamindb/migrations/0095_remove_rundata_flextable.py +155 -0
- lamindb/migrations/0096_remove_artifact__param_values_and_more.py +266 -0
- lamindb/migrations/0097_remove_schemaparam_param_remove_paramvalue_param_and_more.py +27 -0
- lamindb/migrations/0098_alter_feature_type_alter_project_type_and_more.py +656 -0
- lamindb/migrations/0099_alter_writelog_seqno.py +22 -0
- lamindb/migrations/0100_branch_alter_artifact__branch_code_and_more.py +102 -0
- lamindb/migrations/0101_alter_artifact_hash_alter_feature_name_and_more.py +444 -0
- lamindb/migrations/0102_remove_writelog_branch_code_and_more.py +72 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +46 -0
- lamindb/migrations/{0090_squashed.py → 0103_squashed.py} +1013 -1009
- lamindb/models/__init__.py +35 -18
- lamindb/models/_describe.py +4 -4
- lamindb/models/_django.py +38 -4
- lamindb/models/_feature_manager.py +66 -123
- lamindb/models/_from_values.py +13 -13
- lamindb/models/_label_manager.py +8 -6
- lamindb/models/_relations.py +7 -7
- lamindb/models/artifact.py +166 -156
- lamindb/models/can_curate.py +25 -25
- lamindb/models/collection.py +48 -18
- lamindb/models/core.py +3 -3
- lamindb/models/feature.py +88 -60
- lamindb/models/has_parents.py +17 -17
- lamindb/models/project.py +52 -24
- lamindb/models/query_manager.py +5 -5
- lamindb/models/query_set.py +61 -37
- lamindb/models/record.py +158 -1583
- lamindb/models/run.py +39 -176
- lamindb/models/save.py +6 -6
- lamindb/models/schema.py +32 -43
- lamindb/models/sqlrecord.py +1743 -0
- lamindb/models/transform.py +17 -33
- lamindb/models/ulabel.py +21 -15
- {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/METADATA +7 -11
- lamindb-1.6.0.dist-info/RECORD +118 -0
- lamindb/core/storage/_anndata_sizes.py +0 -41
- lamindb/models/flextable.py +0 -163
- lamindb-1.5.3.dist-info/RECORD +0 -109
- {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/LICENSE +0 -0
- {lamindb-1.5.3.dist-info → lamindb-1.6.0.dist-info}/WHEEL +0 -0
lamindb/models/has_parents.py
CHANGED
@@ -7,8 +7,8 @@ from typing import TYPE_CHECKING, Literal
|
|
7
7
|
import lamindb_setup as ln_setup
|
8
8
|
from lamin_utils import logger
|
9
9
|
|
10
|
-
from .record import format_field_value, get_name_field
|
11
10
|
from .run import Run
|
11
|
+
from .sqlrecord import format_field_value, get_name_field
|
12
12
|
|
13
13
|
if TYPE_CHECKING:
|
14
14
|
from graphviz import Digraph
|
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
|
|
18
18
|
from .artifact import Artifact
|
19
19
|
from .collection import Collection
|
20
20
|
from .query_set import QuerySet
|
21
|
-
from .
|
21
|
+
from .sqlrecord import SQLRecord
|
22
22
|
from .transform import Transform
|
23
23
|
|
24
24
|
LAMIN_GREEN_LIGHTER = "#10b981"
|
@@ -38,7 +38,7 @@ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
|
38
38
|
# this is optimized to have fewer recursive calls
|
39
39
|
# also len of QuerySet can be costly at times
|
40
40
|
def _query_relatives(
|
41
|
-
records: QuerySet | list[
|
41
|
+
records: QuerySet | list[SQLRecord],
|
42
42
|
kind: Literal["parents", "children"],
|
43
43
|
cls: type[HasParents],
|
44
44
|
) -> QuerySet:
|
@@ -242,7 +242,7 @@ def view_lineage(
|
|
242
242
|
|
243
243
|
|
244
244
|
def view_parents(
|
245
|
-
record:
|
245
|
+
record: SQLRecord,
|
246
246
|
field: str,
|
247
247
|
with_parents: bool = True,
|
248
248
|
with_children: bool = False,
|
@@ -332,7 +332,7 @@ def view_parents(
|
|
332
332
|
|
333
333
|
|
334
334
|
def _get_parents(
|
335
|
-
record:
|
335
|
+
record: SQLRecord,
|
336
336
|
field: str,
|
337
337
|
distance: int,
|
338
338
|
children: bool = False,
|
@@ -368,7 +368,7 @@ def _get_parents(
|
|
368
368
|
|
369
369
|
|
370
370
|
def _df_edges_from_parents(
|
371
|
-
record:
|
371
|
+
record: SQLRecord,
|
372
372
|
field: str,
|
373
373
|
distance: int,
|
374
374
|
children: bool = False,
|
@@ -418,7 +418,7 @@ def _df_edges_from_parents(
|
|
418
418
|
return df_edges
|
419
419
|
|
420
420
|
|
421
|
-
def _record_label(record:
|
421
|
+
def _record_label(record: SQLRecord, field: str | None = None):
|
422
422
|
from .artifact import Artifact
|
423
423
|
from .collection import Collection
|
424
424
|
from .transform import Transform
|
@@ -471,7 +471,7 @@ def _record_label(record: Record, field: str | None = None):
|
|
471
471
|
)
|
472
472
|
|
473
473
|
|
474
|
-
def _add_emoji(record:
|
474
|
+
def _add_emoji(record: SQLRecord, label: str):
|
475
475
|
if record.__class__.__name__ == "Transform":
|
476
476
|
emoji = TRANSFORM_EMOJIS.get(record.type, "💫")
|
477
477
|
elif record.__class__.__name__ == "Run":
|
@@ -493,22 +493,22 @@ def _get_all_parent_runs(data: Artifact | Collection) -> list:
|
|
493
493
|
inputs_run = (
|
494
494
|
r.__getattribute__(f"input_{name}s")
|
495
495
|
.all()
|
496
|
-
.filter(
|
496
|
+
.filter(branch_id__in=[0, 1])
|
497
497
|
.list()
|
498
498
|
)
|
499
499
|
if name == "artifact":
|
500
500
|
inputs_run += (
|
501
|
-
r.input_collections.all().filter(
|
501
|
+
r.input_collections.all().filter(branch_id__in=[0, 1]).list()
|
502
502
|
)
|
503
503
|
outputs_run = (
|
504
504
|
r.__getattribute__(f"output_{name}s")
|
505
505
|
.all()
|
506
|
-
.filter(
|
506
|
+
.filter(branch_id__in=[0, 1])
|
507
507
|
.list()
|
508
508
|
)
|
509
509
|
if name == "artifact":
|
510
510
|
outputs_run += (
|
511
|
-
r.output_collections.all().filter(
|
511
|
+
r.output_collections.all().filter(branch_id__in=[0, 1]).list()
|
512
512
|
)
|
513
513
|
# if inputs are outputs artifacts are the same, will result infinite loop
|
514
514
|
# so only show as outputs
|
@@ -542,7 +542,7 @@ def _get_all_child_runs(data: Artifact | Collection) -> list:
|
|
542
542
|
{
|
543
543
|
f.run
|
544
544
|
for f in data.run.output_collections.all()
|
545
|
-
.filter(
|
545
|
+
.filter(branch_id__in=[0, 1])
|
546
546
|
.all()
|
547
547
|
}
|
548
548
|
)
|
@@ -553,24 +553,24 @@ def _get_all_child_runs(data: Artifact | Collection) -> list:
|
|
553
553
|
inputs_run = (
|
554
554
|
r.__getattribute__(f"input_{name}s")
|
555
555
|
.all()
|
556
|
-
.filter(
|
556
|
+
.filter(branch_id__in=[0, 1])
|
557
557
|
.list()
|
558
558
|
)
|
559
559
|
if name == "artifact":
|
560
560
|
inputs_run += (
|
561
|
-
r.input_collections.all().filter(
|
561
|
+
r.input_collections.all().filter(branch_id__in=[0, 1]).list()
|
562
562
|
)
|
563
563
|
run_inputs_outputs += [(inputs_run, r)]
|
564
564
|
|
565
565
|
outputs_run = (
|
566
566
|
r.__getattribute__(f"output_{name}s")
|
567
567
|
.all()
|
568
|
-
.filter(
|
568
|
+
.filter(branch_id__in=[0, 1])
|
569
569
|
.list()
|
570
570
|
)
|
571
571
|
if name == "artifact":
|
572
572
|
outputs_run += (
|
573
|
-
r.output_collections.all().filter(
|
573
|
+
r.output_collections.all().filter(branch_id__in=[0, 1]).list()
|
574
574
|
)
|
575
575
|
run_inputs_outputs += [(r, outputs_run)]
|
576
576
|
|
lamindb/models/project.py
CHANGED
@@ -24,9 +24,10 @@ from .artifact import Artifact
|
|
24
24
|
from .can_curate import CanCurate
|
25
25
|
from .collection import Collection
|
26
26
|
from .feature import Feature
|
27
|
-
from .record import
|
27
|
+
from .record import Record, Sheet
|
28
28
|
from .run import Run, TracksRun, TracksUpdates, User
|
29
29
|
from .schema import Schema
|
30
|
+
from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, ValidateFields
|
30
31
|
from .transform import Transform
|
31
32
|
from .ulabel import ULabel
|
32
33
|
|
@@ -35,7 +36,7 @@ if TYPE_CHECKING:
|
|
35
36
|
from datetime import datetime
|
36
37
|
|
37
38
|
|
38
|
-
class Person(
|
39
|
+
class Person(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
39
40
|
"""People such as authors of a study or collaborators in a project.
|
40
41
|
|
41
42
|
This registry is distinct from `User` and exists for project management.
|
@@ -50,7 +51,7 @@ class Person(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
50
51
|
... ).save()
|
51
52
|
"""
|
52
53
|
|
53
|
-
class Meta(
|
54
|
+
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
54
55
|
abstract = False
|
55
56
|
|
56
57
|
id: int = models.AutoField(primary_key=True)
|
@@ -84,7 +85,7 @@ class Person(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
84
85
|
super().__init__(*args, **kwargs)
|
85
86
|
|
86
87
|
|
87
|
-
class Reference(
|
88
|
+
class Reference(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
88
89
|
"""References such as internal studies, papers, documents, or URLs.
|
89
90
|
|
90
91
|
Example:
|
@@ -100,7 +101,7 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
100
101
|
... ).save()
|
101
102
|
"""
|
102
103
|
|
103
|
-
class Meta(
|
104
|
+
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
104
105
|
abstract = False
|
105
106
|
|
106
107
|
id: int = models.AutoField(primary_key=True)
|
@@ -112,14 +113,14 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
112
113
|
name: str = CharField(db_index=True)
|
113
114
|
"""Title or name of the reference document."""
|
114
115
|
type: Reference | None = ForeignKey(
|
115
|
-
"self", PROTECT, null=True, related_name="
|
116
|
+
"self", PROTECT, null=True, related_name="references"
|
116
117
|
)
|
117
118
|
"""Type of reference (e.g., 'Study', 'Paper', 'Preprint').
|
118
119
|
|
119
120
|
Allows to group reference by type, e.g., internal studies vs. all papers etc.
|
120
121
|
"""
|
121
|
-
|
122
|
-
"""
|
122
|
+
references: Reference
|
123
|
+
"""References of this type (can only be non-empty if `is_type` is `True`)."""
|
123
124
|
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
124
125
|
"""Distinguish types from instances of the type."""
|
125
126
|
abbr: str | None = CharField(
|
@@ -189,7 +190,7 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
189
190
|
super().__init__(*args, **kwargs)
|
190
191
|
|
191
192
|
|
192
|
-
class Project(
|
193
|
+
class Project(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
193
194
|
"""Projects.
|
194
195
|
|
195
196
|
Example:
|
@@ -200,7 +201,7 @@ class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
200
201
|
... ).save()
|
201
202
|
"""
|
202
203
|
|
203
|
-
class Meta(
|
204
|
+
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
204
205
|
abstract = False
|
205
206
|
|
206
207
|
id: int = models.AutoField(primary_key=True)
|
@@ -212,11 +213,11 @@ class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
212
213
|
name: str = CharField(db_index=True)
|
213
214
|
"""Title or name of the Project."""
|
214
215
|
type: Project | None = ForeignKey(
|
215
|
-
"self", PROTECT, null=True, related_name="
|
216
|
+
"self", PROTECT, null=True, related_name="projects"
|
216
217
|
)
|
217
218
|
"""Type of project (e.g., 'Program', 'Project', 'GithubIssue', 'Task')."""
|
218
|
-
|
219
|
-
"""
|
219
|
+
projects: Project
|
220
|
+
"""Projects of this type (can only be non-empty if `is_type` is `True`)."""
|
220
221
|
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
221
222
|
"""Distinguish types from instances of the type."""
|
222
223
|
abbr: str | None = CharField(max_length=32, db_index=True, null=True)
|
@@ -273,6 +274,14 @@ class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
273
274
|
Schema, through="SchemaProject", related_name="projects"
|
274
275
|
)
|
275
276
|
"""Linked schemas."""
|
277
|
+
records: Record = models.ManyToManyField(
|
278
|
+
Record, through="RecordProject", related_name="projects"
|
279
|
+
)
|
280
|
+
"""Linked records."""
|
281
|
+
sheets: Sheet = models.ManyToManyField(
|
282
|
+
Sheet, through="SheetProject", related_name="projects"
|
283
|
+
)
|
284
|
+
"""Linked sheets."""
|
276
285
|
collections: Collection = models.ManyToManyField(
|
277
286
|
Collection, through="CollectionProject", related_name="projects"
|
278
287
|
)
|
@@ -304,7 +313,7 @@ class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
304
313
|
super().__init__(*args, **kwargs)
|
305
314
|
|
306
315
|
|
307
|
-
class ArtifactProject(
|
316
|
+
class ArtifactProject(BaseSQLRecord, IsLink, TracksRun):
|
308
317
|
id: int = models.BigAutoField(primary_key=True)
|
309
318
|
artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_project")
|
310
319
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_artifact")
|
@@ -323,7 +332,7 @@ class ArtifactProject(BasicRecord, LinkORM, TracksRun):
|
|
323
332
|
unique_together = ("artifact", "project", "feature")
|
324
333
|
|
325
334
|
|
326
|
-
class RunProject(
|
335
|
+
class RunProject(BaseSQLRecord, IsLink):
|
327
336
|
id: int = models.BigAutoField(primary_key=True)
|
328
337
|
run: Run = ForeignKey(Run, CASCADE, related_name="links_project")
|
329
338
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_run")
|
@@ -344,7 +353,7 @@ class RunProject(BasicRecord, LinkORM):
|
|
344
353
|
unique_together = ("run", "project")
|
345
354
|
|
346
355
|
|
347
|
-
class TransformProject(
|
356
|
+
class TransformProject(BaseSQLRecord, IsLink, TracksRun):
|
348
357
|
id: int = models.BigAutoField(primary_key=True)
|
349
358
|
transform: Transform = ForeignKey(Transform, CASCADE, related_name="links_project")
|
350
359
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_transform")
|
@@ -353,7 +362,7 @@ class TransformProject(BasicRecord, LinkORM, TracksRun):
|
|
353
362
|
unique_together = ("transform", "project")
|
354
363
|
|
355
364
|
|
356
|
-
class CollectionProject(
|
365
|
+
class CollectionProject(BaseSQLRecord, IsLink, TracksRun):
|
357
366
|
id: int = models.BigAutoField(primary_key=True)
|
358
367
|
collection: Collection = ForeignKey(
|
359
368
|
Collection, CASCADE, related_name="links_project"
|
@@ -364,7 +373,7 @@ class CollectionProject(BasicRecord, LinkORM, TracksRun):
|
|
364
373
|
unique_together = ("collection", "project")
|
365
374
|
|
366
375
|
|
367
|
-
class ULabelProject(
|
376
|
+
class ULabelProject(BaseSQLRecord, IsLink, TracksRun):
|
368
377
|
id: int = models.BigAutoField(primary_key=True)
|
369
378
|
ulabel: ULabel = ForeignKey(ULabel, CASCADE, related_name="links_project")
|
370
379
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_ulabel")
|
@@ -373,7 +382,7 @@ class ULabelProject(BasicRecord, LinkORM, TracksRun):
|
|
373
382
|
unique_together = ("ulabel", "project")
|
374
383
|
|
375
384
|
|
376
|
-
class PersonProject(
|
385
|
+
class PersonProject(BaseSQLRecord, IsLink, TracksRun):
|
377
386
|
id: int = models.BigAutoField(primary_key=True)
|
378
387
|
person: Person = ForeignKey(Person, CASCADE, related_name="links_project")
|
379
388
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_person")
|
@@ -383,7 +392,7 @@ class PersonProject(BasicRecord, LinkORM, TracksRun):
|
|
383
392
|
unique_together = ("person", "project")
|
384
393
|
|
385
394
|
|
386
|
-
class FeatureProject(
|
395
|
+
class FeatureProject(BaseSQLRecord, IsLink, TracksRun):
|
387
396
|
id: int = models.BigAutoField(primary_key=True)
|
388
397
|
feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_project")
|
389
398
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_feature")
|
@@ -392,7 +401,7 @@ class FeatureProject(BasicRecord, LinkORM, TracksRun):
|
|
392
401
|
unique_together = ("feature", "project")
|
393
402
|
|
394
403
|
|
395
|
-
class SchemaProject(
|
404
|
+
class SchemaProject(BaseSQLRecord, IsLink, TracksRun):
|
396
405
|
id: int = models.BigAutoField(primary_key=True)
|
397
406
|
schema: Schema = ForeignKey(Schema, CASCADE, related_name="links_project")
|
398
407
|
project: Project = ForeignKey(Project, PROTECT, related_name="links_schema")
|
@@ -401,7 +410,26 @@ class SchemaProject(BasicRecord, LinkORM, TracksRun):
|
|
401
410
|
unique_together = ("schema", "project")
|
402
411
|
|
403
412
|
|
404
|
-
class
|
413
|
+
class RecordProject(BaseSQLRecord, IsLink):
|
414
|
+
id: int = models.BigAutoField(primary_key=True)
|
415
|
+
record: Record = ForeignKey(Record, CASCADE, related_name="values_project")
|
416
|
+
feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordproject")
|
417
|
+
value: Project = ForeignKey(Project, PROTECT, related_name="links_record")
|
418
|
+
|
419
|
+
class Meta:
|
420
|
+
unique_together = ("record", "feature")
|
421
|
+
|
422
|
+
|
423
|
+
class SheetProject(BaseSQLRecord, IsLink, TracksRun):
|
424
|
+
id: int = models.BigAutoField(primary_key=True)
|
425
|
+
sheet: Sheet = ForeignKey(Sheet, CASCADE, related_name="links_project")
|
426
|
+
project: Project = ForeignKey(Project, PROTECT, related_name="links_sheet")
|
427
|
+
|
428
|
+
class Meta:
|
429
|
+
unique_together = ("sheet", "project")
|
430
|
+
|
431
|
+
|
432
|
+
class ArtifactReference(BaseSQLRecord, IsLink, TracksRun):
|
405
433
|
id: int = models.BigAutoField(primary_key=True)
|
406
434
|
artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="links_reference")
|
407
435
|
reference: Reference = ForeignKey(Reference, PROTECT, related_name="links_artifact")
|
@@ -420,7 +448,7 @@ class ArtifactReference(BasicRecord, LinkORM, TracksRun):
|
|
420
448
|
unique_together = ("artifact", "reference", "feature")
|
421
449
|
|
422
450
|
|
423
|
-
class TransformReference(
|
451
|
+
class TransformReference(BaseSQLRecord, IsLink, TracksRun):
|
424
452
|
id: int = models.BigAutoField(primary_key=True)
|
425
453
|
transform: Transform = ForeignKey(
|
426
454
|
Transform, CASCADE, related_name="links_reference"
|
@@ -433,7 +461,7 @@ class TransformReference(BasicRecord, LinkORM, TracksRun):
|
|
433
461
|
unique_together = ("transform", "reference")
|
434
462
|
|
435
463
|
|
436
|
-
class CollectionReference(
|
464
|
+
class CollectionReference(BaseSQLRecord, IsLink, TracksRun):
|
437
465
|
id: int = models.BigAutoField(primary_key=True)
|
438
466
|
collection: Collection = ForeignKey(
|
439
467
|
Collection, CASCADE, related_name="links_reference"
|
lamindb/models/query_manager.py
CHANGED
@@ -53,8 +53,8 @@ def _search(
|
|
53
53
|
If `return_queryset` is `True`. `QuerySet`.
|
54
54
|
|
55
55
|
See Also:
|
56
|
-
:meth:`~lamindb.models.
|
57
|
-
:meth:`~lamindb.models.
|
56
|
+
:meth:`~lamindb.models.SQLRecord.filter`
|
57
|
+
:meth:`~lamindb.models.SQLRecord.lookup`
|
58
58
|
|
59
59
|
Examples:
|
60
60
|
>>> ulabels = ln.ULabel.from_values(["ULabel1", "ULabel2", "ULabel3"], field="name")
|
@@ -87,7 +87,7 @@ def _search(
|
|
87
87
|
fields.append(field.field.name)
|
88
88
|
except AttributeError as error:
|
89
89
|
raise TypeError(
|
90
|
-
"Please pass a
|
90
|
+
"Please pass a SQLRecord string field, e.g., `CellType.name`!"
|
91
91
|
) from error
|
92
92
|
else:
|
93
93
|
fields.append(field)
|
@@ -185,7 +185,7 @@ def _lookup(
|
|
185
185
|
dictionary converter.
|
186
186
|
|
187
187
|
See Also:
|
188
|
-
:meth:`~lamindb.models.
|
188
|
+
:meth:`~lamindb.models.SQLRecord.search`
|
189
189
|
|
190
190
|
Examples:
|
191
191
|
>>> import bionty as bt
|
@@ -199,7 +199,7 @@ def _lookup(
|
|
199
199
|
>>> genes.ensg00000002745
|
200
200
|
>>> lookup_return_symbols = bt.Gene.lookup(field="ensembl_gene_id", return_field="symbol")
|
201
201
|
"""
|
202
|
-
from .
|
202
|
+
from .sqlrecord import get_name_field
|
203
203
|
|
204
204
|
queryset = cls.all() if isinstance(cls, (QuerySet, Manager)) else cls.objects.all()
|
205
205
|
field = get_name_field(registry=queryset.model, field=field)
|
lamindb/models/query_set.py
CHANGED
@@ -19,7 +19,7 @@ from ..errors import DoesNotExist
|
|
19
19
|
from ._is_versioned import IsVersioned
|
20
20
|
from .can_curate import CanCurate, _inspect, _standardize, _validate
|
21
21
|
from .query_manager import _lookup, _search
|
22
|
-
from .
|
22
|
+
from .sqlrecord import SQLRecord
|
23
23
|
|
24
24
|
if TYPE_CHECKING:
|
25
25
|
from lamindb.base.types import ListLike, StrField
|
@@ -40,7 +40,7 @@ pd.set_option("display.max_columns", 200)
|
|
40
40
|
# return (series + timedelta).dt.strftime("%Y-%m-%d %H:%M:%S %Z")
|
41
41
|
|
42
42
|
|
43
|
-
def get_keys_from_df(data: list, registry:
|
43
|
+
def get_keys_from_df(data: list, registry: SQLRecord) -> list[str]:
|
44
44
|
if len(data) > 0:
|
45
45
|
if isinstance(data[0], dict):
|
46
46
|
keys = list(data[0].keys())
|
@@ -62,9 +62,9 @@ def get_keys_from_df(data: list, registry: Record) -> list[str]:
|
|
62
62
|
return keys
|
63
63
|
|
64
64
|
|
65
|
-
def one_helper(self):
|
65
|
+
def one_helper(self, does_not_exist_msg: str | None = None):
|
66
66
|
if len(self) == 0:
|
67
|
-
raise DoesNotExist
|
67
|
+
raise DoesNotExist(does_not_exist_msg)
|
68
68
|
elif len(self) > 1:
|
69
69
|
raise MultipleResultsFound(self)
|
70
70
|
else:
|
@@ -80,11 +80,13 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
|
|
80
80
|
|
81
81
|
if queryset.model in {Collection, Transform}:
|
82
82
|
name_mappings = {
|
83
|
-
"visibility": "
|
83
|
+
"visibility": "branch_id",
|
84
|
+
"_branch_code": "branch_id",
|
84
85
|
}
|
85
86
|
elif queryset.model == Artifact:
|
86
87
|
name_mappings = {
|
87
|
-
"visibility": "
|
88
|
+
"visibility": "branch_id",
|
89
|
+
"_branch_code": "branch_id",
|
88
90
|
"transform": "run__transform",
|
89
91
|
}
|
90
92
|
else:
|
@@ -108,7 +110,7 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
|
|
108
110
|
|
109
111
|
def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
110
112
|
def _map_databases(value: Any, key: str, target_db: str) -> tuple[str, Any]:
|
111
|
-
if isinstance(value,
|
113
|
+
if isinstance(value, SQLRecord):
|
112
114
|
if value._state.db != target_db:
|
113
115
|
logger.warning(
|
114
116
|
f"passing record from database {value._state.db} to query {target_db}, matching on uid '{value.uid}'"
|
@@ -121,12 +123,14 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
121
123
|
and isinstance(value, IterableType)
|
122
124
|
and not isinstance(value, str)
|
123
125
|
):
|
124
|
-
if any(
|
126
|
+
if any(
|
127
|
+
isinstance(v, SQLRecord) and v._state.db != target_db for v in value
|
128
|
+
):
|
125
129
|
logger.warning(
|
126
130
|
f"passing records from another database to query {target_db}, matching on uids"
|
127
131
|
)
|
128
132
|
return key.replace("__in", "__uid__in"), [
|
129
|
-
v.uid if isinstance(v,
|
133
|
+
v.uid if isinstance(v, SQLRecord) else v for v in value
|
130
134
|
]
|
131
135
|
return key, value
|
132
136
|
|
@@ -137,21 +141,21 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
137
141
|
expressions,
|
138
142
|
)
|
139
143
|
|
140
|
-
if issubclass(queryset.model,
|
141
|
-
#
|
144
|
+
if issubclass(queryset.model, SQLRecord):
|
145
|
+
# branch_id is set to 0 unless expressions contains id or uid
|
142
146
|
if not (
|
143
147
|
"id" in expressions
|
144
148
|
or "uid" in expressions
|
145
149
|
or "uid__startswith" in expressions
|
146
150
|
):
|
147
|
-
|
148
|
-
if not any(e.startswith(
|
149
|
-
expressions[
|
150
|
-
# if
|
151
|
+
branch_id = "branch_id"
|
152
|
+
if not any(e.startswith(branch_id) for e in expressions):
|
153
|
+
expressions[branch_id] = 1 # default branch_id
|
154
|
+
# if branch_id is None, do not apply a filter
|
151
155
|
# otherwise, it would mean filtering for NULL values, which doesn't make
|
152
156
|
# sense for a non-NULLABLE column
|
153
|
-
elif
|
154
|
-
expressions.pop(
|
157
|
+
elif branch_id in expressions and expressions[branch_id] is None:
|
158
|
+
expressions.pop(branch_id)
|
155
159
|
if queryset._db is not None:
|
156
160
|
# only check for database mismatch if there is a defined database on the
|
157
161
|
# queryset
|
@@ -166,10 +170,10 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
166
170
|
|
167
171
|
|
168
172
|
def get(
|
169
|
-
registry_or_queryset: Union[type[
|
173
|
+
registry_or_queryset: Union[type[SQLRecord], QuerySet],
|
170
174
|
idlike: int | str | None = None,
|
171
175
|
**expressions,
|
172
|
-
) ->
|
176
|
+
) -> SQLRecord:
|
173
177
|
if isinstance(registry_or_queryset, QuerySet):
|
174
178
|
qs = registry_or_queryset
|
175
179
|
registry = qs.model
|
@@ -180,18 +184,24 @@ def get(
|
|
180
184
|
return super(QuerySet, qs).get(id=idlike) # type: ignore
|
181
185
|
elif isinstance(idlike, str):
|
182
186
|
qs = qs.filter(uid__startswith=idlike)
|
187
|
+
|
188
|
+
NAME_FIELD = (
|
189
|
+
registry._name_field if hasattr(registry, "_name_field") else "name"
|
190
|
+
)
|
191
|
+
DOESNOTEXIST_MSG = f"No record found with uid '{idlike}'. Did you forget a keyword as in {registry.__name__}.get({NAME_FIELD}='{idlike}')?"
|
192
|
+
|
183
193
|
if issubclass(registry, IsVersioned):
|
184
194
|
if len(idlike) <= registry._len_stem_uid:
|
185
|
-
return qs.latest_version()
|
195
|
+
return one_helper(qs.latest_version(), DOESNOTEXIST_MSG)
|
186
196
|
else:
|
187
|
-
return qs
|
197
|
+
return one_helper(qs, DOESNOTEXIST_MSG)
|
188
198
|
else:
|
189
|
-
return qs
|
199
|
+
return one_helper(qs, DOESNOTEXIST_MSG)
|
190
200
|
else:
|
191
201
|
assert idlike is None # noqa: S101
|
192
202
|
expressions = process_expressions(qs, expressions)
|
193
|
-
# don't want
|
194
|
-
expressions.pop("
|
203
|
+
# don't want branch_id here in .get(), only in .filter()
|
204
|
+
expressions.pop("branch_id", None)
|
195
205
|
# inject is_latest for consistency with idlike
|
196
206
|
is_latest_was_not_in_expressions = "is_latest" not in expressions
|
197
207
|
if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
|
@@ -213,7 +223,7 @@ def get(
|
|
213
223
|
raise registry.DoesNotExist from registry.DoesNotExist
|
214
224
|
|
215
225
|
|
216
|
-
class
|
226
|
+
class SQLRecordList(UserList, Generic[T]):
|
217
227
|
"""Is ordered, can't be queried, but has `.df()`."""
|
218
228
|
|
219
229
|
def __init__(self, records: Iterable[T]):
|
@@ -236,7 +246,7 @@ class RecordList(UserList, Generic[T]):
|
|
236
246
|
"""Exactly one result. Throws error if there are more or none."""
|
237
247
|
return one_helper(self)
|
238
248
|
|
239
|
-
def save(self) ->
|
249
|
+
def save(self) -> SQLRecordList[T]:
|
240
250
|
"""Save all records to the database."""
|
241
251
|
from lamindb.models.save import save
|
242
252
|
|
@@ -271,7 +281,7 @@ def get_basic_field_names(
|
|
271
281
|
"created_by_id",
|
272
282
|
"updated_at",
|
273
283
|
"_aux",
|
274
|
-
"
|
284
|
+
"branch_id",
|
275
285
|
]:
|
276
286
|
if field_name in field_names:
|
277
287
|
field_names.remove(field_name)
|
@@ -353,7 +363,7 @@ def get_feature_annotate_kwargs(
|
|
353
363
|
|
354
364
|
# https://claude.ai/share/16280046-6ae5-4f6a-99ac-dec01813dc3c
|
355
365
|
def analyze_lookup_cardinality(
|
356
|
-
model_class:
|
366
|
+
model_class: SQLRecord, lookup_paths: list[str] | None
|
357
367
|
) -> dict[str, str]:
|
358
368
|
"""Analyze lookup cardinality.
|
359
369
|
|
@@ -584,7 +594,7 @@ class BasicQuerySet(models.QuerySet):
|
|
584
594
|
new_cls = cls
|
585
595
|
return object.__new__(new_cls)
|
586
596
|
|
587
|
-
@doc_args(
|
597
|
+
@doc_args(SQLRecord.df.__doc__)
|
588
598
|
def df(
|
589
599
|
self,
|
590
600
|
include: str | list[str] | None = None,
|
@@ -664,7 +674,7 @@ class BasicQuerySet(models.QuerySet):
|
|
664
674
|
else:
|
665
675
|
super().delete(*args, **kwargs)
|
666
676
|
|
667
|
-
def list(self, field: str | None = None) -> list[
|
677
|
+
def list(self, field: str | None = None) -> list[SQLRecord] | list[str]:
|
668
678
|
"""Populate an (unordered) list with the results.
|
669
679
|
|
670
680
|
Note that the order in this list is only meaningful if you ordered the underlying query set with `.order_by()`.
|
@@ -679,7 +689,7 @@ class BasicQuerySet(models.QuerySet):
|
|
679
689
|
# list casting is necessary because values_list does not return a list
|
680
690
|
return list(self.values_list(field, flat=True))
|
681
691
|
|
682
|
-
def first(self) ->
|
692
|
+
def first(self) -> SQLRecord | None:
|
683
693
|
"""If non-empty, the first result in the query set, otherwise ``None``.
|
684
694
|
|
685
695
|
Examples:
|
@@ -689,11 +699,11 @@ class BasicQuerySet(models.QuerySet):
|
|
689
699
|
return None
|
690
700
|
return self[0]
|
691
701
|
|
692
|
-
def one(self) ->
|
702
|
+
def one(self) -> SQLRecord:
|
693
703
|
"""Exactly one result. Raises error if there are more or none."""
|
694
704
|
return one_helper(self)
|
695
705
|
|
696
|
-
def one_or_none(self) ->
|
706
|
+
def one_or_none(self) -> SQLRecord | None:
|
697
707
|
"""At most one result. Returns it if there is one, otherwise returns ``None``.
|
698
708
|
|
699
709
|
Examples:
|
@@ -712,7 +722,7 @@ class BasicQuerySet(models.QuerySet):
|
|
712
722
|
if issubclass(self.model, IsVersioned):
|
713
723
|
return self.filter(is_latest=True)
|
714
724
|
else:
|
715
|
-
raise ValueError("
|
725
|
+
raise ValueError("SQLRecord isn't subclass of `lamindb.core.IsVersioned`")
|
716
726
|
|
717
727
|
@doc_args(_search.__doc__)
|
718
728
|
def search(self, string: str, **kwargs):
|
@@ -769,16 +779,21 @@ class QuerySet(BasicQuerySet):
|
|
769
779
|
"""Suggest available fields if an unknown field was passed."""
|
770
780
|
if "Cannot resolve keyword" in str(error):
|
771
781
|
field = str(error).split("'")[1]
|
772
|
-
|
782
|
+
avail_fields = self.model.__get_available_fields__()
|
783
|
+
if "_branch_code" in avail_fields:
|
784
|
+
avail_fields.remove("_branch_code") # backward compat
|
785
|
+
fields = ", ".join(sorted(avail_fields))
|
773
786
|
raise FieldError(
|
774
787
|
f"Unknown field '{field}'. Available fields: {fields}"
|
775
788
|
) from None
|
776
789
|
raise error # pragma: no cover
|
777
790
|
|
778
|
-
def get(self, idlike: int | str | None = None, **expressions) ->
|
791
|
+
def get(self, idlike: int | str | None = None, **expressions) -> SQLRecord:
|
779
792
|
"""Query a single record. Raises error if there are more or none."""
|
793
|
+
is_run_input = expressions.pop("is_run_input", False)
|
794
|
+
|
780
795
|
try:
|
781
|
-
|
796
|
+
record = get(self, idlike, **expressions)
|
782
797
|
except ValueError as e:
|
783
798
|
# Pass through original error for explicit id lookups
|
784
799
|
if "Field 'id' expected a number" in str(e):
|
@@ -793,6 +808,15 @@ class QuerySet(BasicQuerySet):
|
|
793
808
|
self._handle_unknown_field(e)
|
794
809
|
raise # pragma: no cover
|
795
810
|
|
811
|
+
if is_run_input is not False: # might be None or True or Run
|
812
|
+
from lamindb.models.artifact import Artifact, _track_run_input
|
813
|
+
from lamindb.models.collection import Collection
|
814
|
+
|
815
|
+
if isinstance(record, (Artifact, Collection)):
|
816
|
+
_track_run_input(record, is_run_input)
|
817
|
+
|
818
|
+
return record
|
819
|
+
|
796
820
|
def filter(self, *queries, **expressions) -> QuerySet:
|
797
821
|
"""Query a set of records."""
|
798
822
|
# Suggest to use __name for related fields such as id when not passed
|