lamindb 1.11.3__py3-none-any.whl → 1.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +8 -14
- lamindb/_tracked.py +2 -0
- lamindb/base/types.py +1 -3
- lamindb/core/_context.py +16 -31
- lamindb/core/_mapped_collection.py +2 -2
- lamindb/core/storage/paths.py +5 -3
- lamindb/curators/core.py +15 -4
- lamindb/examples/__init__.py +3 -1
- lamindb/examples/croissant/__init__.py +3 -1
- lamindb/examples/mlflow/__init__.py +38 -0
- lamindb/examples/wandb/__init__.py +40 -0
- lamindb/integrations/__init__.py +26 -0
- lamindb/integrations/_lightning.py +87 -0
- lamindb/migrations/0120_add_record_fk_constraint.py +1 -1
- lamindb/migrations/0122_remove_personproject_person_and_more.py +219 -0
- lamindb/migrations/0123_alter_artifact_description_alter_branch_description_and_more.py +82 -0
- lamindb/migrations/0124_page_artifact_page_collection_page_feature_page_and_more.py +15 -0
- lamindb/migrations/0125_artifact_is_locked_collection_is_locked_and_more.py +79 -0
- lamindb/migrations/0126_alter_artifact_is_locked_alter_collection_is_locked_and_more.py +105 -0
- lamindb/migrations/0127_alter_run_status_code_feature_dtype.py +31 -0
- lamindb/migrations/0128_artifact__real_key.py +21 -0
- lamindb/migrations/0129_remove_feature_page_remove_project_page_and_more.py +779 -0
- lamindb/migrations/0130_branch_space_alter_artifactblock_artifact_and_more.py +170 -0
- lamindb/migrations/0131_record_unique_name_type_space.py +18 -0
- lamindb/migrations/0132_record_parents_record_reference_and_more.py +61 -0
- lamindb/migrations/0133_artifactuser_artifact_users.py +108 -0
- lamindb/migrations/{0119_squashed.py → 0133_squashed.py} +1211 -322
- lamindb/models/__init__.py +14 -4
- lamindb/models/_django.py +1 -2
- lamindb/models/_feature_manager.py +1 -0
- lamindb/models/_is_versioned.py +14 -16
- lamindb/models/_relations.py +7 -0
- lamindb/models/artifact.py +99 -56
- lamindb/models/artifact_set.py +20 -3
- lamindb/models/block.py +174 -0
- lamindb/models/can_curate.py +7 -9
- lamindb/models/collection.py +9 -9
- lamindb/models/feature.py +38 -38
- lamindb/models/has_parents.py +15 -6
- lamindb/models/project.py +44 -99
- lamindb/models/query_manager.py +1 -1
- lamindb/models/query_set.py +36 -8
- lamindb/models/record.py +169 -46
- lamindb/models/run.py +44 -10
- lamindb/models/save.py +7 -7
- lamindb/models/schema.py +9 -2
- lamindb/models/sqlrecord.py +87 -35
- lamindb/models/storage.py +13 -3
- lamindb/models/transform.py +7 -2
- lamindb/models/ulabel.py +6 -23
- {lamindb-1.11.3.dist-info → lamindb-1.12.1.dist-info}/METADATA +18 -21
- {lamindb-1.11.3.dist-info → lamindb-1.12.1.dist-info}/RECORD +54 -38
- {lamindb-1.11.3.dist-info → lamindb-1.12.1.dist-info}/LICENSE +0 -0
- {lamindb-1.11.3.dist-info → lamindb-1.12.1.dist-info}/WHEEL +0 -0
lamindb/models/query_set.py
CHANGED
@@ -13,6 +13,7 @@ from django.db import models
|
|
13
13
|
from django.db.models import F, ForeignKey, ManyToManyField, Q, Subquery
|
14
14
|
from django.db.models.fields.related import ForeignObjectRel
|
15
15
|
from lamin_utils import logger
|
16
|
+
from lamindb_setup import settings as setup_settings
|
16
17
|
from lamindb_setup.core import deprecated
|
17
18
|
from lamindb_setup.core._docs import doc_args
|
18
19
|
|
@@ -59,6 +60,25 @@ def get_keys_from_df(data: list, registry: SQLRecord) -> list[str]:
|
|
59
60
|
return keys
|
60
61
|
|
61
62
|
|
63
|
+
def get_default_branch_ids() -> list[int]:
|
64
|
+
"""Return branch IDs to include in default queries.
|
65
|
+
|
66
|
+
By default, queries include records on the main branch (branch_id=1) but exclude trashed (branch_id=-1)
|
67
|
+
and archived records (branch_id=0). This matches behavior of familiar tools like GitHub, Slack, and
|
68
|
+
email clients.
|
69
|
+
|
70
|
+
If a user switches to another branch via `lamin switch branch`, the main branch will still be included.
|
71
|
+
|
72
|
+
Returns:
|
73
|
+
List containing the default branch and current branch if different.
|
74
|
+
"""
|
75
|
+
branch_id = setup_settings.branch.id
|
76
|
+
branch_ids = [branch_id]
|
77
|
+
if branch_id != 1: # add the main branch by default
|
78
|
+
branch_ids.append(1)
|
79
|
+
return branch_ids
|
80
|
+
|
81
|
+
|
62
82
|
def one_helper(
|
63
83
|
self: QuerySet | SQLRecordList,
|
64
84
|
does_not_exist_msg: str | None = None,
|
@@ -168,8 +188,7 @@ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
|
|
168
188
|
expressions_have_branch = True
|
169
189
|
break
|
170
190
|
if not expressions_have_branch:
|
171
|
-
|
172
|
-
expressions["branch_id"] = 1
|
191
|
+
expressions["branch_id__in"] = get_default_branch_ids()
|
173
192
|
else:
|
174
193
|
# if branch_id is None, do not apply a filter
|
175
194
|
# otherwise, it would mean filtering for NULL values, which doesn't make
|
@@ -313,11 +332,13 @@ def get_basic_field_names(
|
|
313
332
|
for field_name in [
|
314
333
|
"version",
|
315
334
|
"is_latest",
|
335
|
+
"is_locked",
|
316
336
|
"run_id",
|
317
337
|
"created_at",
|
318
338
|
"created_by_id",
|
319
339
|
"updated_at",
|
320
340
|
"_aux",
|
341
|
+
"_real_key",
|
321
342
|
"branch_id",
|
322
343
|
]:
|
323
344
|
if field_name in field_names:
|
@@ -359,7 +380,10 @@ def get_feature_annotate_kwargs(
|
|
359
380
|
for obj in registry._meta.related_objects:
|
360
381
|
if not hasattr(getattr(registry, obj.related_name), "through"):
|
361
382
|
continue
|
362
|
-
|
383
|
+
link_model = getattr(registry, obj.related_name).through
|
384
|
+
if link_model.__name__ == "Record_parents":
|
385
|
+
continue
|
386
|
+
links = link_model.filter(
|
363
387
|
**{registry.__name__.lower() + "_id__in": ids_list}
|
364
388
|
)
|
365
389
|
feature_names_for_link_model = links.values_list("feature__name", flat=True)
|
@@ -422,7 +446,7 @@ def get_feature_annotate_kwargs(
|
|
422
446
|
annotate_kwargs = {}
|
423
447
|
for link_attr, feature_type in link_attributes_on_models.items():
|
424
448
|
if link_attr == "links_project" and registry is Record:
|
425
|
-
# we're only interested in
|
449
|
+
# we're only interested in _values_project when "annotating" records
|
426
450
|
continue
|
427
451
|
annotate_kwargs[f"{link_attr}__feature__name"] = F(
|
428
452
|
f"{link_attr}__feature__name"
|
@@ -815,6 +839,8 @@ class BasicQuerySet(models.QuerySet):
|
|
815
839
|
Args:
|
816
840
|
permanent: Whether to permanently delete the record (skips trash).
|
817
841
|
Is only relevant for records that have the `branch` field.
|
842
|
+
If `None`, uses soft delete for records that have the `branch` field,
|
843
|
+
hard delete otherwise.
|
818
844
|
|
819
845
|
Note:
|
820
846
|
Calling `delete()` twice on the same queryset does NOT permanently delete in bulk operations.
|
@@ -834,8 +860,9 @@ class BasicQuerySet(models.QuerySet):
|
|
834
860
|
record.delete(*args, permanent=permanent, **kwargs)
|
835
861
|
elif self.model is Storage: # storage does not have soft delete
|
836
862
|
if permanent is False:
|
837
|
-
|
838
|
-
"
|
863
|
+
raise ValueError(
|
864
|
+
"Soft delete is not possible for Storage, "
|
865
|
+
"use 'permanent=True' or 'permanent=None' for permanent deletion."
|
839
866
|
)
|
840
867
|
for record in self:
|
841
868
|
record.delete()
|
@@ -845,8 +872,9 @@ class BasicQuerySet(models.QuerySet):
|
|
845
872
|
self.update(branch_id=-1)
|
846
873
|
else:
|
847
874
|
if permanent is False:
|
848
|
-
|
849
|
-
f"
|
875
|
+
raise ValueError(
|
876
|
+
f"Soft delete is not possible for {self.model.__name__}, "
|
877
|
+
"use 'permanent=True' for permanent deletion."
|
850
878
|
)
|
851
879
|
super().delete(*args, **kwargs)
|
852
880
|
|
lamindb/models/record.py
CHANGED
@@ -5,12 +5,14 @@ from typing import TYPE_CHECKING, Any, overload
|
|
5
5
|
from django.db import models
|
6
6
|
from django.db.models import CASCADE, PROTECT
|
7
7
|
from lamin_utils import logger
|
8
|
+
from lamindb_setup.core import deprecated
|
8
9
|
|
9
10
|
from lamindb.base.fields import (
|
10
11
|
BooleanField,
|
11
12
|
CharField,
|
12
13
|
ForeignKey,
|
13
14
|
JSONField,
|
15
|
+
TextField,
|
14
16
|
)
|
15
17
|
from lamindb.errors import FieldValidationError
|
16
18
|
|
@@ -18,9 +20,9 @@ from ..base.ids import base62_16
|
|
18
20
|
from .artifact import Artifact
|
19
21
|
from .can_curate import CanCurate
|
20
22
|
from .feature import Feature
|
21
|
-
from .has_parents import _query_relatives
|
23
|
+
from .has_parents import HasParents, _query_relatives
|
22
24
|
from .query_set import reorder_subset_columns_in_df
|
23
|
-
from .run import Run, TracksRun, TracksUpdates, User
|
25
|
+
from .run import Run, TracksRun, TracksUpdates, User, current_run
|
24
26
|
from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, _get_record_kwargs
|
25
27
|
from .transform import Transform
|
26
28
|
from .ulabel import ULabel
|
@@ -28,33 +30,92 @@ from .ulabel import ULabel
|
|
28
30
|
if TYPE_CHECKING:
|
29
31
|
import pandas as pd
|
30
32
|
|
31
|
-
from .
|
33
|
+
from .blocks import RunBlock
|
34
|
+
from .project import Project, Reference
|
32
35
|
from .query_set import QuerySet
|
33
36
|
from .schema import Schema
|
34
37
|
|
35
38
|
|
36
|
-
class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
37
|
-
"""
|
39
|
+
class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates, HasParents):
|
40
|
+
"""Metadata records for labeling and organizing entities in sheets.
|
38
41
|
|
39
|
-
|
40
|
-
|
41
|
-
This is currently more convenient to use through the UI.
|
42
|
-
|
43
|
-
A `Record` has a flexible schema: it can store data for arbitrary features.
|
44
|
-
Changing the fields of a :class:`~lamindb.models.SQLRecord`, you need to modify the columns of the underlying table in the database.
|
42
|
+
Is useful to manage samples, donors, cells, compounds, sequences.
|
45
43
|
|
46
44
|
Args:
|
47
45
|
name: `str` A name.
|
48
46
|
description: `str` A description.
|
47
|
+
type: `Record | None = None` The type of this record.
|
48
|
+
is_type: `bool = False` Whether this record is a type (a record that
|
49
|
+
classifies other records).
|
50
|
+
schema: `Schema | None = None` A schema to enforce for a type (optional).
|
51
|
+
reference: `str | None = None` For instance, an external ID or a URL.
|
52
|
+
reference_type: `str | None = None` For instance, `"url"`.
|
49
53
|
|
50
54
|
See Also:
|
51
55
|
:meth:`~lamindb.Feature`
|
52
|
-
Dimensions of measurement (e.g. column of a sheet).
|
56
|
+
Dimensions of measurement (e.g. column of a sheet, attribute of a record).
|
57
|
+
|
58
|
+
Examples:
|
59
|
+
|
60
|
+
Create a record type and then instances of that type::
|
61
|
+
|
62
|
+
sample_type = Record(name="Sample", is_type=True).save()
|
63
|
+
sample1 = Record(name="Sample 1", type=sample_type).save()
|
64
|
+
sample2 = Record(name="Sample 2", type=sample_type).save()
|
65
|
+
|
66
|
+
You can then annotate artifacts and other entities with these records, e.g.::
|
67
|
+
|
68
|
+
artifact.records.add(sample1)
|
69
|
+
|
70
|
+
To query artifacts by records::
|
71
|
+
|
72
|
+
ln.Artifact.filter(records=sample1).to_dataframe()
|
73
|
+
|
74
|
+
Through the UI, you can assign attributes to records in the form of features. The Python API also allows you to
|
75
|
+
assign features programmatically, but is currently still low-level::
|
76
|
+
|
77
|
+
feature = ln.Feature(name="age", type="int").save()
|
78
|
+
sample1.values_record.create(feature=feature, value=42)
|
79
|
+
sample2.values_record.create(feature=feature, value=23)
|
80
|
+
|
81
|
+
Records can also model flexible ontologies through their parents-children relationships::
|
82
|
+
|
83
|
+
cell_type = Record(name="CellType", is_type=True).save()
|
84
|
+
t_cell = Record(name="T Cell", type=cell_type).save()
|
85
|
+
cd4_t_cell = Record(name="CD4+ T Cell", type=cell_type).save()
|
86
|
+
t_cell.children.add(cd4_t_cell)
|
87
|
+
|
88
|
+
Often, a label is measured *within* a dataset. For instance, an artifact
|
89
|
+
might characterize 2 species of the Iris flower (`"setosa"` &
|
90
|
+
`"versicolor"`) measured by a `"species"` feature. For such cases, you can use
|
91
|
+
:class:`~lamindb.curators.DataFrameCurator` to automatically parse, validate, and
|
92
|
+
annotate with labels that are contained in `DataFrame` objects.
|
93
|
+
|
94
|
+
.. note::
|
95
|
+
|
96
|
+
If you work with complex entities like cell lines, cell types, tissues,
|
97
|
+
etc., consider using the pre-defined biological registries in
|
98
|
+
:mod:`bionty` to label artifacts & collections.
|
99
|
+
|
100
|
+
If you work with biological samples, likely, the only sustainable way of
|
101
|
+
tracking metadata is to create a custom schema module.
|
102
|
+
|
103
|
+
.. note::
|
104
|
+
|
105
|
+
A `Record` has a flexible schema: it can store data for arbitrary features.
|
106
|
+
By contrast, if you want to change the fields of a :class:`~lamindb.models.SQLRecord`, you need to modify the columns of the underlying table in the database.
|
107
|
+
The latter is more efficient for large datasets and you can customize it through modules like the `bionty` or `wetlab` module.
|
108
|
+
|
53
109
|
"""
|
54
110
|
|
55
111
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
56
112
|
abstract = False
|
57
113
|
app_label = "lamindb"
|
114
|
+
constraints = [
|
115
|
+
models.UniqueConstraint(
|
116
|
+
fields=["name", "type", "space"], name="unique_name_type_space"
|
117
|
+
)
|
118
|
+
]
|
58
119
|
|
59
120
|
_name_field: str = "name"
|
60
121
|
|
@@ -65,7 +126,10 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
65
126
|
)
|
66
127
|
"""A universal random id, valid across DB instances."""
|
67
128
|
name: str = CharField(max_length=150, db_index=True, null=True)
|
68
|
-
"""Name or title of record (optional).
|
129
|
+
"""Name or title of record (optional).
|
130
|
+
|
131
|
+
Names for a given `type` and `space` are constrained to be unique.
|
132
|
+
"""
|
69
133
|
type: Record | None = ForeignKey("self", PROTECT, null=True, related_name="records")
|
70
134
|
"""Type of record, e.g., `Sample`, `Donor`, `Cell`, `Compound`, `Sequence`.
|
71
135
|
|
@@ -78,6 +142,12 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
78
142
|
|
79
143
|
For example, if a record "Compound" is a `type`, the actual compounds "darerinib", "tramerinib", would be instances of that `type`.
|
80
144
|
"""
|
145
|
+
description: str | None = TextField(null=True)
|
146
|
+
"""A description."""
|
147
|
+
reference: str | None = CharField(max_length=255, db_index=True, null=True)
|
148
|
+
"""A simple reference like a URL or external ID."""
|
149
|
+
reference_type: str | None = CharField(max_length=25, db_index=True, null=True)
|
150
|
+
"""Type of simple reference."""
|
81
151
|
schema: Schema | None = ForeignKey(
|
82
152
|
"Schema", CASCADE, null=True, related_name="records"
|
83
153
|
)
|
@@ -87,54 +157,88 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
87
157
|
|
88
158
|
If `is_type` is `True`, the schema is used to enforce certain features for each record of this type.
|
89
159
|
"""
|
90
|
-
# naming convention in analogy
|
160
|
+
# naming convention in analogy to Schema
|
91
161
|
components: Record = models.ManyToManyField(
|
92
162
|
"Record", through="RecordRecord", symmetrical=False, related_name="composites"
|
93
163
|
)
|
94
164
|
"""Record-like components of this record."""
|
95
165
|
composites: Record
|
96
166
|
"""Record-like composites of this record."""
|
97
|
-
|
98
|
-
|
99
|
-
linked_artifacts: Artifact = models.ManyToManyField(
|
100
|
-
Artifact, through="RecordArtifact", related_name="linked_in_records"
|
101
|
-
)
|
102
|
-
"""Linked artifacts."""
|
103
|
-
artifacts: Artifact = models.ManyToManyField(
|
104
|
-
Artifact, through="ArtifactRecord", related_name="records"
|
105
|
-
)
|
106
|
-
"""Annotated artifacts."""
|
107
|
-
linked_runs: Run = models.ManyToManyField(
|
108
|
-
Run, through="RecordRun", related_name="records"
|
109
|
-
)
|
110
|
-
"""Linked runs."""
|
111
|
-
linked_users: User = models.ManyToManyField(
|
112
|
-
User, through="RecordUser", related_name="records"
|
167
|
+
parents: ULabel = models.ManyToManyField(
|
168
|
+
"self", symmetrical=False, related_name="children"
|
113
169
|
)
|
114
|
-
"""
|
170
|
+
"""Parent entities of this record.
|
171
|
+
|
172
|
+
For advanced use cases, you can build an ontology under a given `type`.
|
173
|
+
|
174
|
+
Say, if you modeled `CellType` as a `Record`, you would introduce a type `CellType` and model the hierarchy of cell types under it.
|
175
|
+
"""
|
176
|
+
children: ULabel
|
177
|
+
"""Child entities of this record.
|
178
|
+
|
179
|
+
Reverse accessor for parents.
|
180
|
+
"""
|
181
|
+
# this is handled manually here because we want to set the related_name attribute
|
182
|
+
# (this doesn't happen via inheritance of TracksRun, everything else is the same)
|
115
183
|
run: Run | None = ForeignKey(
|
116
184
|
Run,
|
117
185
|
PROTECT,
|
118
186
|
related_name="output_records",
|
119
187
|
null=True,
|
120
|
-
default=
|
188
|
+
default=current_run,
|
121
189
|
editable=False,
|
122
190
|
)
|
123
191
|
"""Run that created the record."""
|
124
192
|
input_of_runs: Run = models.ManyToManyField(Run, related_name="input_records")
|
125
193
|
"""Runs that use this record as an input."""
|
126
|
-
|
194
|
+
artifacts: Artifact = models.ManyToManyField(
|
195
|
+
Artifact, through="ArtifactRecord", related_name="records"
|
196
|
+
)
|
197
|
+
"""Artifacts annotated by this record."""
|
198
|
+
projects: Project
|
199
|
+
"""Projects that annotate this record."""
|
200
|
+
references: Reference
|
201
|
+
"""References that annotate this record."""
|
202
|
+
values_json: RecordJson
|
203
|
+
"""JSON values (for lists, dicts, etc.)."""
|
204
|
+
values_record: RecordRecord
|
205
|
+
"""Record values with their features."""
|
206
|
+
values_ulabel: RecordULabel
|
207
|
+
"""ULabel values with their features."""
|
208
|
+
values_user: RecordUser
|
209
|
+
"""User values with their features."""
|
210
|
+
values_run: RecordRun
|
211
|
+
"""Run values with their features."""
|
212
|
+
values_artifact: RecordArtifact
|
213
|
+
"""Artifact values with their features."""
|
214
|
+
values_reference: Reference
|
215
|
+
"""Reference values with their features."""
|
216
|
+
values_project: Project
|
217
|
+
"""Project values with their features."""
|
218
|
+
linked_runs: Run = models.ManyToManyField(
|
219
|
+
Run, through="RecordRun", related_name="records"
|
220
|
+
)
|
221
|
+
"""Runs linked in this record as values."""
|
222
|
+
linked_users: User = models.ManyToManyField(
|
223
|
+
User, through="RecordUser", related_name="records"
|
224
|
+
)
|
225
|
+
"""Users linked in this record as values."""
|
226
|
+
linked_ulabels: ULabel = models.ManyToManyField(
|
127
227
|
ULabel,
|
128
228
|
through="RecordULabel",
|
129
|
-
related_name="
|
229
|
+
related_name="linked_in_records",
|
230
|
+
)
|
231
|
+
"""ULabels linked in this record as values."""
|
232
|
+
linked_artifacts: Artifact = models.ManyToManyField(
|
233
|
+
Artifact, through="RecordArtifact", related_name="linked_in_records"
|
130
234
|
)
|
131
|
-
"""
|
235
|
+
"""Artifacts linked in this record as values."""
|
132
236
|
linked_projects: Project
|
133
|
-
"""
|
237
|
+
"""Projects linked in this record as values."""
|
134
238
|
linked_references: Reference
|
135
|
-
"""
|
136
|
-
|
137
|
-
"""
|
239
|
+
"""References linked in this record as values."""
|
240
|
+
blocks: RunBlock
|
241
|
+
"""Blocks that annotate this record."""
|
138
242
|
|
139
243
|
@overload
|
140
244
|
def __init__(
|
@@ -143,6 +247,9 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
143
247
|
type: Record | None = None,
|
144
248
|
is_type: bool = False,
|
145
249
|
description: str | None = None,
|
250
|
+
schema: Schema | None = None,
|
251
|
+
reference: str | None = None,
|
252
|
+
reference_type: str | None = None,
|
146
253
|
): ...
|
147
254
|
|
148
255
|
@overload
|
@@ -165,7 +272,9 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
165
272
|
type: str | None = kwargs.pop("type", None)
|
166
273
|
is_type: bool = kwargs.pop("is_type", False)
|
167
274
|
description: str | None = kwargs.pop("description", None)
|
168
|
-
schema = kwargs.pop("schema", None)
|
275
|
+
schema: Schema | None = kwargs.pop("schema", None)
|
276
|
+
reference: str | None = kwargs.pop("reference", None)
|
277
|
+
reference_type: str | None = kwargs.pop("reference_type", None)
|
169
278
|
branch = kwargs.pop("branch", None)
|
170
279
|
branch_id = kwargs.pop("branch_id", 1)
|
171
280
|
space = kwargs.pop("space", None)
|
@@ -187,6 +296,8 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
187
296
|
type=type,
|
188
297
|
is_type=is_type,
|
189
298
|
description=description,
|
299
|
+
reference=reference,
|
300
|
+
reference_type=reference_type,
|
190
301
|
schema=schema,
|
191
302
|
branch=branch,
|
192
303
|
branch_id=branch_id,
|
@@ -202,17 +313,25 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
202
313
|
return self.schema is not None and self.is_type
|
203
314
|
|
204
315
|
def query_children(self) -> QuerySet:
|
205
|
-
"""Query all children of a record
|
316
|
+
"""Query all children of a record.
|
317
|
+
|
318
|
+
While `.children` retrieves the direct children, this method
|
319
|
+
retrieves all descendants of a record type.
|
320
|
+
"""
|
321
|
+
return _query_relatives([self], "children", self.__class__) # type: ignore
|
322
|
+
|
323
|
+
def query_records(self) -> QuerySet:
|
324
|
+
"""Query all records of a type.
|
206
325
|
|
207
326
|
While `.records` retrieves the direct children, this method
|
208
327
|
retrieves all descendants of a record type.
|
209
328
|
"""
|
210
329
|
return _query_relatives([self], "records", self.__class__) # type: ignore
|
211
330
|
|
212
|
-
def
|
213
|
-
"""Export all
|
331
|
+
def type_to_dataframe(self) -> pd.DataFrame:
|
332
|
+
"""Export all instances of this record type to a pandas DataFrame."""
|
214
333
|
assert self.is_type, "Only types can be exported as dataframes" # noqa: S101
|
215
|
-
df = self.
|
334
|
+
df = self.query_records().to_dataframe(features="queryset")
|
216
335
|
df.columns.values[0] = "__lamindb_record_uid__"
|
217
336
|
df.columns.values[1] = "__lamindb_record_name__"
|
218
337
|
if self.schema is not None:
|
@@ -226,8 +345,12 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
226
345
|
df = reorder_subset_columns_in_df(df, desired_order, position=0) # type: ignore
|
227
346
|
return df.sort_index() # order by id for now
|
228
347
|
|
348
|
+
@deprecated("type_to_dataframe")
|
349
|
+
def to_pandas(self) -> pd.DataFrame:
|
350
|
+
return self.type_to_dataframe()
|
351
|
+
|
229
352
|
def to_artifact(self, key: str = None) -> Artifact:
|
230
|
-
"""
|
353
|
+
"""Calls `type_to_dataframe()` to create an artifact."""
|
231
354
|
from lamindb.core._context import context
|
232
355
|
|
233
356
|
assert self.is_type, "Only types can be exported as artifacts" # noqa: S101
|
@@ -243,7 +366,7 @@ class Record(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
243
366
|
run = Run(transform, initiated_by_run=context.run).save()
|
244
367
|
run.input_records.add(self)
|
245
368
|
return Artifact.from_dataframe(
|
246
|
-
self.
|
369
|
+
self.type_to_dataframe(),
|
247
370
|
key=key,
|
248
371
|
description=f"Export of sheet {self.uid}{description}",
|
249
372
|
schema=self.schema,
|
lamindb/models/run.py
CHANGED
@@ -27,6 +27,7 @@ if TYPE_CHECKING:
|
|
27
27
|
|
28
28
|
from ._feature_manager import FeatureManager
|
29
29
|
from .artifact import Artifact
|
30
|
+
from .block import RunBlock
|
30
31
|
from .collection import Collection
|
31
32
|
from .feature import FeatureValue
|
32
33
|
from .project import Project
|
@@ -315,18 +316,16 @@ class Run(SQLRecord):
|
|
315
316
|
"""Runs that were initiated by this run."""
|
316
317
|
projects: Project
|
317
318
|
"""Linked projects."""
|
319
|
+
blocks: RunBlock
|
320
|
+
"""Blocks that annotate this run."""
|
321
|
+
records: Record
|
322
|
+
"""Records that annotate this run."""
|
318
323
|
_is_consecutive: bool | None = BooleanField(null=True)
|
319
324
|
"""Indicates whether code was consecutively executed. Is relevant for notebooks."""
|
320
|
-
_status_code: int = models.SmallIntegerField(
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
- -2: re-started
|
325
|
-
- -1: started
|
326
|
-
- 0: completed
|
327
|
-
- 1: errored
|
328
|
-
- 2: aborted
|
329
|
-
"""
|
325
|
+
_status_code: int = models.SmallIntegerField(
|
326
|
+
default=-3, db_default=-3, db_index=True, null=True
|
327
|
+
)
|
328
|
+
"""Status code of the run. See the status property for mapping to string."""
|
330
329
|
|
331
330
|
@overload
|
332
331
|
def __init__(
|
@@ -372,6 +371,41 @@ class Run(SQLRecord):
|
|
372
371
|
reference_type=reference_type,
|
373
372
|
)
|
374
373
|
|
374
|
+
@property
|
375
|
+
def status(self) -> str:
|
376
|
+
"""Get status of run.
|
377
|
+
|
378
|
+
Returns the status as a string, one of: `scheduled`, `re-started`, `started`, `completed`, `errored`, or `aborted`.
|
379
|
+
|
380
|
+
The string maps to an integer field `_status_code` of the run registry, with mapping:
|
381
|
+
- -3: `scheduled`
|
382
|
+
- -2: `re-started`
|
383
|
+
- -1: `started`
|
384
|
+
- 0: `completed`
|
385
|
+
- 1: `errored`
|
386
|
+
- 2: `aborted`
|
387
|
+
|
388
|
+
You can use this private integer field for queries.
|
389
|
+
|
390
|
+
Examples:
|
391
|
+
|
392
|
+
::
|
393
|
+
|
394
|
+
run.status
|
395
|
+
#> 'completed'
|
396
|
+
"""
|
397
|
+
if self._status_code is None:
|
398
|
+
return "unknown"
|
399
|
+
status_dict = {
|
400
|
+
-3: "scheduled",
|
401
|
+
-2: "re-started",
|
402
|
+
-1: "started",
|
403
|
+
0: "completed",
|
404
|
+
1: "errored",
|
405
|
+
2: "aborted",
|
406
|
+
}
|
407
|
+
return status_dict.get(self._status_code, "unknown")
|
408
|
+
|
375
409
|
@property
|
376
410
|
@deprecated("features")
|
377
411
|
def params(self) -> FeatureManager:
|
lamindb/models/save.py
CHANGED
@@ -224,7 +224,7 @@ def check_and_attempt_upload(
|
|
224
224
|
logger.warning(f"could not upload artifact: {artifact}")
|
225
225
|
# clear dangling storages if we were actually uploading or saving
|
226
226
|
if getattr(artifact, "_to_store", False):
|
227
|
-
artifact._clear_storagekey = auto_storage_key_from_artifact(artifact)
|
227
|
+
artifact._clear_storagekey = auto_storage_key_from_artifact(artifact) # type: ignore
|
228
228
|
return exception
|
229
229
|
# copies (if on-disk) or moves the temporary file (if in-memory) to the cache
|
230
230
|
if os.getenv("LAMINDB_MULTI_INSTANCE") is None:
|
@@ -313,18 +313,18 @@ def check_and_attempt_clearing(
|
|
313
313
|
# or if there was an exception during upload
|
314
314
|
if hasattr(artifact, "_clear_storagekey"):
|
315
315
|
try:
|
316
|
-
if artifact._clear_storagekey is not None:
|
316
|
+
if artifact._clear_storagekey is not None: # type: ignore
|
317
317
|
delete_msg = delete_storage_using_key(
|
318
318
|
artifact,
|
319
|
-
artifact._clear_storagekey,
|
319
|
+
artifact._clear_storagekey, # type: ignore
|
320
320
|
raise_file_not_found_error=raise_file_not_found_error,
|
321
321
|
using_key=using_key,
|
322
322
|
)
|
323
323
|
if delete_msg != "did-not-delete":
|
324
324
|
logger.success(
|
325
|
-
f"deleted stale object at storage key {artifact._clear_storagekey}"
|
325
|
+
f"deleted stale object at storage key {artifact._clear_storagekey}" # type: ignore
|
326
326
|
)
|
327
|
-
artifact._clear_storagekey = None
|
327
|
+
artifact._clear_storagekey = None # type: ignore
|
328
328
|
except Exception as exception:
|
329
329
|
return exception
|
330
330
|
# returning None means proceed (either success or no action needed)
|
@@ -370,7 +370,7 @@ def store_artifacts(
|
|
370
370
|
artifact, raise_file_not_found_error=True, using_key=using_key
|
371
371
|
)
|
372
372
|
if exception is not None:
|
373
|
-
logger.warning(f"clean up of {artifact._clear_storagekey} failed")
|
373
|
+
logger.warning(f"clean up of {artifact._clear_storagekey} failed") # type: ignore
|
374
374
|
break
|
375
375
|
|
376
376
|
if exception is not None:
|
@@ -385,7 +385,7 @@ def store_artifacts(
|
|
385
385
|
)
|
386
386
|
if exception_clear is not None:
|
387
387
|
logger.warning(
|
388
|
-
f"clean up of {artifact._clear_storagekey} after the upload error failed"
|
388
|
+
f"clean up of {artifact._clear_storagekey} after the upload error failed" # type: ignore
|
389
389
|
)
|
390
390
|
error_message = prepare_error_message(artifacts, stored_artifacts, exception)
|
391
391
|
# this is bad because we're losing the original traceback
|
lamindb/models/schema.py
CHANGED
@@ -19,6 +19,7 @@ from lamindb.base.fields import (
|
|
19
19
|
ForeignKey,
|
20
20
|
IntegerField,
|
21
21
|
JSONField,
|
22
|
+
TextField,
|
22
23
|
)
|
23
24
|
from lamindb.base.types import FieldAttr, ListLike
|
24
25
|
from lamindb.errors import FieldValidationError, InvalidArgument
|
@@ -54,6 +55,7 @@ if TYPE_CHECKING:
|
|
54
55
|
from .artifact import Artifact
|
55
56
|
from .project import Project
|
56
57
|
from .query_set import QuerySet, SQLRecordList
|
58
|
+
from .record import Record
|
57
59
|
|
58
60
|
|
59
61
|
NUMBER_TYPE = "num"
|
@@ -289,7 +291,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
289
291
|
during validation, see :attr:`~lamindb.Schema.coerce_dtype`.
|
290
292
|
|
291
293
|
See Also:
|
292
|
-
:meth:`~lamindb.Artifact.
|
294
|
+
:meth:`~lamindb.Artifact.from_dataframe`
|
293
295
|
Validate & annotate a `DataFrame` with a schema.
|
294
296
|
:meth:`~lamindb.Artifact.from_anndata`
|
295
297
|
Validate & annotate an `AnnData` with a schema.
|
@@ -377,7 +379,7 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
377
379
|
"""A universal id."""
|
378
380
|
name: str | None = CharField(max_length=150, null=True, db_index=True)
|
379
381
|
"""A name."""
|
380
|
-
description: str | None =
|
382
|
+
description: str | None = TextField(null=True)
|
381
383
|
"""A description."""
|
382
384
|
n: int = IntegerField()
|
383
385
|
"""Number of features in the schema."""
|
@@ -448,6 +450,11 @@ class Schema(SQLRecord, CanCurate, TracksRun):
|
|
448
450
|
"""The artifacts that were validated against this schema with a :class:`~lamindb.curators.core.Curator`."""
|
449
451
|
projects: Project
|
450
452
|
"""Linked projects."""
|
453
|
+
schemas: Schema
|
454
|
+
"""Schemas for this type."""
|
455
|
+
records: Record
|
456
|
+
"""Records that were annotated with this schema."""
|
457
|
+
|
451
458
|
_curation: dict[str, Any] = JSONField(default=None, db_default=None, null=True)
|
452
459
|
# lamindb v2
|
453
460
|
# _itype: ContentType = models.ForeignKey(ContentType, on_delete=models.CASCADE)
|