lamindb 1.0.5__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +17 -6
- lamindb/_artifact.py +202 -87
- lamindb/_can_curate.py +27 -8
- lamindb/_collection.py +86 -52
- lamindb/_feature.py +177 -41
- lamindb/_finish.py +21 -7
- lamindb/_from_values.py +83 -98
- lamindb/_parents.py +4 -4
- lamindb/_query_set.py +78 -18
- lamindb/_record.py +170 -53
- lamindb/_run.py +4 -4
- lamindb/_save.py +42 -11
- lamindb/_schema.py +135 -38
- lamindb/_storage.py +1 -1
- lamindb/_tracked.py +129 -0
- lamindb/_transform.py +21 -8
- lamindb/_ulabel.py +5 -14
- lamindb/base/users.py +1 -4
- lamindb/base/validation.py +2 -6
- lamindb/core/__init__.py +13 -14
- lamindb/core/_context.py +14 -9
- lamindb/core/_data.py +29 -25
- lamindb/core/_describe.py +1 -1
- lamindb/core/_django.py +1 -1
- lamindb/core/_feature_manager.py +53 -43
- lamindb/core/_label_manager.py +4 -4
- lamindb/core/_mapped_collection.py +24 -9
- lamindb/core/_track_environment.py +2 -1
- lamindb/core/datasets/__init__.py +6 -1
- lamindb/core/datasets/_core.py +12 -11
- lamindb/core/datasets/_small.py +67 -21
- lamindb/core/exceptions.py +1 -90
- lamindb/core/loaders.py +21 -15
- lamindb/core/relations.py +6 -4
- lamindb/core/storage/_anndata_accessor.py +49 -3
- lamindb/core/storage/_backed_access.py +12 -7
- lamindb/core/storage/_pyarrow_dataset.py +40 -15
- lamindb/core/storage/_tiledbsoma.py +56 -12
- lamindb/core/storage/paths.py +30 -24
- lamindb/core/subsettings/_creation_settings.py +4 -16
- lamindb/curators/__init__.py +2193 -846
- lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
- lamindb/errors.py +96 -0
- lamindb/integrations/_vitessce.py +3 -3
- lamindb/migrations/0069_squashed.py +76 -75
- lamindb/migrations/0075_lamindbv1_part5.py +4 -5
- lamindb/migrations/0082_alter_feature_dtype.py +21 -0
- lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
- lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
- lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
- lamindb/migrations/0086_various.py +95 -0
- lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
- lamindb/migrations/0088_schema_components.py +273 -0
- lamindb/migrations/0088_squashed.py +4372 -0
- lamindb/models.py +475 -168
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/METADATA +9 -7
- lamindb-1.1.1.dist-info/RECORD +95 -0
- lamindb/curators/_spatial.py +0 -528
- lamindb/migrations/0052_squashed.py +0 -1261
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
- lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
- lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
- lamindb/migrations/0060_alter_artifact__actions.py +0 -22
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
- lamindb/migrations/0062_add_is_latest_field.py +0 -32
- lamindb/migrations/0063_populate_latest_field.py +0 -45
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
- lamindb-1.0.5.dist-info/RECORD +0 -102
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/LICENSE +0 -0
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/WHEEL +0 -0
lamindb/models.py
CHANGED
@@ -65,6 +65,7 @@ if TYPE_CHECKING:
|
|
65
65
|
from pyarrow.dataset import Dataset as PyArrowDataset
|
66
66
|
from tiledbsoma import Collection as SOMACollection
|
67
67
|
from tiledbsoma import Experiment as SOMAExperiment
|
68
|
+
from tiledbsoma import Measurement as SOMAMeasurement
|
68
69
|
from upath import UPath
|
69
70
|
|
70
71
|
from lamindb.core import LabelManager, MappedCollection, QuerySet, RecordList
|
@@ -152,9 +153,13 @@ def current_run() -> Run | None:
|
|
152
153
|
if not _TRACKING_READY:
|
153
154
|
_TRACKING_READY = _check_instance_setup()
|
154
155
|
if _TRACKING_READY:
|
155
|
-
import lamindb
|
156
|
+
import lamindb
|
156
157
|
|
157
|
-
|
158
|
+
# also see get_run() in core._data
|
159
|
+
run = lamindb._tracked.get_current_tracked_run()
|
160
|
+
if run is None:
|
161
|
+
run = lamindb.context.run
|
162
|
+
return run
|
158
163
|
else:
|
159
164
|
return None
|
160
165
|
|
@@ -239,6 +244,7 @@ class CanCurate:
|
|
239
244
|
mute: bool = False,
|
240
245
|
organism: str | Record | None = None,
|
241
246
|
source: Record | None = None,
|
247
|
+
strict_source: bool = False,
|
242
248
|
) -> InspectResult:
|
243
249
|
"""Inspect if values are mappable to a field.
|
244
250
|
|
@@ -252,6 +258,10 @@ class CanCurate:
|
|
252
258
|
mute: Whether to mute logging.
|
253
259
|
organism: An Organism name or record.
|
254
260
|
source: A `bionty.Source` record that specifies the version to inspect against.
|
261
|
+
strict_source: Determines the validation behavior against records in the registry.
|
262
|
+
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
263
|
+
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
264
|
+
Note: this parameter won't affect validation against bionty/public sources.
|
255
265
|
|
256
266
|
See Also:
|
257
267
|
:meth:`~lamindb.core.CanCurate.validate`
|
@@ -278,10 +288,11 @@ class CanCurate:
|
|
278
288
|
mute: bool = False,
|
279
289
|
organism: str | Record | None = None,
|
280
290
|
source: Record | None = None,
|
291
|
+
strict_source: bool = False,
|
281
292
|
) -> np.ndarray:
|
282
293
|
"""Validate values against existing values of a string field.
|
283
294
|
|
284
|
-
Note this is
|
295
|
+
Note this is strict validation, only asserts exact matches.
|
285
296
|
|
286
297
|
Args:
|
287
298
|
values: Values that will be validated against the field.
|
@@ -291,6 +302,10 @@ class CanCurate:
|
|
291
302
|
mute: Whether to mute logging.
|
292
303
|
organism: An Organism name or record.
|
293
304
|
source: A `bionty.Source` record that specifies the version to validate against.
|
305
|
+
strict_source: Determines the validation behavior against records in the registry.
|
306
|
+
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
307
|
+
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
308
|
+
Note: this parameter won't affect validation against bionty/public sources.
|
294
309
|
|
295
310
|
Returns:
|
296
311
|
A vector of booleans indicating if an element is validated.
|
@@ -370,6 +385,7 @@ class CanCurate:
|
|
370
385
|
synonyms_field: str = "synonyms",
|
371
386
|
organism: str | Record | None = None,
|
372
387
|
source: Record | None = None,
|
388
|
+
strict_source: bool = False,
|
373
389
|
) -> list[str] | dict[str, str]:
|
374
390
|
"""Maps input synonyms to standardized names.
|
375
391
|
|
@@ -392,6 +408,10 @@ class CanCurate:
|
|
392
408
|
synonyms_field: A field containing the concatenated synonyms.
|
393
409
|
organism: An Organism name or record.
|
394
410
|
source: A `bionty.Source` record that specifies the version to validate against.
|
411
|
+
strict_source: Determines the validation behavior against records in the registry.
|
412
|
+
- If `False`, validation will include all records in the registry, ignoring the specified source.
|
413
|
+
- If `True`, validation will only include records in the registry that are linked to the specified source.
|
414
|
+
Note: this parameter won't affect validation against bionty/public sources.
|
395
415
|
|
396
416
|
Returns:
|
397
417
|
If `return_mapper` is `False`: a list of standardized names. Otherwise,
|
@@ -679,7 +699,7 @@ class Registry(ModelBase):
|
|
679
699
|
A record.
|
680
700
|
|
681
701
|
Raises:
|
682
|
-
:exc:`docs:lamindb.
|
702
|
+
:exc:`docs:lamindb.errors.DoesNotExist`: In case no matching record is found.
|
683
703
|
|
684
704
|
See Also:
|
685
705
|
- Guide: :doc:`docs:registries`
|
@@ -1187,7 +1207,7 @@ class Transform(Record, IsVersioned):
|
|
1187
1207
|
|
1188
1208
|
Create a transform for a pipeline:
|
1189
1209
|
|
1190
|
-
>>> transform = ln.Transform(
|
1210
|
+
>>> transform = ln.Transform(key="Cell Ranger", version="7.2.0", type="pipeline").save()
|
1191
1211
|
|
1192
1212
|
Create a transform from a notebook:
|
1193
1213
|
|
@@ -1230,7 +1250,11 @@ class Transform(Record, IsVersioned):
|
|
1230
1250
|
.. versionchanged:: 0.75
|
1231
1251
|
The `source_code` field is no longer an artifact, but a text field.
|
1232
1252
|
"""
|
1233
|
-
|
1253
|
+
# we have a unique constraint here but not on artifact because on artifact, we haven't yet
|
1254
|
+
# settled how we model the same artifact in different storage locations
|
1255
|
+
hash: str | None = CharField(
|
1256
|
+
max_length=HASH_LENGTH, db_index=True, null=True, unique=True
|
1257
|
+
)
|
1234
1258
|
"""Hash of the source code."""
|
1235
1259
|
reference: str | None = CharField(max_length=255, db_index=True, null=True)
|
1236
1260
|
"""Reference for the transform, e.g., a URL."""
|
@@ -1340,7 +1364,7 @@ class Param(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1340
1364
|
_name_field: str = "name"
|
1341
1365
|
|
1342
1366
|
name: str = CharField(max_length=100, db_index=True)
|
1343
|
-
dtype: str = CharField(
|
1367
|
+
dtype: str | None = CharField(db_index=True, null=True)
|
1344
1368
|
"""Data type ("num", "cat", "int", "float", "bool", "datetime").
|
1345
1369
|
|
1346
1370
|
For categorical types, can define from which registry values are
|
@@ -1353,7 +1377,7 @@ class Param(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1353
1377
|
"""
|
1354
1378
|
records: Param
|
1355
1379
|
"""Records of this type."""
|
1356
|
-
is_type: bool = BooleanField(default=
|
1380
|
+
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
1357
1381
|
"""Distinguish types from instances of the type."""
|
1358
1382
|
_expect_many: bool = models.BooleanField(default=False, db_default=False)
|
1359
1383
|
"""Indicates whether values for this param are expected to occur a single or multiple times for an artifact/run (default `False`).
|
@@ -1369,6 +1393,28 @@ class Param(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1369
1393
|
values: ParamValue
|
1370
1394
|
"""Values for this parameter."""
|
1371
1395
|
|
1396
|
+
def __init__(self, *args, **kwargs):
|
1397
|
+
from ._feature import process_init_feature_param
|
1398
|
+
from .errors import ValidationError
|
1399
|
+
|
1400
|
+
if len(args) == len(self._meta.concrete_fields):
|
1401
|
+
super().__init__(*args, **kwargs)
|
1402
|
+
return None
|
1403
|
+
|
1404
|
+
dtype = kwargs.get("dtype", None)
|
1405
|
+
kwargs = process_init_feature_param(args, kwargs, is_param=True)
|
1406
|
+
super().__init__(*args, **kwargs)
|
1407
|
+
dtype_str = kwargs.pop("dtype", None)
|
1408
|
+
if not self._state.adding:
|
1409
|
+
if not (
|
1410
|
+
self.dtype.startswith("cat")
|
1411
|
+
if dtype == "cat"
|
1412
|
+
else self.dtype == dtype_str
|
1413
|
+
):
|
1414
|
+
raise ValidationError(
|
1415
|
+
f"Feature {self.name} already exists with dtype {self.dtype}, you passed {dtype_str}"
|
1416
|
+
)
|
1417
|
+
|
1372
1418
|
|
1373
1419
|
# FeatureValue behaves in many ways like a link in a LinkORM
|
1374
1420
|
# in particular, we don't want a _public field on it
|
@@ -1460,8 +1506,8 @@ class Run(Record):
|
|
1460
1506
|
|
1461
1507
|
Create a run record:
|
1462
1508
|
|
1463
|
-
>>> ln.Transform(
|
1464
|
-
>>> transform = ln.Transform.get(
|
1509
|
+
>>> ln.Transform(key="Cell Ranger", version="7.2.0", type="pipeline").save()
|
1510
|
+
>>> transform = ln.Transform.get(key="Cell Ranger", version="7.2.0")
|
1465
1511
|
>>> run = ln.Run(transform)
|
1466
1512
|
|
1467
1513
|
Create a global run context for a custom transform:
|
@@ -1687,7 +1733,7 @@ class ULabel(Record, HasParents, CanCurate, TracksRun, TracksUpdates):
|
|
1687
1733
|
"""
|
1688
1734
|
records: ULabel
|
1689
1735
|
"""Records of this type."""
|
1690
|
-
is_type: bool = BooleanField(default=
|
1736
|
+
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
1691
1737
|
"""Distinguish types from instances of the type.
|
1692
1738
|
|
1693
1739
|
For example, a ulabel "Project" would be a type, and the actual projects "Project 1", "Project 2", would be records of that `type`.
|
@@ -1727,6 +1773,8 @@ class ULabel(Record, HasParents, CanCurate, TracksRun, TracksUpdates):
|
|
1727
1773
|
def __init__(
|
1728
1774
|
self,
|
1729
1775
|
name: str,
|
1776
|
+
type: ULabel | None = None,
|
1777
|
+
is_type: bool = False,
|
1730
1778
|
description: str | None = None,
|
1731
1779
|
reference: str | None = None,
|
1732
1780
|
reference_type: str | None = None,
|
@@ -1765,12 +1813,15 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1765
1813
|
|
1766
1814
|
Args:
|
1767
1815
|
name: `str` Name of the feature, typically a column name.
|
1768
|
-
dtype: `FeatureDtype | Registry | list[Registry]` See :class:`~lamindb.base.types.FeatureDtype`.
|
1816
|
+
dtype: `FeatureDtype | Registry | list[Registry] | FieldAttr` See :class:`~lamindb.base.types.FeatureDtype`.
|
1769
1817
|
For categorical types, can define from which registry values are
|
1770
1818
|
sampled, e.g., `ULabel` or `[ULabel, bionty.CellType]`.
|
1771
1819
|
unit: `str | None = None` Unit of measure, ideally SI (`"m"`, `"s"`, `"kg"`, etc.) or `"normalized"` etc.
|
1772
1820
|
description: `str | None = None` A description.
|
1773
1821
|
synonyms: `str | None = None` Bar-separated synonyms.
|
1822
|
+
nullable: `bool = True` Whether the feature can have null-like values (`None`, `pd.NA`, `NaN`, etc.), see :attr:`~lamindb.Feature.nullable`.
|
1823
|
+
default_value: `Any | None = None` Default value for the feature.
|
1824
|
+
cat_filters: `dict[str, str] | None = None` Subset a registry by additional filters to define valid categories.
|
1774
1825
|
|
1775
1826
|
Note:
|
1776
1827
|
|
@@ -1835,6 +1886,10 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1835
1886
|
abstract = False
|
1836
1887
|
|
1837
1888
|
_name_field: str = "name"
|
1889
|
+
_aux_fields: dict[str, tuple[str, type]] = {
|
1890
|
+
"0": ("default_value", bool),
|
1891
|
+
"1": ("nullable", bool),
|
1892
|
+
}
|
1838
1893
|
|
1839
1894
|
id: int = models.AutoField(primary_key=True)
|
1840
1895
|
"""Internal id, valid only in one DB instance."""
|
@@ -1844,7 +1899,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1844
1899
|
"""Universal id, valid across DB instances."""
|
1845
1900
|
name: str = CharField(max_length=150, db_index=True, unique=True)
|
1846
1901
|
"""Name of feature (hard unique constraint `unique=True`)."""
|
1847
|
-
dtype: FeatureDtype = CharField(db_index=True)
|
1902
|
+
dtype: FeatureDtype | None = CharField(db_index=True, null=True)
|
1848
1903
|
"""Data type (:class:`~lamindb.base.types.FeatureDtype`).
|
1849
1904
|
|
1850
1905
|
For categorical types, can define from which registry values are
|
@@ -1860,7 +1915,7 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1860
1915
|
"""
|
1861
1916
|
records: Feature
|
1862
1917
|
"""Records of this type."""
|
1863
|
-
is_type: bool = BooleanField(default=
|
1918
|
+
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
1864
1919
|
"""Distinguish types from instances of the type."""
|
1865
1920
|
unit: str | None = CharField(max_length=30, db_index=True, null=True)
|
1866
1921
|
"""Unit of measure, ideally SI (`m`, `s`, `kg`, etc.) or 'normalized' etc. (optional)."""
|
@@ -1922,10 +1977,15 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1922
1977
|
def __init__(
|
1923
1978
|
self,
|
1924
1979
|
name: str,
|
1925
|
-
dtype: FeatureDtype | Registry | list[Registry],
|
1926
|
-
|
1927
|
-
|
1928
|
-
|
1980
|
+
dtype: FeatureDtype | Registry | list[Registry] | FieldAttr,
|
1981
|
+
type: Feature | None = None,
|
1982
|
+
is_type: bool = False,
|
1983
|
+
unit: str | None = None,
|
1984
|
+
description: str | None = None,
|
1985
|
+
synonyms: str | None = None,
|
1986
|
+
nullable: bool = True,
|
1987
|
+
default_value: str | None = None,
|
1988
|
+
cat_filters: dict[str, str] | None = None,
|
1929
1989
|
): ...
|
1930
1990
|
|
1931
1991
|
@overload
|
@@ -1950,6 +2010,62 @@ class Feature(Record, CanCurate, TracksRun, TracksUpdates):
|
|
1950
2010
|
"""Save."""
|
1951
2011
|
pass
|
1952
2012
|
|
2013
|
+
@property
|
2014
|
+
def default_value(self) -> Any:
|
2015
|
+
"""A default value that overwrites missing values (default `None`).
|
2016
|
+
|
2017
|
+
This takes effect when you call `Curator.standardize()`.
|
2018
|
+
|
2019
|
+
If `default_value = None`, missing values like `pd.NA` or `np.nan` are kept.
|
2020
|
+
"""
|
2021
|
+
if self._aux is not None and "af" in self._aux and "0" in self._aux["af"]:
|
2022
|
+
return self._aux["af"]["0"]
|
2023
|
+
else:
|
2024
|
+
return None
|
2025
|
+
|
2026
|
+
@default_value.setter
|
2027
|
+
def default_value(self, value: bool) -> None:
|
2028
|
+
if self._aux is None:
|
2029
|
+
self._aux = {}
|
2030
|
+
if "af" not in self._aux:
|
2031
|
+
self._aux["af"] = {}
|
2032
|
+
self._aux["af"]["0"] = value
|
2033
|
+
|
2034
|
+
@property
|
2035
|
+
def nullable(self) -> bool:
|
2036
|
+
"""Indicates whether the feature can have nullable values (default `True`).
|
2037
|
+
|
2038
|
+
Example::
|
2039
|
+
|
2040
|
+
import lamindb as ln
|
2041
|
+
import pandas as pd
|
2042
|
+
|
2043
|
+
disease = ln.Feature(name="disease", dtype=ln.ULabel, nullable=False).save()
|
2044
|
+
schema = ln.Schema(features=[disease]).save()
|
2045
|
+
dataset = {"disease": pd.Categorical([pd.NA, "asthma"])}
|
2046
|
+
df = pd.DataFrame(dataset)
|
2047
|
+
curator = ln.curators.DataFrameCurator(df, schema)
|
2048
|
+
try:
|
2049
|
+
curator.validate()
|
2050
|
+
except ln.errors.ValidationError as e:
|
2051
|
+
assert str(e).startswith("non-nullable series 'disease' contains null values")
|
2052
|
+
|
2053
|
+
"""
|
2054
|
+
if self._aux is not None and "af" in self._aux and "1" in self._aux["af"]:
|
2055
|
+
value = self._aux["af"]["1"]
|
2056
|
+
return True if value is None else value
|
2057
|
+
else:
|
2058
|
+
return True
|
2059
|
+
|
2060
|
+
@nullable.setter
|
2061
|
+
def nullable(self, value: bool) -> None:
|
2062
|
+
assert isinstance(value, bool), value # noqa: S101
|
2063
|
+
if self._aux is None:
|
2064
|
+
self._aux = {}
|
2065
|
+
if "af" not in self._aux:
|
2066
|
+
self._aux["af"] = {}
|
2067
|
+
self._aux["af"]["1"] = value
|
2068
|
+
|
1953
2069
|
|
1954
2070
|
class FeatureValue(Record, TracksRun):
|
1955
2071
|
"""Non-categorical features values.
|
@@ -2000,9 +2116,10 @@ class FeatureValue(Record, TracksRun):
|
|
2000
2116
|
# Simple types: int, float, str, bool
|
2001
2117
|
if isinstance(value, (int, float, str, bool)):
|
2002
2118
|
try:
|
2003
|
-
return
|
2004
|
-
feature=feature, value=value, hash=None
|
2005
|
-
|
2119
|
+
return (
|
2120
|
+
cls.objects.create(feature=feature, value=value, hash=None),
|
2121
|
+
False,
|
2122
|
+
)
|
2006
2123
|
except IntegrityError:
|
2007
2124
|
return cls.objects.get(feature=feature, value=value), True
|
2008
2125
|
|
@@ -2010,49 +2127,64 @@ class FeatureValue(Record, TracksRun):
|
|
2010
2127
|
else:
|
2011
2128
|
hash = hash_dict(value)
|
2012
2129
|
try:
|
2013
|
-
return
|
2014
|
-
feature=feature, value=value, hash=hash
|
2015
|
-
|
2130
|
+
return (
|
2131
|
+
cls.objects.create(feature=feature, value=value, hash=hash),
|
2132
|
+
False,
|
2133
|
+
)
|
2016
2134
|
except IntegrityError:
|
2017
2135
|
return cls.objects.get(feature=feature, hash=hash), True
|
2018
2136
|
|
2019
2137
|
|
2020
2138
|
class Schema(Record, CanCurate, TracksRun):
|
2021
|
-
"""
|
2139
|
+
"""Schemas / feature sets.
|
2022
2140
|
|
2023
|
-
|
2024
|
-
that correspond to :class:`~lamindb.Feature`, :class:`~bionty.Gene`, :class:`~bionty.Protein` or other
|
2025
|
-
entities.
|
2141
|
+
A simple schema is just a set of columns in a `DataFrame`, a "feature set".
|
2026
2142
|
|
2027
|
-
|
2028
|
-
|
2029
|
-
1. Performance: Imagine you measure the same panel of 20k transcripts in
|
2030
|
-
1M samples. By modeling the panel as a feature set, you can link all
|
2031
|
-
your artifacts against one feature set and only need to store 1M
|
2032
|
-
instead of 1M x 20k = 20B links.
|
2033
|
-
2. Interpretation: Model protein panels, gene panels, etc.
|
2034
|
-
3. Data integration: Feature sets provide the information that determines whether two datasets can be meaningfully concatenated.
|
2035
|
-
|
2036
|
-
These reasons do not hold for label sets. Hence, LaminDB does not model label sets.
|
2143
|
+
A composite schema has multiple components, e.g. for an `AnnData`, each a feature set for `obs` and `var`.
|
2037
2144
|
|
2038
2145
|
Args:
|
2039
|
-
features: `Iterable[Record]` An iterable of :class:`~lamindb.Feature`
|
2146
|
+
features: `Iterable[Record] | None = None` An iterable of :class:`~lamindb.Feature`
|
2040
2147
|
records to hash, e.g., `[Feature(...), Feature(...)]`. Is turned into
|
2041
2148
|
a set upon instantiation. If you'd like to pass values, use
|
2042
2149
|
:meth:`~lamindb.Schema.from_values` or
|
2043
2150
|
:meth:`~lamindb.Schema.from_df`.
|
2151
|
+
components: `dict[str, Schema] | None = None` A dictionary mapping component names to
|
2152
|
+
their corresponding :class:`~lamindb.Schema` objects for composite schemas.
|
2153
|
+
name: `str | None = None` A name.
|
2154
|
+
description: `str | None = None` A description.
|
2044
2155
|
dtype: `str | None = None` The simple type. Defaults to
|
2045
2156
|
`None` for sets of :class:`~lamindb.Feature` records.
|
2046
2157
|
Otherwise defaults to `"num"` (e.g., for sets of :class:`~bionty.Gene`).
|
2047
|
-
|
2158
|
+
itype: `str | None = None` The feature identifier type (e.g. :class:`~lamindb.Feature`, :class:`~bionty.Gene`, ...).
|
2159
|
+
type: `Schema | None = None` A type.
|
2160
|
+
is_type: `bool = False` Distinguish types from instances of the type.
|
2161
|
+
otype: `str | None = None` An object type to define the structure of a composite schema.
|
2162
|
+
minimal_set: `bool = True` Whether the schema contains a minimal set of linked features.
|
2163
|
+
ordered_set: `bool = False` Whether features are required to be ordered.
|
2164
|
+
maximal_set: `bool = False` If `True`, no additional features are allowed.
|
2165
|
+
slot: `str | None = None` The slot name when this schema is used as a component in a
|
2166
|
+
composite schema.
|
2167
|
+
coerce_dtype: `bool = False` When True, attempts to coerce values to the specified dtype
|
2168
|
+
during validation, see :attr:`~lamindb.Schema.coerce_dtype`.
|
2169
|
+
|
2170
|
+
.. dropdown:: Why does LaminDB model schemas, not just features?
|
2171
|
+
|
2172
|
+
1. Performance: Imagine you measure the same panel of 20k transcripts in
|
2173
|
+
1M samples. By modeling the panel as a feature set, you can link all
|
2174
|
+
your artifacts against one feature set and only need to store 1M
|
2175
|
+
instead of 1M x 20k = 20B links.
|
2176
|
+
2. Interpretation: Model protein panels, gene panels, etc.
|
2177
|
+
3. Data integration: Feature sets provide the information that determines whether two datasets can be meaningfully concatenated.
|
2178
|
+
|
2179
|
+
These reasons do not hold for label sets. Hence, LaminDB does not model label sets.
|
2048
2180
|
|
2049
2181
|
Note:
|
2050
2182
|
|
2051
|
-
A feature set can be identified by the `hash` its feature uids.
|
2183
|
+
A feature set can be identified by the `hash` of its feature uids.
|
2052
2184
|
It's stored in the `.hash` field.
|
2053
2185
|
|
2054
|
-
A `slot` provides a string key to access feature sets.
|
2055
|
-
|
2186
|
+
A `slot` provides a string key to access feature sets. For instance, for the schema of an
|
2187
|
+
`AnnData` object, it would be `'obs'` for `adata.obs`.
|
2056
2188
|
|
2057
2189
|
See Also:
|
2058
2190
|
:meth:`~lamindb.Schema.from_values`
|
@@ -2062,24 +2194,20 @@ class Schema(Record, CanCurate, TracksRun):
|
|
2062
2194
|
|
2063
2195
|
Examples:
|
2064
2196
|
|
2065
|
-
Create a feature set
|
2197
|
+
Create a schema (feature set) from df with types:
|
2066
2198
|
|
2067
2199
|
>>> df = pd.DataFrame({"feat1": [1, 2], "feat2": [3.1, 4.2], "feat3": ["cond1", "cond2"]})
|
2068
|
-
>>>
|
2200
|
+
>>> schema = ln.Schema.from_df(df)
|
2069
2201
|
|
2070
|
-
Create a feature set
|
2202
|
+
Create a schema (feature set) from features:
|
2071
2203
|
|
2072
2204
|
>>> features = [ln.Feature(name=feat, dtype="float").save() for feat in ["feat1", "feat2"]]
|
2073
|
-
>>>
|
2205
|
+
>>> schema = ln.Schema(features)
|
2074
2206
|
|
2075
|
-
Create a feature set
|
2207
|
+
Create a schema (feature set) from identifier values:
|
2076
2208
|
|
2077
2209
|
>>> import bionty as bt
|
2078
|
-
>>>
|
2079
|
-
|
2080
|
-
Link a feature set to an artifact:
|
2081
|
-
|
2082
|
-
>>> artifact.features.add_feature_set(feature_set, slot="var")
|
2210
|
+
>>> schema = ln.Schema.from_values(adata.var["ensemble_id"], bt.Gene.ensembl_gene_id, organism="mouse").save()
|
2083
2211
|
|
2084
2212
|
"""
|
2085
2213
|
|
@@ -2087,6 +2215,10 @@ class Schema(Record, CanCurate, TracksRun):
|
|
2087
2215
|
abstract = False
|
2088
2216
|
|
2089
2217
|
_name_field: str = "name"
|
2218
|
+
_aux_fields: dict[str, tuple[str, type]] = {
|
2219
|
+
"0": ("coerce_dtype", bool),
|
2220
|
+
"1": ("_index_feature_uid", str),
|
2221
|
+
}
|
2090
2222
|
|
2091
2223
|
id: int = models.AutoField(primary_key=True)
|
2092
2224
|
"""Internal id, valid only in one DB instance."""
|
@@ -2098,89 +2230,116 @@ class Schema(Record, CanCurate, TracksRun):
|
|
2098
2230
|
"""A description."""
|
2099
2231
|
n = IntegerField()
|
2100
2232
|
"""Number of features in the set."""
|
2101
|
-
dtype: str | None = CharField(max_length=64, null=True)
|
2233
|
+
dtype: str | None = CharField(max_length=64, null=True, editable=False)
|
2102
2234
|
"""Data type, e.g., "num", "float", "int". Is `None` for :class:`~lamindb.Feature`.
|
2103
2235
|
|
2104
2236
|
For :class:`~lamindb.Feature`, types are expected to be heterogeneous and defined on a per-feature level.
|
2105
2237
|
"""
|
2106
|
-
|
2107
|
-
|
2108
|
-
|
2238
|
+
itype: str | None = CharField(
|
2239
|
+
max_length=120, db_index=True, null=True, editable=False
|
2240
|
+
)
|
2109
2241
|
"""A registry that stores feature identifiers used in this schema, e.g., `'Feature'` or `'bionty.Gene'`.
|
2110
2242
|
|
2111
2243
|
Depending on the registry, `.members` stores, e.g., `Feature` or `bionty.Gene` records.
|
2112
2244
|
|
2113
2245
|
.. versionchanged:: 1.0.0
|
2114
|
-
Was called `
|
2246
|
+
Was called `registry` before.
|
2115
2247
|
"""
|
2116
|
-
type:
|
2117
|
-
|
2118
|
-
)
|
2119
|
-
"""Type of feature set (e.g., 'ExpressionPanel', 'ProteinPanel', 'Multimodal', 'Metadata', 'Embedding').
|
2248
|
+
type: Schema | None = ForeignKey("self", PROTECT, null=True, related_name="records")
|
2249
|
+
"""Type of schema.
|
2120
2250
|
|
2121
|
-
Allows to group
|
2251
|
+
Allows grouping schemas by type, e.g., all measurements evaluating gene expression vs. protein expression vs. multimodal.
|
2252
|
+
|
2253
|
+
You can define types via `ln.Schema(name="ProteinPanel", is_type=True)`.
|
2254
|
+
|
2255
|
+
Here are a few more examples for type names: `'ExpressionPanel'`, `'ProteinPanel'`, `'Multimodal'`, `'Metadata'`, `'Embedding'`.
|
2122
2256
|
"""
|
2123
|
-
records:
|
2257
|
+
records: Schema
|
2124
2258
|
"""Records of this type."""
|
2125
|
-
is_type: bool = BooleanField(default=
|
2259
|
+
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
2126
2260
|
"""Distinguish types from instances of the type."""
|
2127
2261
|
otype: str | None = CharField(max_length=64, db_index=True, null=True)
|
2128
2262
|
"""Default Python object type, e.g., DataFrame, AnnData."""
|
2129
|
-
hash: str | None = CharField(
|
2263
|
+
hash: str | None = CharField(
|
2264
|
+
max_length=HASH_LENGTH, db_index=True, null=True, editable=False
|
2265
|
+
)
|
2130
2266
|
"""A hash of the set of feature identifiers.
|
2131
2267
|
|
2132
2268
|
For a composite schema, the hash of hashes.
|
2133
2269
|
"""
|
2134
|
-
minimal_set: bool = BooleanField(default=True, db_index=True)
|
2270
|
+
minimal_set: bool = BooleanField(default=True, db_index=True, editable=False)
|
2135
2271
|
"""Whether the schema contains a minimal set of linked features (default `True`).
|
2136
2272
|
|
2137
2273
|
If `False`, no features are linked to this schema.
|
2138
2274
|
|
2139
2275
|
If `True`, features are linked and considered as a minimally required set in validation.
|
2140
2276
|
"""
|
2141
|
-
ordered_set: bool = BooleanField(default=False, db_index=True)
|
2142
|
-
"""Whether
|
2143
|
-
maximal_set: bool = BooleanField(default=False, db_index=True)
|
2277
|
+
ordered_set: bool = BooleanField(default=False, db_index=True, editable=False)
|
2278
|
+
"""Whether features are required to be ordered (default `False`)."""
|
2279
|
+
maximal_set: bool = BooleanField(default=False, db_index=True, editable=False)
|
2144
2280
|
"""If `False`, additional features are allowed (default `False`).
|
2145
2281
|
|
2146
2282
|
If `True`, the minimal set is a maximal set and no additional features are allowed.
|
2147
2283
|
"""
|
2148
|
-
|
2149
|
-
"self",
|
2150
|
-
)
|
2151
|
-
"""The composite schema that contains this schema as a component.
|
2152
|
-
|
2153
|
-
The composite schema composes multiple simpler schemas into one object.
|
2154
|
-
|
2155
|
-
For example, an AnnData composes multiple schemas: `var[DataFrameT]`, `obs[DataFrame]`, `obsm[Array]`, `uns[dict]`, etc.
|
2156
|
-
"""
|
2157
|
-
slot: str | None = CharField(max_length=100, db_index=True, null=True)
|
2158
|
-
"""The slot in which the schema is stored in the composite schema."""
|
2159
|
-
validated_by: Schema | None = ForeignKey(
|
2160
|
-
"self", PROTECT, related_name="validated_schemas", default=None, null=True
|
2284
|
+
components: Schema = ManyToManyField(
|
2285
|
+
"self", through="SchemaComponent", symmetrical=False, related_name="composites"
|
2161
2286
|
)
|
2162
|
-
"""
|
2163
|
-
|
2164
|
-
|
2165
|
-
|
2166
|
-
For instance, the set of measured features might be a superset of the minimally required set of features.
|
2287
|
+
"""Components of this schema."""
|
2288
|
+
composites: Schema
|
2289
|
+
"""The composite schemas that contain this schema as a component.
|
2167
2290
|
|
2168
|
-
|
2291
|
+
For example, an `AnnData` composes multiple schemas: `var[DataFrameT]`, `obs[DataFrame]`, `obsm[Array]`, `uns[dict]`, etc.
|
2169
2292
|
"""
|
2170
2293
|
features: Feature
|
2171
2294
|
"""The features contained in the schema."""
|
2172
2295
|
params: Param
|
2173
2296
|
"""The params contained in the schema."""
|
2174
2297
|
artifacts: Artifact
|
2175
|
-
"""The artifacts that
|
2298
|
+
"""The artifacts that measure a feature set that matches this schema."""
|
2299
|
+
validated_artifacts: Artifact
|
2300
|
+
"""The artifacts that were validated against this schema with a :class:`~lamindb.curators.Curator`."""
|
2301
|
+
projects: Project
|
2302
|
+
"""Associated projects."""
|
2176
2303
|
_curation: dict[str, Any] = JSONField(default=None, db_default=None, null=True)
|
2304
|
+
# lamindb v2
|
2305
|
+
# _itype: ContentType = models.ForeignKey(ContentType, on_delete=models.CASCADE)
|
2306
|
+
# """Index of the registry that stores the feature identifiers, e.g., `Feature` or `Gene`."""
|
2307
|
+
# -- the following two fields are dynamically removed from the API for now
|
2308
|
+
validated_by: Schema | None = ForeignKey(
|
2309
|
+
"self", PROTECT, related_name="validated_schemas", default=None, null=True
|
2310
|
+
)
|
2311
|
+
# """The schema that validated this schema during curation.
|
2312
|
+
|
2313
|
+
# When performing validation, the schema that enforced validation is often less concrete than what is validated.
|
2314
|
+
|
2315
|
+
# For instance, the set of measured features might be a superset of the minimally required set of features.
|
2316
|
+
# """
|
2317
|
+
# validated_schemas: Schema
|
2318
|
+
# """The schemas that were validated against this schema with a :class:`~lamindb.curators.Curator`."""
|
2319
|
+
composite: Schema | None = ForeignKey(
|
2320
|
+
"self", PROTECT, related_name="+", default=None, null=True
|
2321
|
+
)
|
2322
|
+
# The legacy foreign key
|
2323
|
+
slot: str | None = CharField(max_length=100, db_index=True, null=True)
|
2324
|
+
# The legacy slot
|
2177
2325
|
|
2178
2326
|
@overload
|
2179
2327
|
def __init__(
|
2180
2328
|
self,
|
2181
|
-
features: Iterable[Record],
|
2182
|
-
|
2329
|
+
features: Iterable[Record] | None = None,
|
2330
|
+
components: dict[str, Schema] | None = None,
|
2183
2331
|
name: str | None = None,
|
2332
|
+
description: str | None = None,
|
2333
|
+
dtype: str | None = None,
|
2334
|
+
itype: str | Registry | FieldAttr | None = None,
|
2335
|
+
type: Schema | None = None,
|
2336
|
+
is_type: bool = False,
|
2337
|
+
otype: str | None = None,
|
2338
|
+
minimal_set: bool = True,
|
2339
|
+
ordered_set: bool = False,
|
2340
|
+
maximal_set: bool = False,
|
2341
|
+
slot: str | None = None,
|
2342
|
+
coerce_dtype: bool = False,
|
2184
2343
|
): ...
|
2185
2344
|
|
2186
2345
|
@overload
|
@@ -2256,6 +2415,58 @@ class Schema(Record, CanCurate, TracksRun):
|
|
2256
2415
|
"""A queryset for the individual records of the set."""
|
2257
2416
|
pass
|
2258
2417
|
|
2418
|
+
@property
|
2419
|
+
def coerce_dtype(self) -> bool:
|
2420
|
+
"""Whether dtypes should be coerced during validation.
|
2421
|
+
|
2422
|
+
For example, an `object`-dtyped pandas column can be coerced to `categorical` and would pass validation if this is true.
|
2423
|
+
"""
|
2424
|
+
if self._aux is not None and "af" in self._aux and "0" in self._aux["af"]:
|
2425
|
+
return self._aux["af"]["0"]
|
2426
|
+
else:
|
2427
|
+
return False
|
2428
|
+
|
2429
|
+
@coerce_dtype.setter
|
2430
|
+
def coerce_dtype(self, value: bool) -> None:
|
2431
|
+
if self._aux is None:
|
2432
|
+
self._aux = {}
|
2433
|
+
if "af" not in self._aux:
|
2434
|
+
self._aux["af"] = {}
|
2435
|
+
self._aux["af"]["0"] = value
|
2436
|
+
|
2437
|
+
@coerce_dtype.setter
|
2438
|
+
def coerce_dtype(self, value: bool) -> None:
|
2439
|
+
if self._aux is None:
|
2440
|
+
self._aux = {}
|
2441
|
+
if "af" not in self._aux:
|
2442
|
+
self._aux["af"] = {}
|
2443
|
+
self._aux["af"]["0"] = value
|
2444
|
+
|
2445
|
+
# @property
|
2446
|
+
# def index_feature(self) -> None | Feature:
|
2447
|
+
# # index_feature: `Record | None = None` A :class:`~lamindb.Feature` to validate the index of a `DataFrame`.
|
2448
|
+
# """The uid of the index feature, if `index_feature` was set."""
|
2449
|
+
# if self._index_feature_uid is None:
|
2450
|
+
# return None
|
2451
|
+
# else:
|
2452
|
+
# return self.features.get(uid=self._index_feature_uid)
|
2453
|
+
|
2454
|
+
# @property
|
2455
|
+
# def _index_feature_uid(self) -> None | str:
|
2456
|
+
# """The uid of the index feature, if `index_feature` was set."""
|
2457
|
+
# if self._aux is not None and "af" in self._aux and "1" in self._aux["af"]:
|
2458
|
+
# return self._aux["af"]["1"]
|
2459
|
+
# else:
|
2460
|
+
# return None
|
2461
|
+
|
2462
|
+
# @_index_feature_uid.setter
|
2463
|
+
# def _index_feature_uid(self, value: str) -> None:
|
2464
|
+
# if self._aux is None:
|
2465
|
+
# self._aux = {}
|
2466
|
+
# if "af" not in self._aux:
|
2467
|
+
# self._aux["af"] = {}
|
2468
|
+
# self._aux["af"]["1"] = value
|
2469
|
+
|
2259
2470
|
@property
|
2260
2471
|
@deprecated("itype")
|
2261
2472
|
def registry(self) -> str:
|
@@ -2265,8 +2476,23 @@ class Schema(Record, CanCurate, TracksRun):
|
|
2265
2476
|
def registry(self, value) -> None:
|
2266
2477
|
self.itype = value
|
2267
2478
|
|
2479
|
+
def describe(self, return_str=False) -> None | str:
|
2480
|
+
"""Describe schema."""
|
2481
|
+
message = str(self) + "\ncomponents:"
|
2482
|
+
for component in self.components.all():
|
2483
|
+
message += "\n " + str(component)
|
2484
|
+
if return_str:
|
2485
|
+
return message
|
2486
|
+
else:
|
2487
|
+
print(message)
|
2488
|
+
return None
|
2489
|
+
|
2490
|
+
def _get_component(self, slot: str) -> Schema:
|
2491
|
+
return self.components.get(links_component__slot=slot)
|
2492
|
+
|
2268
2493
|
|
2269
2494
|
class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
2495
|
+
# Note that this docstring has to be consistent with Curator.save_artifact()
|
2270
2496
|
"""Datasets & models stored as files, folders, or arrays.
|
2271
2497
|
|
2272
2498
|
Artifacts manage data in local or remote storage.
|
@@ -2276,10 +2502,10 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2276
2502
|
|
2277
2503
|
Args:
|
2278
2504
|
data: `UPathStr` A path to a local or remote folder or file.
|
2279
|
-
|
2280
|
-
key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a
|
2505
|
+
kind: `Literal["dataset", "model"] | None = None` Distinguish models from datasets from other files & folders.
|
2506
|
+
key: `str | None = None` A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`. Artifacts with the same key form a version family.
|
2281
2507
|
description: `str | None = None` A description.
|
2282
|
-
revises: `Artifact | None = None` Previous version of the artifact.
|
2508
|
+
revises: `Artifact | None = None` Previous version of the artifact. An alternative to passing `key` to trigger a new version.
|
2283
2509
|
run: `Run | None = None` The run that creates the artifact.
|
2284
2510
|
|
2285
2511
|
.. dropdown:: Typical storage formats & their API accessors
|
@@ -2313,26 +2539,28 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2313
2539
|
|
2314
2540
|
Examples:
|
2315
2541
|
|
2316
|
-
Create an artifact
|
2542
|
+
Create an artifact by passing `key`:
|
2543
|
+
|
2544
|
+
>>> artifact = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
|
2545
|
+
>>> artifact = ln.Artifact("./my_folder", key="project1/my_folder").save()
|
2317
2546
|
|
2318
|
-
|
2319
|
-
|
2547
|
+
Calling `.save()` uploads the file to the default storage location of your lamindb instance.
|
2548
|
+
(If it's a local instance, the "upload" is a mere copy operation.)
|
2320
2549
|
|
2321
|
-
|
2550
|
+
If your artifact is already in the cloud, lamindb auto-populates the `key` field based on the S3 key and there is no upload:
|
2322
2551
|
|
2323
|
-
>>> artifact = ln.Artifact("
|
2552
|
+
>>> artifact = ln.Artifact("s3://my_bucket/my_folder/my_file.csv").save()
|
2324
2553
|
|
2325
|
-
|
2554
|
+
You can make a new version of the artifact with `key = "example_datasets/my_file.parquet"`
|
2326
2555
|
|
2327
|
-
>>>
|
2328
|
-
>>>
|
2329
|
-
>>> artifact = ln.Artifact("./my_local_folder", key="project1/my_target_folder")
|
2556
|
+
>>> artifact_v2 = ln.Artifact("./my_file.parquet", key="example_datasets/my_file.parquet").save()
|
2557
|
+
>>> artifact_v2.versions.df() # see all versions
|
2330
2558
|
|
2331
2559
|
.. dropdown:: Why does the API look this way?
|
2332
2560
|
|
2333
2561
|
It's inspired by APIs building on AWS S3.
|
2334
2562
|
|
2335
|
-
Both boto3 and quilt select a bucket (
|
2563
|
+
Both boto3 and quilt select a bucket (a storage location in LaminDB) and define a target path through a `key` argument.
|
2336
2564
|
|
2337
2565
|
In `boto3 <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/bucket/upload_file.html>`__::
|
2338
2566
|
|
@@ -2349,16 +2577,18 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2349
2577
|
bucket = quilt3.Bucket('mybucket')
|
2350
2578
|
bucket.put_file('hello.txt', '/tmp/hello.txt')
|
2351
2579
|
|
2580
|
+
Sometimes you want to avoid mapping the artifact into a file hierarchy, and you can then _just_ populate `description` instead:
|
2352
2581
|
|
2353
|
-
|
2582
|
+
>>> artifact = ln.Artifact("s3://my_bucket/my_folder", description="My folder").save()
|
2583
|
+
>>> artifact = ln.Artifact("./my_local_folder", description="My local folder").save()
|
2354
2584
|
|
2355
|
-
|
2356
|
-
>>> artifact_v2 = ln.Artifact(df_updated, key="example_datasets/dataset1.parquet").save()
|
2585
|
+
Because you then cannot use `key`-based versioning, you have to pass `revises` to make a new artifact version:
|
2357
2586
|
|
2358
|
-
|
2587
|
+
>>> artifact_v2 = ln.Artifact("./my_file.parquet", revises=old_artifact).save()
|
2359
2588
|
|
2360
|
-
|
2361
|
-
|
2589
|
+
If an artifact with the exact same hash already exists, `Artifact()` returns the existing artifact. In concurrent workloads where
|
2590
|
+
the same artifact is created multiple times, `Artifact()` doesn't yet return the existing artifact but creates a new one; `.save()` however
|
2591
|
+
detects the duplication and will return the existing artifact.
|
2362
2592
|
|
2363
2593
|
"""
|
2364
2594
|
|
@@ -2455,9 +2685,11 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2455
2685
|
"""
|
2456
2686
|
description: str | None = CharField(db_index=True, null=True)
|
2457
2687
|
"""A description."""
|
2458
|
-
storage: Storage = ForeignKey(
|
2688
|
+
storage: Storage = ForeignKey(
|
2689
|
+
Storage, PROTECT, related_name="artifacts", editable=False
|
2690
|
+
)
|
2459
2691
|
"""Storage location, e.g. an S3 or GCP bucket or a local directory."""
|
2460
|
-
suffix: str = CharField(max_length=30, db_index=True)
|
2692
|
+
suffix: str = CharField(max_length=30, db_index=True, editable=False)
|
2461
2693
|
# Initially, we thought about having this be nullable to indicate folders
|
2462
2694
|
# But, for instance, .zarr is stored in a folder that ends with a .zarr suffix
|
2463
2695
|
"""Path suffix or empty string if no canonical suffix exists.
|
@@ -2470,19 +2702,27 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2470
2702
|
null=True,
|
2471
2703
|
)
|
2472
2704
|
""":class:`~lamindb.base.types.ArtifactKind` (default `None`)."""
|
2473
|
-
otype: str | None = CharField(
|
2705
|
+
otype: str | None = CharField(
|
2706
|
+
max_length=64, db_index=True, null=True, editable=False
|
2707
|
+
)
|
2474
2708
|
"""Default Python object type, e.g., DataFrame, AnnData."""
|
2475
|
-
size: int | None = BigIntegerField(
|
2709
|
+
size: int | None = BigIntegerField(
|
2710
|
+
null=True, db_index=True, default=None, editable=False
|
2711
|
+
)
|
2476
2712
|
"""Size in bytes.
|
2477
2713
|
|
2478
2714
|
Examples: 1KB is 1e3 bytes, 1MB is 1e6, 1GB is 1e9, 1TB is 1e12 etc.
|
2479
2715
|
"""
|
2480
|
-
hash: str | None = CharField(
|
2716
|
+
hash: str | None = CharField(
|
2717
|
+
max_length=HASH_LENGTH, db_index=True, null=True, unique=True, editable=False
|
2718
|
+
)
|
2481
2719
|
"""Hash or pseudo-hash of artifact content.
|
2482
2720
|
|
2483
2721
|
Useful to ascertain integrity and avoid duplication.
|
2484
2722
|
"""
|
2485
|
-
n_files: int | None = BigIntegerField(
|
2723
|
+
n_files: int | None = BigIntegerField(
|
2724
|
+
null=True, db_index=True, default=None, editable=False
|
2725
|
+
)
|
2486
2726
|
"""Number of files for folder-like artifacts, `None` for file-like artifacts.
|
2487
2727
|
|
2488
2728
|
Note that some arrays are also stored as folders, e.g., `.zarr` or `.tiledbsoma`.
|
@@ -2490,19 +2730,28 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2490
2730
|
.. versionchanged:: 1.0
|
2491
2731
|
Renamed from `n_objects` to `n_files`.
|
2492
2732
|
"""
|
2493
|
-
n_observations: int | None = BigIntegerField(
|
2733
|
+
n_observations: int | None = BigIntegerField(
|
2734
|
+
null=True, db_index=True, default=None, editable=False
|
2735
|
+
)
|
2494
2736
|
"""Number of observations.
|
2495
2737
|
|
2496
2738
|
Typically, this denotes the first array dimension.
|
2497
2739
|
"""
|
2498
|
-
_hash_type: str | None = CharField(
|
2740
|
+
_hash_type: str | None = CharField(
|
2741
|
+
max_length=30, db_index=True, null=True, editable=False
|
2742
|
+
)
|
2499
2743
|
"""Type of hash."""
|
2500
2744
|
ulabels: ULabel = models.ManyToManyField(
|
2501
2745
|
ULabel, through="ArtifactULabel", related_name="artifacts"
|
2502
2746
|
)
|
2503
2747
|
"""The ulabels measured in the artifact (:class:`~lamindb.ULabel`)."""
|
2504
2748
|
run: Run | None = ForeignKey(
|
2505
|
-
Run,
|
2749
|
+
Run,
|
2750
|
+
PROTECT,
|
2751
|
+
related_name="output_artifacts",
|
2752
|
+
null=True,
|
2753
|
+
default=None,
|
2754
|
+
editable=False,
|
2506
2755
|
)
|
2507
2756
|
"""Run that created the artifact."""
|
2508
2757
|
input_of_runs: Run = models.ManyToManyField(Run, related_name="input_artifacts")
|
@@ -2516,13 +2765,17 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2516
2765
|
collections: Collection
|
2517
2766
|
"""The collections that this artifact is part of."""
|
2518
2767
|
schema: Schema | None = ForeignKey(
|
2519
|
-
Schema,
|
2768
|
+
Schema,
|
2769
|
+
PROTECT,
|
2770
|
+
null=True,
|
2771
|
+
default=None,
|
2772
|
+
related_name="validated_artifacts",
|
2520
2773
|
)
|
2521
|
-
"""The schema
|
2522
|
-
|
2523
|
-
Schema, related_name="
|
2774
|
+
"""The schema that validated this artifact in a :class:`~lamindb.curators.Curator`."""
|
2775
|
+
feature_sets: Schema = models.ManyToManyField(
|
2776
|
+
Schema, related_name="artifacts", through="ArtifactSchema"
|
2524
2777
|
)
|
2525
|
-
"""
|
2778
|
+
"""The feature sets measured by the artifact."""
|
2526
2779
|
_feature_values: FeatureValue = models.ManyToManyField(
|
2527
2780
|
FeatureValue, through="ArtifactFeatureValue", related_name="artifacts"
|
2528
2781
|
)
|
@@ -2543,6 +2796,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2543
2796
|
PROTECT,
|
2544
2797
|
default=current_user_id,
|
2545
2798
|
related_name="created_artifacts",
|
2799
|
+
editable=False,
|
2546
2800
|
)
|
2547
2801
|
"""Creator of record."""
|
2548
2802
|
_overwrite_versions: bool = BooleanField(default=None)
|
@@ -2566,7 +2820,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2566
2820
|
# here; and we might refactor this but we might also keep that internal
|
2567
2821
|
# usage
|
2568
2822
|
data: UPathStr,
|
2569
|
-
|
2823
|
+
kind: ArtifactKind | None = None,
|
2570
2824
|
key: str | None = None,
|
2571
2825
|
description: str | None = None,
|
2572
2826
|
revises: Artifact | None = None,
|
@@ -2606,11 +2860,6 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2606
2860
|
def n_objects(self) -> int:
|
2607
2861
|
return self.n_files
|
2608
2862
|
|
2609
|
-
@property
|
2610
|
-
def feature_sets(self) -> QuerySet[Schema]:
|
2611
|
-
"""Feature sets linked to this artifact."""
|
2612
|
-
return self._schemas_m2m
|
2613
|
-
|
2614
2863
|
# add the below because this is what people will have in their code
|
2615
2864
|
# if they implement the recommended migration strategy
|
2616
2865
|
# - FeatureSet -> Schema
|
@@ -2620,14 +2869,14 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2620
2869
|
# def schemas(self) -> QuerySet[Schema]:
|
2621
2870
|
# """Schemas linked to artifact via many-to-many relationship.
|
2622
2871
|
|
2623
|
-
# Is now mediating the private `.
|
2872
|
+
# Is now mediating the private `.feature_sets` relationship during
|
2624
2873
|
# a transition period to better schema management.
|
2625
2874
|
|
2626
2875
|
# .. versionchanged: 1.0
|
2627
2876
|
# Was previously called `.feature_sets`.
|
2628
2877
|
|
2629
2878
|
# """
|
2630
|
-
# return self.
|
2879
|
+
# return self.feature_sets
|
2631
2880
|
|
2632
2881
|
@property
|
2633
2882
|
def path(self) -> Path:
|
@@ -2637,7 +2886,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2637
2886
|
|
2638
2887
|
>>> artifact = ln.Artifact("s3://my-bucket/my-file.csv").save()
|
2639
2888
|
>>> artifact.path
|
2640
|
-
|
2889
|
+
S3QueryPath('s3://my-bucket/my-file.csv')
|
2641
2890
|
|
2642
2891
|
File in local storage:
|
2643
2892
|
|
@@ -2652,6 +2901,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2652
2901
|
def from_df(
|
2653
2902
|
cls,
|
2654
2903
|
df: pd.DataFrame,
|
2904
|
+
*,
|
2655
2905
|
key: str | None = None,
|
2656
2906
|
description: str | None = None,
|
2657
2907
|
run: Run | None = None,
|
@@ -2692,6 +2942,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2692
2942
|
def from_anndata(
|
2693
2943
|
cls,
|
2694
2944
|
adata: AnnData | UPathStr,
|
2945
|
+
*,
|
2695
2946
|
key: str | None = None,
|
2696
2947
|
description: str | None = None,
|
2697
2948
|
run: Run | None = None,
|
@@ -2728,6 +2979,7 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2728
2979
|
def from_mudata(
|
2729
2980
|
cls,
|
2730
2981
|
mdata: MuData,
|
2982
|
+
*,
|
2731
2983
|
key: str | None = None,
|
2732
2984
|
description: str | None = None,
|
2733
2985
|
run: Run | None = None,
|
@@ -2760,11 +3012,38 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2760
3012
|
pass
|
2761
3013
|
|
2762
3014
|
@classmethod
|
2763
|
-
def
|
3015
|
+
def from_tiledbsoma(
|
2764
3016
|
cls,
|
2765
3017
|
path: UPathStr,
|
3018
|
+
*,
|
2766
3019
|
key: str | None = None,
|
3020
|
+
description: str | None = None,
|
3021
|
+
run: Run | None = None,
|
3022
|
+
revises: Artifact | None = None,
|
3023
|
+
**kwargs,
|
3024
|
+
) -> Artifact:
|
3025
|
+
"""Create from a tiledbsoma store.
|
3026
|
+
|
3027
|
+
Args:
|
3028
|
+
path: A tiledbsoma store with .tiledbsoma suffix.
|
3029
|
+
key: A relative path within default storage,
|
3030
|
+
e.g., `"myfolder/mystore.tiledbsoma"`.
|
3031
|
+
description: A description.
|
3032
|
+
revises: An old version of the artifact.
|
3033
|
+
run: The run that creates the artifact.
|
3034
|
+
|
3035
|
+
Examples:
|
3036
|
+
>>> artifact = ln.Artifact.from_tiledbsoma("s3://mybucket/store.tiledbsoma", description="a tiledbsoma store")
|
3037
|
+
>>> artifact.save()
|
3038
|
+
"""
|
3039
|
+
pass
|
3040
|
+
|
3041
|
+
@classmethod
|
3042
|
+
def from_dir(
|
3043
|
+
cls,
|
3044
|
+
path: UPathStr,
|
2767
3045
|
*,
|
3046
|
+
key: str | None = None,
|
2768
3047
|
run: Run | None = None,
|
2769
3048
|
) -> list[Artifact]:
|
2770
3049
|
"""Create a list of artifact objects from a directory.
|
@@ -2818,12 +3097,13 @@ class Artifact(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2818
3097
|
pass
|
2819
3098
|
|
2820
3099
|
def open(
|
2821
|
-
self, mode: str = "r", is_run_input: bool | None = None
|
3100
|
+
self, mode: str = "r", is_run_input: bool | None = None, **kwargs
|
2822
3101
|
) -> (
|
2823
3102
|
AnnDataAccessor
|
2824
3103
|
| BackedAccessor
|
2825
3104
|
| SOMACollection
|
2826
3105
|
| SOMAExperiment
|
3106
|
+
| SOMAMeasurement
|
2827
3107
|
| PyArrowDataset
|
2828
3108
|
):
|
2829
3109
|
"""Return a cloud-backed data object.
|
@@ -2966,13 +3246,13 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2966
3246
|
|
2967
3247
|
Args:
|
2968
3248
|
artifacts: `list[Artifact]` A list of artifacts.
|
2969
|
-
|
3249
|
+
key: `str` A file-path like key, analogous to the `key` parameter of `Artifact` and `Transform`.
|
2970
3250
|
description: `str | None = None` A description.
|
2971
3251
|
revises: `Collection | None = None` An old version of the collection.
|
2972
3252
|
run: `Run | None = None` The run that creates the collection.
|
2973
3253
|
meta: `Artifact | None = None` An artifact that defines metadata for the collection.
|
2974
|
-
reference: `str | None = None`
|
2975
|
-
reference_type: `str | None = None`
|
3254
|
+
reference: `str | None = None` A simple reference, e.g. an external ID or a URL.
|
3255
|
+
reference_type: `str | None = None` A way to indicate the type of the simple reference, e.g., `"url"`.
|
2976
3256
|
|
2977
3257
|
See Also:
|
2978
3258
|
:class:`~lamindb.Artifact`
|
@@ -2981,11 +3261,11 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
2981
3261
|
|
2982
3262
|
Create a collection from a list of :class:`~lamindb.Artifact` objects:
|
2983
3263
|
|
2984
|
-
>>> collection = ln.Collection([artifact1, artifact2],
|
3264
|
+
>>> collection = ln.Collection([artifact1, artifact2], key="my_project/my_collection")
|
2985
3265
|
|
2986
3266
|
Create a collection that groups a data & a metadata artifact (e.g., here :doc:`docs:rxrx`):
|
2987
3267
|
|
2988
|
-
>>> collection = ln.Collection(data_artifact,
|
3268
|
+
>>> collection = ln.Collection(data_artifact, key="my_project/my_collection", meta=metadata_artifact)
|
2989
3269
|
|
2990
3270
|
"""
|
2991
3271
|
|
@@ -3008,13 +3288,15 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
3008
3288
|
"""Universal id, valid across DB instances."""
|
3009
3289
|
key: str = CharField(db_index=True)
|
3010
3290
|
"""Name or path-like key."""
|
3011
|
-
#
|
3291
|
+
# below is the only case in which we use a TextField
|
3012
3292
|
# for description; we do so because users had descriptions exceeding 255 chars
|
3013
3293
|
# in their instances
|
3014
3294
|
description: str | None = TextField(null=True, db_index=True)
|
3015
3295
|
"""A description or title."""
|
3016
|
-
hash: str | None = CharField(
|
3017
|
-
|
3296
|
+
hash: str | None = CharField(
|
3297
|
+
max_length=HASH_LENGTH, db_index=True, null=True, unique=True
|
3298
|
+
)
|
3299
|
+
"""Hash of collection content."""
|
3018
3300
|
reference: str | None = CharField(max_length=255, db_index=True, null=True)
|
3019
3301
|
"""A reference like URL or external ID."""
|
3020
3302
|
# also for reference_type here, we allow an extra long max_length
|
@@ -3058,7 +3340,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
3058
3340
|
def __init__(
|
3059
3341
|
self,
|
3060
3342
|
artifacts: list[Artifact],
|
3061
|
-
|
3343
|
+
key: str,
|
3062
3344
|
description: str | None = None,
|
3063
3345
|
meta: Any | None = None,
|
3064
3346
|
reference: str | None = None,
|
@@ -3084,21 +3366,39 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
3084
3366
|
"""Add an artifact to the collection.
|
3085
3367
|
|
3086
3368
|
Creates a new version of the collection.
|
3369
|
+
This does not modify the original collection in-place, but returns a new version
|
3370
|
+
of the original collection with the added artifact.
|
3087
3371
|
|
3088
3372
|
Args:
|
3089
3373
|
artifact: An artifact to add to the collection.
|
3090
3374
|
run: The run that creates the new version of the collection.
|
3091
3375
|
|
3376
|
+
Examples:
|
3377
|
+
>>> collection = ln.Collection(artifact, key="new collection")
|
3378
|
+
>>> collection.save()
|
3379
|
+
>>> collection = collection.append(another_artifact) # returns a new version
|
3380
|
+
>>> collection.save() # save the new version
|
3381
|
+
|
3092
3382
|
.. versionadded:: 0.76.14
|
3093
3383
|
"""
|
3094
3384
|
pass
|
3095
3385
|
|
3386
|
+
def open(self, is_run_input: bool | None = None) -> PyArrowDataset:
|
3387
|
+
"""Return a cloud-backed pyarrow Dataset.
|
3388
|
+
|
3389
|
+
Works for `pyarrow` compatible formats.
|
3390
|
+
|
3391
|
+
Notes:
|
3392
|
+
For more info, see tutorial: :doc:`/arrays`.
|
3393
|
+
"""
|
3394
|
+
pass
|
3395
|
+
|
3096
3396
|
def mapped(
|
3097
3397
|
self,
|
3098
3398
|
layers_keys: str | list[str] | None = None,
|
3099
3399
|
obs_keys: str | list[str] | None = None,
|
3100
3400
|
obsm_keys: str | list[str] | None = None,
|
3101
|
-
obs_filter: dict[str, str |
|
3401
|
+
obs_filter: dict[str, str | list[str]] | None = None,
|
3102
3402
|
join: Literal["inner", "outer"] | None = "inner",
|
3103
3403
|
encode_labels: bool | list[str] = True,
|
3104
3404
|
unknown_label: str | dict[str, str] | None = None,
|
@@ -3136,7 +3436,7 @@ class Collection(Record, IsVersioned, TracksRun, TracksUpdates):
|
|
3136
3436
|
obsm_keys: Keys from the ``.obsm`` slots.
|
3137
3437
|
obs_filter: Select only observations with these values for the given obs columns.
|
3138
3438
|
Should be a dictionary with obs column names as keys
|
3139
|
-
and filtering values (a string or a
|
3439
|
+
and filtering values (a string or a list of strings) as values.
|
3140
3440
|
join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
|
3141
3441
|
does not join.
|
3142
3442
|
encode_labels: Encode labels into integers.
|
@@ -3330,7 +3630,7 @@ class Project(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
3330
3630
|
"""Type of project (e.g., 'Program', 'Project', 'GithubIssue', 'Task')."""
|
3331
3631
|
records: Project
|
3332
3632
|
"""Records of this type."""
|
3333
|
-
is_type: bool = BooleanField(default=
|
3633
|
+
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
3334
3634
|
"""Distinguish types from instances of the type."""
|
3335
3635
|
abbr: str | None = CharField(max_length=32, db_index=True, null=True)
|
3336
3636
|
"""An abbreviation."""
|
@@ -3434,7 +3734,7 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
3434
3734
|
"""
|
3435
3735
|
records: Reference
|
3436
3736
|
"""Records of this type."""
|
3437
|
-
is_type: bool = BooleanField(default=
|
3737
|
+
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
3438
3738
|
"""Distinguish types from instances of the type."""
|
3439
3739
|
url: str | None = URLField(null=True)
|
3440
3740
|
"""URL linking to the reference."""
|
@@ -3476,7 +3776,7 @@ class Reference(Record, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
3476
3776
|
# -------------------------------------------------------------------------------------
|
3477
3777
|
# Data models
|
3478
3778
|
|
3479
|
-
from django.contrib.postgres.fields import JSONField
|
3779
|
+
from django.contrib.postgres.fields import JSONField # type: ignore
|
3480
3780
|
from django.core.exceptions import ValidationError
|
3481
3781
|
from django.db import models
|
3482
3782
|
|
@@ -3543,7 +3843,7 @@ class RunData(BasicRecord, DataMixin):
|
|
3543
3843
|
class Meta:
|
3544
3844
|
constraints = [
|
3545
3845
|
models.CheckConstraint(
|
3546
|
-
|
3846
|
+
condition=(
|
3547
3847
|
models.Q(feature__isnull=False, param__isnull=True)
|
3548
3848
|
| models.Q(feature__isnull=True, param__isnull=False)
|
3549
3849
|
),
|
@@ -3574,7 +3874,7 @@ class FlexTable(Record, TracksRun, TracksUpdates):
|
|
3574
3874
|
"""Type of tidy table, e.g., `Cell`, `SampleSheet`, etc."""
|
3575
3875
|
records: ULabel
|
3576
3876
|
"""Records of this type."""
|
3577
|
-
is_type: bool = BooleanField(default=
|
3877
|
+
is_type: bool = BooleanField(default=False, db_index=True, null=True)
|
3578
3878
|
"""Distinguish types from instances of the type."""
|
3579
3879
|
description: str = CharField(null=True, db_index=True)
|
3580
3880
|
"""A description."""
|
@@ -3593,7 +3893,7 @@ class FlexTableData(BasicRecord, DataMixin):
|
|
3593
3893
|
class Meta:
|
3594
3894
|
constraints = [
|
3595
3895
|
models.CheckConstraint(
|
3596
|
-
|
3896
|
+
condition=(
|
3597
3897
|
models.Q(feature__isnull=False, param__isnull=True)
|
3598
3898
|
| models.Q(feature__isnull=True, param__isnull=False)
|
3599
3899
|
),
|
@@ -3621,8 +3921,8 @@ class LinkORM:
|
|
3621
3921
|
|
3622
3922
|
class SchemaFeature(BasicRecord, LinkORM):
|
3623
3923
|
id: int = models.BigAutoField(primary_key=True)
|
3624
|
-
schema: Schema = ForeignKey(Schema, CASCADE, related_name="
|
3625
|
-
feature: Feature = ForeignKey(Feature, PROTECT, related_name="
|
3924
|
+
schema: Schema = ForeignKey(Schema, CASCADE, related_name="links_feature")
|
3925
|
+
feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_schema")
|
3626
3926
|
|
3627
3927
|
class Meta:
|
3628
3928
|
unique_together = ("schema", "feature")
|
@@ -3640,15 +3940,22 @@ class SchemaParam(BasicRecord, LinkORM):
|
|
3640
3940
|
class ArtifactSchema(BasicRecord, LinkORM, TracksRun):
|
3641
3941
|
id: int = models.BigAutoField(primary_key=True)
|
3642
3942
|
artifact: Artifact = ForeignKey(Artifact, CASCADE, related_name="_links_schema")
|
3643
|
-
# we follow the lower() case convention rather than snake case for link models
|
3644
3943
|
schema: Schema = ForeignKey(Schema, PROTECT, related_name="_links_artifact")
|
3645
|
-
slot: str | None = CharField(
|
3646
|
-
feature_ref_is_semantic: bool | None = BooleanField(
|
3647
|
-
|
3648
|
-
|
3944
|
+
slot: str | None = CharField(null=True)
|
3945
|
+
feature_ref_is_semantic: bool | None = BooleanField(null=True)
|
3946
|
+
|
3947
|
+
class Meta:
|
3948
|
+
unique_together = (("artifact", "schema"), ("artifact", "slot"))
|
3949
|
+
|
3950
|
+
|
3951
|
+
class SchemaComponent(BasicRecord, LinkORM, TracksRun):
|
3952
|
+
id: int = models.BigAutoField(primary_key=True)
|
3953
|
+
composite: Schema = ForeignKey(Schema, CASCADE, related_name="links_composite")
|
3954
|
+
component: Schema = ForeignKey(Schema, PROTECT, related_name="links_component")
|
3955
|
+
slot: str | None = CharField(null=True)
|
3649
3956
|
|
3650
3957
|
class Meta:
|
3651
|
-
unique_together = ("
|
3958
|
+
unique_together = (("composite", "component"), ("composite", "slot"))
|
3652
3959
|
|
3653
3960
|
|
3654
3961
|
class CollectionArtifact(BasicRecord, LinkORM, TracksRun):
|
@@ -3883,14 +4190,14 @@ class CollectionReference(BasicRecord, LinkORM, TracksRun):
|
|
3883
4190
|
unique_together = ("collection", "reference")
|
3884
4191
|
|
3885
4192
|
|
3886
|
-
|
3887
|
-
|
3888
|
-
|
3889
|
-
|
4193
|
+
class Migration(BasicRecord):
|
4194
|
+
app = CharField(max_length=255)
|
4195
|
+
name = CharField(max_length=255)
|
4196
|
+
applied: datetime = DateTimeField()
|
3890
4197
|
|
3891
|
-
|
3892
|
-
|
3893
|
-
|
4198
|
+
class Meta:
|
4199
|
+
db_table = "django_migrations"
|
4200
|
+
managed = False
|
3894
4201
|
|
3895
4202
|
|
3896
4203
|
# -------------------------------------------------------------------------------------
|