lamindb 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +3 -3
- lamindb/core/_context.py +64 -69
- lamindb/core/datasets/_small.py +2 -2
- lamindb/curators/__init__.py +683 -893
- lamindb/models/__init__.py +8 -1
- lamindb/models/_feature_manager.py +23 -19
- lamindb/models/_from_values.py +1 -1
- lamindb/models/_is_versioned.py +5 -15
- lamindb/models/artifact.py +210 -111
- lamindb/models/can_curate.py +4 -1
- lamindb/models/collection.py +6 -4
- lamindb/models/feature.py +27 -30
- lamindb/models/has_parents.py +22 -7
- lamindb/models/project.py +2 -2
- lamindb/models/query_set.py +6 -35
- lamindb/models/record.py +167 -117
- lamindb/models/run.py +56 -2
- lamindb/models/save.py +1 -3
- lamindb/models/schema.py +277 -77
- lamindb/models/transform.py +4 -13
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/METADATA +6 -5
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/RECORD +24 -24
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/LICENSE +0 -0
- {lamindb-1.3.1.dist-info → lamindb-1.4.0.dist-info}/WHEEL +0 -0
lamindb/models/schema.py
CHANGED
@@ -80,55 +80,122 @@ def validate_features(features: list[Record]) -> Record:
|
|
80
80
|
return next(iter(feature_types)) # return value in set of cardinality 1
|
81
81
|
|
82
82
|
|
83
|
+
def get_features_config(
    features: list[Record | tuple[Record, dict]],
) -> tuple[list[Record], list[tuple[Record, dict]]]:
    """Get features and their config from the return of feature.with_config.

    Args:
        features: An iterable whose elements are either plain records or
            `(record, config)` tuples.

    Returns:
        A tuple `(features_list, configs)`: `features_list` holds the bare
        records in input order (tuples are unwrapped to their first element),
        `configs` holds the `(record, config)` tuples exactly as they were passed.
        If `features` is not iterable, it is returned unchanged together with
        an empty `configs` list.
    """
    features_list = []
    configs = []
    try:
        for feature in features:
            if isinstance(feature, tuple):
                # unwrap the record, but keep the full tuple so the config stays attached
                features_list.append(feature[0])
                configs.append(feature)
            else:
                features_list.append(feature)
    except TypeError:
        # `features` cannot be iterated: hand it back untouched
        return features, configs  # type: ignore
    return features_list, configs  # type: ignore
|
99
|
+
|
100
|
+
|
101
|
+
class SchemaOptionals:
    """Manage and access optional features in a schema.

    The uids of the optional features are stored in the schema's `_aux` dict
    under `_aux["af"]["1"]`.
    """

    def __init__(self, schema) -> None:
        self.schema = schema

    def get_uids(self) -> list[str]:
        """Get the uids of the optional features.

        Does **not** need an additional query to the database, while `get()` does.
        """
        aux = self.schema._aux
        if aux is not None and "af" in aux and "1" in aux["af"]:
            return aux["af"]["1"]
        return []

    def get(self) -> QuerySet:
        """Get the optional features."""
        uids = self.get_uids()
        if uids:
            return Feature.objects.filter(uid__in=uids).order_by("links_schema__id")
        return Feature.objects.none()  # empty QuerySet

    def set(self, features: list[Feature]) -> None:
        """Set the optional features.

        Replaces the stored uids with those of `features`. An empty list is a
        no-op: it leaves any previously stored uids untouched.

        Raises:
            TypeError: If `features` is not a list of `Feature` records.
        """
        if not isinstance(features, list) or not all(
            isinstance(f, Feature) for f in features
        ):
            raise TypeError("features must be a list of Feature records!")
        self.schema._aux = self.schema._aux or {}
        if features:
            self.schema._aux.setdefault("af", {})["1"] = [f.uid for f in features]

    def add(self, features: Feature | list[Feature]) -> None:
        """Add feature to the optional features.

        Accepts a single `Feature` or a list of them. Uids that are already
        registered as optional are not added a second time.

        Raises:
            TypeError: If any element is not a `Feature` record.
        """
        self.schema._aux = self.schema._aux or {}
        if not isinstance(features, list):
            features = [features]
        if not all(isinstance(f, Feature) for f in features):
            raise TypeError("features must be a list of Feature records!")
        if features:
            # create the uid list on first use, then append only uids that are
            # not yet present so the list never contains duplicates
            uids = self.schema._aux.setdefault("af", {}).setdefault("1", [])
            for feature in features:
                if feature.uid not in uids:
                    uids.append(feature.uid)
|
150
|
+
|
151
|
+
|
83
152
|
class Schema(Record, CanCurate, TracksRun):
|
84
153
|
"""Schemas.
|
85
154
|
|
86
|
-
|
155
|
+
A simple schema is a feature set such as the set of columns of a `DataFrame`.
|
87
156
|
|
88
157
|
A composite schema has multiple components, e.g., for an `AnnData`, one schema for `obs` and another one for `var`.
|
89
158
|
|
159
|
+
A schema can also merely define abstract constraints or instructions for dataset validation & annotation.
|
160
|
+
|
90
161
|
Args:
|
91
162
|
features: `Iterable[Record] | None = None` An iterable of :class:`~lamindb.Feature`
|
92
163
|
records to hash, e.g., `[Feature(...), Feature(...)]`. Is turned into
|
93
164
|
a set upon instantiation. If you'd like to pass values, use
|
94
165
|
:meth:`~lamindb.Schema.from_values` or
|
95
166
|
:meth:`~lamindb.Schema.from_df`.
|
96
|
-
components: `dict[str, Schema] | None = None` A dictionary mapping
|
97
|
-
|
167
|
+
components: `dict[str, Schema] | None = None` A dictionary mapping slot names to
|
168
|
+
components. A component is itself a :class:`~lamindb.Schema` object.
|
98
169
|
name: `str | None = None` A name.
|
99
170
|
description: `str | None = None` A description.
|
100
|
-
dtype: `str | None = None` The simple type. Defaults to
|
101
|
-
`None` for sets of :class:`~lamindb.Feature` records.
|
102
|
-
Otherwise defaults to `"num"` (e.g., for sets of :class:`~bionty.Gene`).
|
103
171
|
itype: `str | None = None` The feature identifier type (e.g. :class:`~lamindb.Feature`, :class:`~bionty.Gene`, ...).
|
172
|
+
flexible: `bool | None = None` Whether to include any feature of the same `itype` in validation
|
173
|
+
and annotation. If no features are passed, defaults to `True`, otherwise to `False`.
|
104
174
|
type: `Schema | None = None` A type.
|
105
175
|
is_type: `bool = False` Distinguish types from instances of the type.
|
106
176
|
otype: `str | None = None` An object type to define the structure of a composite schema.
|
107
|
-
|
177
|
+
dtype: `str | None = None` The simple type. Defaults to
|
178
|
+
`None` for sets of :class:`~lamindb.Feature` records.
|
179
|
+
Otherwise defaults to `"num"` (e.g., for sets of :class:`~bionty.Gene`).
|
180
|
+
minimal_set: `bool = True` Whether all passed features are to be considered required by default.
|
181
|
+
See :attr:`~lamindb.Schema.optionals` for more-fine-grained control.
|
108
182
|
ordered_set: `bool = False` Whether features are required to be ordered.
|
109
183
|
maximal_set: `bool = False` If `True`, no additional features are allowed.
|
110
|
-
slot: `str | None = None` The slot name when this schema is used as a component in a
|
111
|
-
composite schema.
|
112
184
|
coerce_dtype: `bool = False` When True, attempts to coerce values to the specified dtype
|
113
185
|
during validation, see :attr:`~lamindb.Schema.coerce_dtype`.
|
114
186
|
|
115
187
|
.. dropdown:: Why does LaminDB model schemas, not just features?
|
116
188
|
|
117
189
|
1. Performance: Imagine you measure the same panel of 20k transcripts in
|
118
|
-
1M samples. By modeling the panel as a
|
119
|
-
your artifacts against one
|
190
|
+
1M samples. By modeling the panel as a schema, you can link all
|
191
|
+
your artifacts against one schema and only need to store 1M
|
120
192
|
instead of 1M x 20k = 20B links.
|
121
193
|
2. Interpretation: Model protein panels, gene panels, etc.
|
122
|
-
3. Data integration:
|
123
|
-
|
124
|
-
These reasons do not hold for label sets. Hence, LaminDB does not model label sets.
|
194
|
+
3. Data integration: Schemas provide the information that determines whether two datasets can be meaningfully concatenated.
|
125
195
|
|
126
196
|
Note:
|
127
197
|
|
128
|
-
A
|
129
|
-
It's stored in the `.hash` field.
|
130
|
-
|
131
|
-
A `slot` provides a string key to access feature sets. For instance, for the schema of an
|
198
|
+
A `slot` provides a string key to access schema components. For instance, for the schema of an
|
132
199
|
`AnnData` object, it would be `'obs'` for `adata.obs`.
|
133
200
|
|
134
201
|
See Also:
|
@@ -139,21 +206,48 @@ class Schema(Record, CanCurate, TracksRun):
|
|
139
206
|
|
140
207
|
Examples:
|
141
208
|
|
142
|
-
Create
|
209
|
+
Create schemas::
|
143
210
|
|
144
|
-
|
145
|
-
|
211
|
+
import lamindb as ln
|
212
|
+
import bionty as bt
|
213
|
+
import pandas as pd
|
146
214
|
|
147
|
-
|
215
|
+
# From a dataframe
|
216
|
+
df = pd.DataFrame({"feat1": [1, 2], "feat2": [3.1, 4.2], "feat3": ["cond1", "cond2"]})
|
217
|
+
schema = ln.Schema.from_df(df)
|
148
218
|
|
149
|
-
|
150
|
-
|
219
|
+
# From explicitly defined features
|
220
|
+
schema = ln.Schema(
|
221
|
+
features=[
|
222
|
+
ln.Feature(name="required_feature", dtype=str).save(),
|
223
|
+
],
|
224
|
+
).save()
|
225
|
+
|
226
|
+
# By merely constraining an identifier type
|
227
|
+
schema = ln.Schema(itype=bt.Gene.ensembl_gene_id)
|
151
228
|
|
152
|
-
|
229
|
+
# A combination of the above
|
230
|
+
schema = ln.Schema(
|
231
|
+
features=[
|
232
|
+
ln.Feature(name="required_feature", dtype=str).save(),
|
233
|
+
],
|
234
|
+
flexible=True,
|
235
|
+
).save()
|
153
236
|
|
154
|
-
|
155
|
-
|
237
|
+
# By parsing & validating identifier values
|
238
|
+
schema = ln.Schema.from_values(
|
239
|
+
adata.var["ensemble_id"],
|
240
|
+
field=bt.Gene.ensembl_gene_id,
|
241
|
+
organism="mouse",
|
242
|
+
).save()
|
156
243
|
|
244
|
+
# Mark a single feature as optional and ignore other features of the same identifier type
|
245
|
+
schema = ln.Schema(
|
246
|
+
features=[
|
247
|
+
ln.Feature(name="required_feature", dtype=str).save(),
|
248
|
+
ln.Feature(name="feature2", dtype=int).save().with_config(optional=True),
|
249
|
+
],
|
250
|
+
).save()
|
157
251
|
"""
|
158
252
|
|
159
253
|
class Meta(Record.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
@@ -162,7 +256,8 @@ class Schema(Record, CanCurate, TracksRun):
|
|
162
256
|
_name_field: str = "name"
|
163
257
|
_aux_fields: dict[str, tuple[str, type]] = {
|
164
258
|
"0": ("coerce_dtype", bool),
|
165
|
-
"1": ("
|
259
|
+
"1": ("optionals", list[str]),
|
260
|
+
"2": ("flexible", bool),
|
166
261
|
}
|
167
262
|
|
168
263
|
id: int = models.AutoField(primary_key=True)
|
@@ -175,11 +270,6 @@ class Schema(Record, CanCurate, TracksRun):
|
|
175
270
|
"""A description."""
|
176
271
|
n = IntegerField()
|
177
272
|
"""Number of features in the set."""
|
178
|
-
dtype: str | None = CharField(max_length=64, null=True, editable=False)
|
179
|
-
"""Data type, e.g., "num", "float", "int". Is `None` for :class:`~lamindb.Feature`.
|
180
|
-
|
181
|
-
For :class:`~lamindb.Feature`, types are expected to be heterogeneous and defined on a per-feature level.
|
182
|
-
"""
|
183
273
|
itype: str | None = CharField(
|
184
274
|
max_length=120, db_index=True, null=True, editable=False
|
185
275
|
)
|
@@ -205,6 +295,11 @@ class Schema(Record, CanCurate, TracksRun):
|
|
205
295
|
"""Distinguish types from instances of the type."""
|
206
296
|
otype: str | None = CharField(max_length=64, db_index=True, null=True)
|
207
297
|
"""Default Python object type, e.g., DataFrame, AnnData."""
|
298
|
+
dtype: str | None = CharField(max_length=64, null=True, editable=False)
|
299
|
+
"""Data type, e.g., "num", "float", "int". Is `None` for :class:`~lamindb.Feature`.
|
300
|
+
|
301
|
+
For :class:`~lamindb.Feature`, types are expected to be heterogeneous and defined on a per-feature level.
|
302
|
+
"""
|
208
303
|
hash: str | None = CharField(
|
209
304
|
max_length=HASH_LENGTH, db_index=True, null=True, editable=False
|
210
305
|
)
|
@@ -213,18 +308,19 @@ class Schema(Record, CanCurate, TracksRun):
|
|
213
308
|
For a composite schema, the hash of hashes.
|
214
309
|
"""
|
215
310
|
minimal_set: bool = BooleanField(default=True, db_index=True, editable=False)
|
216
|
-
"""Whether
|
217
|
-
|
218
|
-
If `False`, no features are linked to this schema.
|
311
|
+
"""Whether all passed features are to be considered required by default (default `True`).
|
219
312
|
|
220
|
-
|
313
|
+
Note that features that are explicitly marked as `optional` via `feature.with_config(optional=True)`
|
314
|
+
are **not** required even if this `minimal_set` is true.
|
221
315
|
"""
|
222
316
|
ordered_set: bool = BooleanField(default=False, db_index=True, editable=False)
|
223
317
|
"""Whether features are required to be ordered (default `False`)."""
|
224
318
|
maximal_set: bool = BooleanField(default=False, db_index=True, editable=False)
|
225
|
-
"""
|
319
|
+
"""Whether all features present in the dataset must be in the schema (default `False`).
|
226
320
|
|
227
|
-
If `
|
321
|
+
If `False`, additional features are allowed to be present in the dataset.
|
322
|
+
|
323
|
+
If `True`, no additional features are allowed to be present in the dataset.
|
228
324
|
"""
|
229
325
|
components: Schema = ManyToManyField(
|
230
326
|
"self", through="SchemaComponent", symmetrical=False, related_name="composites"
|
@@ -280,10 +376,8 @@ class Schema(Record, CanCurate, TracksRun):
|
|
280
376
|
type: Schema | None = None,
|
281
377
|
is_type: bool = False,
|
282
378
|
otype: str | None = None,
|
283
|
-
minimal_set: bool = True,
|
284
379
|
ordered_set: bool = False,
|
285
380
|
maximal_set: bool = False,
|
286
|
-
slot: str | None = None,
|
287
381
|
coerce_dtype: bool = False,
|
288
382
|
): ...
|
289
383
|
|
@@ -313,26 +407,30 @@ class Schema(Record, CanCurate, TracksRun):
|
|
313
407
|
components: dict[str, Schema] = kwargs.pop("components", {})
|
314
408
|
name: str | None = kwargs.pop("name", None)
|
315
409
|
description: str | None = kwargs.pop("description", None)
|
316
|
-
dtype: str | None = kwargs.pop("dtype", None)
|
317
410
|
itype: str | Record | DeferredAttribute | None = kwargs.pop("itype", None)
|
411
|
+
flexible: bool | None = kwargs.pop("flexible", None)
|
318
412
|
type: Feature | None = kwargs.pop("type", None)
|
319
413
|
is_type: bool = kwargs.pop("is_type", False)
|
320
414
|
otype: str | None = kwargs.pop("otype", None)
|
415
|
+
dtype: str | None = kwargs.pop("dtype", None)
|
321
416
|
minimal_set: bool = kwargs.pop("minimal_set", True)
|
322
417
|
ordered_set: bool = kwargs.pop("ordered_set", False)
|
323
418
|
maximal_set: bool = kwargs.pop("maximal_set", False)
|
324
|
-
slot: str | None = kwargs.pop("slot", None)
|
325
419
|
coerce_dtype: bool | None = kwargs.pop("coerce_dtype", None)
|
420
|
+
optional_features = []
|
326
421
|
|
327
422
|
if kwargs:
|
328
423
|
raise ValueError(
|
329
424
|
f"Unexpected keyword arguments: {', '.join(kwargs.keys())}\n"
|
330
425
|
"Valid arguments are: features, description, dtype, itype, type, "
|
331
426
|
"is_type, otype, minimal_set, ordered_set, maximal_set, "
|
332
|
-
"
|
427
|
+
"coerce_dtype"
|
333
428
|
)
|
334
|
-
|
429
|
+
optional_features = []
|
430
|
+
if itype is not None:
|
431
|
+
itype = serialize_dtype(itype, is_itype=True)
|
335
432
|
if features:
|
433
|
+
features, configs = get_features_config(features)
|
336
434
|
features_registry = validate_features(features)
|
337
435
|
itype_compare = features_registry.__get_name_with_module__()
|
338
436
|
if itype is not None:
|
@@ -340,12 +438,18 @@ class Schema(Record, CanCurate, TracksRun):
|
|
340
438
|
else:
|
341
439
|
itype = itype_compare
|
342
440
|
n_features = len(features)
|
441
|
+
if features_registry == Feature:
|
442
|
+
optional_features = [
|
443
|
+
config[0] for config in configs if config[1].get("optional")
|
444
|
+
]
|
343
445
|
else:
|
344
446
|
n_features = -1
|
345
447
|
if dtype is None:
|
346
448
|
dtype = None if itype is not None and itype == "Feature" else NUMBER_TYPE
|
347
449
|
else:
|
348
450
|
dtype = get_type_str(dtype)
|
451
|
+
if flexible is None:
|
452
|
+
flexible = n_features < 0
|
349
453
|
components: dict[str, Schema]
|
350
454
|
if components:
|
351
455
|
itype = "Composite"
|
@@ -359,8 +463,8 @@ class Schema(Record, CanCurate, TracksRun):
|
|
359
463
|
"name": name,
|
360
464
|
"description": description,
|
361
465
|
"type": type,
|
362
|
-
"dtype": dtype,
|
363
466
|
"is_type": is_type,
|
467
|
+
"dtype": dtype,
|
364
468
|
"otype": otype,
|
365
469
|
"n": n_features,
|
366
470
|
"itype": itype_str,
|
@@ -370,19 +474,32 @@ class Schema(Record, CanCurate, TracksRun):
|
|
370
474
|
}
|
371
475
|
if coerce_dtype:
|
372
476
|
validated_kwargs["_aux"] = {"af": {"0": coerce_dtype}}
|
373
|
-
if
|
374
|
-
hash = hash_set({feature.uid for feature in features})
|
375
|
-
elif components:
|
477
|
+
if components:
|
376
478
|
hash = hash_set({component.hash for component in components.values()})
|
377
479
|
else:
|
378
|
-
|
480
|
+
# we do not want purely informational annotations like name, type, is_type, otype to be part of the hash
|
481
|
+
hash_args = ["dtype", "itype", "minimal_set", "ordered_set", "maximal_set"]
|
482
|
+
union_set = {
|
483
|
+
str(validated_kwargs[arg])
|
484
|
+
for arg in hash_args
|
485
|
+
if validated_kwargs[arg] is not None
|
486
|
+
}
|
487
|
+
if flexible != n_features < 0:
|
488
|
+
union_set.add(f"flexible:{flexible}")
|
489
|
+
if features:
|
490
|
+
union_set = union_set.union({feature.uid for feature in features})
|
491
|
+
if optional_features:
|
492
|
+
union_set = union_set.union(
|
493
|
+
{f"optional:{feature.uid}" for feature in optional_features}
|
494
|
+
)
|
495
|
+
hash = hash_set(union_set)
|
379
496
|
validated_kwargs["hash"] = hash
|
380
|
-
validated_kwargs["slot"] = slot
|
381
497
|
schema = Schema.filter(hash=hash).one_or_none()
|
382
498
|
if schema is not None:
|
383
499
|
logger.important(f"returning existing schema with same hash: {schema}")
|
384
500
|
init_self_from_db(self, schema)
|
385
501
|
update_attributes(self, validated_kwargs)
|
502
|
+
self.optionals.set(optional_features)
|
386
503
|
return None
|
387
504
|
self._components: dict[str, Schema] = {}
|
388
505
|
if features:
|
@@ -397,6 +514,8 @@ class Schema(Record, CanCurate, TracksRun):
|
|
397
514
|
self._slots = components
|
398
515
|
validated_kwargs["uid"] = ids.base62_20()
|
399
516
|
super().__init__(**validated_kwargs)
|
517
|
+
self.optionals.set(optional_features)
|
518
|
+
self.flexible = flexible
|
400
519
|
|
401
520
|
@classmethod
|
402
521
|
def from_values( # type: ignore
|
@@ -426,13 +545,18 @@ class Schema(Record, CanCurate, TracksRun):
|
|
426
545
|
Raises:
|
427
546
|
ValidationError: If some values are not valid.
|
428
547
|
|
429
|
-
|
548
|
+
Example:
|
430
549
|
|
431
|
-
|
432
|
-
>>> schema = ln.Schema.from_values(features)
|
550
|
+
::
|
433
551
|
|
434
|
-
|
435
|
-
|
552
|
+
import lamindb as ln
|
553
|
+
import bionty as bt
|
554
|
+
|
555
|
+
features = [ln.Feature(name=feat, dtype="str").save() for feat in ["feat11", "feat21"]]
|
556
|
+
schema = ln.Schema.from_values(features)
|
557
|
+
|
558
|
+
genes = ["ENSG00000139618", "ENSG00000198786"]
|
559
|
+
schema = ln.Schema.from_values(genes, bt.Gene.ensembl_gene_id, "float")
|
436
560
|
"""
|
437
561
|
if not isinstance(field, FieldAttr):
|
438
562
|
raise TypeError(
|
@@ -513,7 +637,6 @@ class Schema(Record, CanCurate, TracksRun):
|
|
513
637
|
features=validated_features,
|
514
638
|
name=name,
|
515
639
|
dtype=get_type_str(dtype),
|
516
|
-
otype="DataFrame",
|
517
640
|
)
|
518
641
|
return schema
|
519
642
|
|
@@ -579,11 +702,51 @@ class Schema(Record, CanCurate, TracksRun):
|
|
579
702
|
|
580
703
|
@coerce_dtype.setter
def coerce_dtype(self, value: bool) -> None:
    """Store the `coerce_dtype` flag under `_aux["af"]["0"]`."""
    # make sure the auxiliary dict exists before writing into it
    if not self._aux:
        self._aux = {}
    aux_fields = self._aux.setdefault("af", {})
    aux_fields["0"] = value
|
707
|
+
|
708
|
+
@property
def flexible(self) -> bool:
    """Indicates how to handle validation and annotation in case features are not defined.

    An explicit value is stored under `_aux["af"]["2"]`. If none is stored,
    the value defaults to `self.n < 0`.

    Examples:

        Make a rigid schema flexible::

            schema = ln.Schema.get(name="my_schema")
            schema.flexible = True
            schema.save()

        During schema creation::

            # if you're not passing features but just defining the itype, defaults to flexible = True
            schema = ln.Schema(itype=ln.Feature).save()
            assert schema.flexible

            # if you're passing features, defaults to flexible = False
            schema = ln.Schema(
                features=[ln.Feature(name="my_required_feature", dtype=int).save()],
            )
            assert not schema.flexible

            # you can also validate & annotate features in addition to those that you're explicitly defining:
            schema = ln.Schema(
                features=[ln.Feature(name="my_required_feature", dtype=int).save()],
                flexible=True,
            )
            assert schema.flexible

    """
    # an explicitly stored override takes precedence over the default
    if self._aux is not None and "af" in self._aux and "2" in self._aux["af"]:  # type: ignore
        return self._aux["af"]["2"]  # type: ignore
    return self.n < 0

@flexible.setter
def flexible(self, value: bool) -> None:
    """Set `flexible`, storing only values that deviate from the default.

    The default is `self.n < 0`. A deviating value is written to
    `_aux["af"]["2"]`. A value equal to the default removes any previously
    stored override, so the getter falls back to the default again.
    """
    if value != (self.n < 0):
        self._aux = self._aux or {}
        self._aux.setdefault("af", {})["2"] = value
    elif self._aux is not None and "af" in self._aux:
        # drop a stale override; otherwise resetting to the default would be ignored
        self._aux["af"].pop("2", None)
|
587
750
|
|
588
751
|
# @property
|
589
752
|
# def index_feature(self) -> None | Feature:
|
@@ -604,11 +767,8 @@ class Schema(Record, CanCurate, TracksRun):
|
|
604
767
|
|
605
768
|
# @_index_feature_uid.setter
|
606
769
|
# def _index_feature_uid(self, value: str) -> None:
|
607
|
-
#
|
608
|
-
#
|
609
|
-
# if "af" not in self._aux:
|
610
|
-
# self._aux["af"] = {}
|
611
|
-
# self._aux["af"]["1"] = value
|
770
|
+
# self._aux = self._aux or {}
|
771
|
+
# self._aux.setdefault("af", {})["0"] = value
|
612
772
|
|
613
773
|
@property
|
614
774
|
@deprecated("itype")
|
@@ -623,18 +783,20 @@ class Schema(Record, CanCurate, TracksRun):
|
|
623
783
|
def slots(self) -> dict[str, Schema]:
|
624
784
|
"""Slots.
|
625
785
|
|
626
|
-
Examples
|
786
|
+
Examples:
|
627
787
|
|
628
|
-
|
629
|
-
anndata_schema = ln.Schema(
|
630
|
-
name="small_dataset1_anndata_schema",
|
631
|
-
otype="AnnData",
|
632
|
-
components={"obs": obs_schema, "var": var_schema},
|
633
|
-
).save()
|
788
|
+
::
|
634
789
|
|
635
|
-
|
636
|
-
|
637
|
-
|
790
|
+
# define composite schema
|
791
|
+
anndata_schema = ln.Schema(
|
792
|
+
name="small_dataset1_anndata_schema",
|
793
|
+
otype="AnnData",
|
794
|
+
components={"obs": obs_schema, "var": var_schema},
|
795
|
+
).save()
|
796
|
+
|
797
|
+
# access slots
|
798
|
+
anndata_schema.slots
|
799
|
+
# {'obs': <Schema: obs_schema>, 'var': <Schema: var_schema>}
|
638
800
|
"""
|
639
801
|
if hasattr(self, "_slots"):
|
640
802
|
return self._slots
|
@@ -646,6 +808,44 @@ class Schema(Record, CanCurate, TracksRun):
|
|
646
808
|
return self._slots
|
647
809
|
return {}
|
648
810
|
|
811
|
+
@property
def optionals(self) -> SchemaOptionals:
    """Manage optional features.

    Example:

        ::

            # a schema with optional "sample_name"
            schema_optional_sample_name = ln.Schema(
                features=[
                    ln.Feature(name="sample_id", dtype=str).save(),  # required
                    ln.Feature(name="sample_name", dtype=str).save().with_config(optional=True),  # optional
                ],
            ).save()

            # raises ValidationError since `sample_id` is required
            ln.curators.DataFrameCurator(
                pd.DataFrame(
                    {
                        "sample_name": ["Sample 1", "Sample 2"],
                    }
                ),
                schema=schema_optional_sample_name,
            ).validate()

            # passes because only an optional column is missing
            ln.curators.DataFrameCurator(
                pd.DataFrame(
                    {
                        "sample_id": ["sample1", "sample2"],
                    }
                ),
                schema=schema_optional_sample_name,
            ).validate()
    """
    # a fresh manager object is created on every access; it wraps this schema
    return SchemaOptionals(self)
|
848
|
+
|
649
849
|
def describe(self, return_str=False) -> None | str:
|
650
850
|
"""Describe schema."""
|
651
851
|
message = str(self)
|
lamindb/models/transform.py
CHANGED
@@ -16,8 +16,7 @@ from lamindb.base.fields import (
|
|
16
16
|
)
|
17
17
|
from lamindb.base.users import current_user_id
|
18
18
|
|
19
|
-
from ..
|
20
|
-
from ..models._is_versioned import message_update_key_in_version_family, process_revises
|
19
|
+
from ..models._is_versioned import process_revises
|
21
20
|
from ._is_versioned import IsVersioned
|
22
21
|
from .record import Record, init_self_from_db, update_attributes
|
23
22
|
from .run import Run, User, delete_run_artifacts
|
@@ -281,15 +280,7 @@ class Transform(Record, IsVersioned):
|
|
281
280
|
update_attributes(self, {"description": description})
|
282
281
|
return None
|
283
282
|
if revises is not None and key is not None and revises.key != key:
|
284
|
-
|
285
|
-
suid=revises.stem_uid,
|
286
|
-
existing_key=revises.key,
|
287
|
-
new_key=key,
|
288
|
-
registry="Transform",
|
289
|
-
)
|
290
|
-
raise InconsistentKey(
|
291
|
-
f"`key` is '{key}', but `revises.key` is '{revises.key}'\n\nEither do *not* pass `key`.\n\n{note}"
|
292
|
-
)
|
283
|
+
logger.warning(f"renaming transform '{revises.key}' to {key}")
|
293
284
|
new_uid, version, key, description, revises = process_revises(
|
294
285
|
revises, version, key, description, Transform
|
295
286
|
)
|
@@ -349,9 +340,9 @@ class Transform(Record, IsVersioned):
|
|
349
340
|
|
350
341
|
def view_lineage(self, with_successors: bool = False, distance: int = 5):
|
351
342
|
"""View lineage of transforms."""
|
352
|
-
from .has_parents import
|
343
|
+
from .has_parents import view_parents
|
353
344
|
|
354
|
-
return
|
345
|
+
return view_parents(
|
355
346
|
record=self,
|
356
347
|
field="key",
|
357
348
|
with_children=with_successors,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lamindb
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.4.0
|
4
4
|
Summary: A data framework for biology.
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
6
6
|
Requires-Python: >=3.10,<3.14
|
@@ -10,8 +10,8 @@ Classifier: Programming Language :: Python :: 3.11
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.12
|
11
11
|
Classifier: Programming Language :: Python :: 3.13
|
12
12
|
Requires-Dist: lamin_utils==0.13.11
|
13
|
-
Requires-Dist: lamin_cli==1.
|
14
|
-
Requires-Dist: lamindb_setup[aws]==1.
|
13
|
+
Requires-Dist: lamin_cli==1.3.0
|
14
|
+
Requires-Dist: lamindb_setup[aws]==1.5.1
|
15
15
|
Requires-Dist: pyyaml
|
16
16
|
Requires-Dist: pyarrow
|
17
17
|
Requires-Dist: pandera
|
@@ -19,11 +19,11 @@ Requires-Dist: typing_extensions!=4.6.0
|
|
19
19
|
Requires-Dist: python-dateutil
|
20
20
|
Requires-Dist: scipy<1.15.0
|
21
21
|
Requires-Dist: pandas>=2.0.0
|
22
|
-
Requires-Dist: anndata>=0.8.0,<=0.11.
|
22
|
+
Requires-Dist: anndata>=0.8.0,<=0.11.4
|
23
23
|
Requires-Dist: fsspec
|
24
24
|
Requires-Dist: graphviz
|
25
25
|
Requires-Dist: psycopg2-binary
|
26
|
-
Requires-Dist: bionty>=1.
|
26
|
+
Requires-Dist: bionty>=1.3.0 ; extra == "bionty"
|
27
27
|
Requires-Dist: cellregistry ; extra == "cellregistry"
|
28
28
|
Requires-Dist: clinicore ; extra == "clinicore"
|
29
29
|
Requires-Dist: tomlkit ; extra == "dev"
|
@@ -46,6 +46,7 @@ Requires-Dist: nbconvert>=7.2.1 ; extra == "jupyter"
|
|
46
46
|
Requires-Dist: mistune!=3.1.0 ; extra == "jupyter"
|
47
47
|
Requires-Dist: omop ; extra == "omop"
|
48
48
|
Requires-Dist: wetlab ; extra == "wetlab"
|
49
|
+
Requires-Dist: numcodecs<0.16.0 ; extra == "zarr"
|
49
50
|
Requires-Dist: zarr>=2.16.0,<3.0.0a0 ; extra == "zarr"
|
50
51
|
Project-URL: Home, https://github.com/laminlabs/lamindb
|
51
52
|
Provides-Extra: bionty
|