lamindb 1.6.1__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -3
- lamindb/_finish.py +32 -16
- lamindb/base/types.py +6 -4
- lamindb/core/_context.py +127 -57
- lamindb/core/_mapped_collection.py +1 -1
- lamindb/core/_settings.py +44 -4
- lamindb/core/_track_environment.py +5 -2
- lamindb/core/loaders.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +1 -1
- lamindb/core/storage/_tiledbsoma.py +14 -8
- lamindb/core/storage/_valid_suffixes.py +0 -1
- lamindb/core/storage/_zarr.py +1 -1
- lamindb/core/storage/objects.py +13 -8
- lamindb/core/storage/paths.py +9 -6
- lamindb/core/types.py +1 -1
- lamindb/curators/_legacy.py +2 -1
- lamindb/curators/core.py +106 -105
- lamindb/errors.py +9 -0
- lamindb/examples/fixtures/__init__.py +0 -0
- lamindb/examples/fixtures/sheets.py +224 -0
- lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +1 -1
- lamindb/migrations/0105_record_unique_name.py +20 -0
- lamindb/migrations/0106_transfer_data_migration.py +25 -0
- lamindb/migrations/0107_add_schema_to_record.py +68 -0
- lamindb/migrations/0108_remove_record_sheet_remove_sheetproject_sheet_and_more.py +30 -0
- lamindb/migrations/0109_record_input_of_runs_alter_record_run_and_more.py +123 -0
- lamindb/migrations/0110_rename_values_artifacts_record_linked_artifacts.py +17 -0
- lamindb/migrations/0111_remove_record__sort_order.py +148 -0
- lamindb/migrations/0112_alter_recordartifact_feature_and_more.py +105 -0
- lamindb/migrations/0113_lower_case_branch_and_space_names.py +62 -0
- lamindb/migrations/0114_alter_run__status_code.py +24 -0
- lamindb/migrations/0115_alter_space_uid.py +52 -0
- lamindb/migrations/{0104_squashed.py → 0115_squashed.py} +261 -257
- lamindb/models/__init__.py +4 -3
- lamindb/models/_describe.py +88 -31
- lamindb/models/_feature_manager.py +627 -658
- lamindb/models/_label_manager.py +1 -3
- lamindb/models/artifact.py +214 -99
- lamindb/models/collection.py +7 -1
- lamindb/models/feature.py +288 -60
- lamindb/models/has_parents.py +3 -3
- lamindb/models/project.py +32 -15
- lamindb/models/query_manager.py +7 -1
- lamindb/models/query_set.py +118 -41
- lamindb/models/record.py +140 -94
- lamindb/models/run.py +42 -42
- lamindb/models/save.py +102 -16
- lamindb/models/schema.py +41 -8
- lamindb/models/sqlrecord.py +105 -40
- lamindb/models/storage.py +278 -0
- lamindb/models/transform.py +10 -2
- lamindb/models/ulabel.py +9 -1
- lamindb/py.typed +0 -0
- lamindb/setup/__init__.py +2 -1
- lamindb/setup/_switch.py +16 -0
- lamindb/setup/errors/__init__.py +4 -0
- lamindb/setup/types/__init__.py +4 -0
- {lamindb-1.6.1.dist-info → lamindb-1.7.0.dist-info}/METADATA +5 -5
- {lamindb-1.6.1.dist-info → lamindb-1.7.0.dist-info}/RECORD +61 -44
- lamindb/models/core.py +0 -135
- {lamindb-1.6.1.dist-info → lamindb-1.7.0.dist-info}/LICENSE +0 -0
- {lamindb-1.6.1.dist-info → lamindb-1.7.0.dist-info}/WHEEL +0 -0
lamindb/models/collection.py
CHANGED
@@ -270,7 +270,10 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
270
270
|
run: Run | None = kwargs.pop("run", None)
|
271
271
|
revises: Collection | None = kwargs.pop("revises", None)
|
272
272
|
version: str | None = kwargs.pop("version", None)
|
273
|
-
|
273
|
+
branch = kwargs.pop("branch", None)
|
274
|
+
branch_id = kwargs.pop("branch_id", 1)
|
275
|
+
space = kwargs.pop("space", None)
|
276
|
+
space_id = kwargs.pop("space_id", 1)
|
274
277
|
key: str
|
275
278
|
if "name" in kwargs:
|
276
279
|
key = kwargs.pop("name")
|
@@ -338,7 +341,10 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
|
|
338
341
|
hash=hash,
|
339
342
|
run=run,
|
340
343
|
version=version,
|
344
|
+
branch=branch,
|
341
345
|
branch_id=branch_id,
|
346
|
+
space=space,
|
347
|
+
space_id=space_id,
|
342
348
|
revises=revises,
|
343
349
|
_skip_validation=_skip_validation,
|
344
350
|
)
|
lamindb/models/feature.py
CHANGED
@@ -12,6 +12,10 @@ from django.db.utils import IntegrityError
|
|
12
12
|
from lamin_utils import logger
|
13
13
|
from lamindb_setup._init_instance import get_schema_module_name
|
14
14
|
from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict, hash_string
|
15
|
+
from lamindb_setup.errors import (
|
16
|
+
MODULE_WASNT_CONFIGURED_MESSAGE_TEMPLATE,
|
17
|
+
ModuleWasntConfigured,
|
18
|
+
)
|
15
19
|
from pandas.api.types import CategoricalDtype, is_string_dtype
|
16
20
|
from pandas.core.dtypes.base import ExtensionDtype
|
17
21
|
|
@@ -23,7 +27,7 @@ from lamindb.base.fields import (
|
|
23
27
|
TextField,
|
24
28
|
)
|
25
29
|
from lamindb.base.types import Dtype, FieldAttr
|
26
|
-
from lamindb.errors import FieldValidationError, ValidationError
|
30
|
+
from lamindb.errors import DoesNotExist, FieldValidationError, ValidationError
|
27
31
|
|
28
32
|
from ..base.ids import base62_12
|
29
33
|
from ._relations import dict_module_name_to_model_name
|
@@ -43,7 +47,7 @@ if TYPE_CHECKING:
|
|
43
47
|
FEATURE_DTYPES = set(get_args(Dtype))
|
44
48
|
|
45
49
|
|
46
|
-
def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str,
|
50
|
+
def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, Any]]:
|
47
51
|
"""Parses feature data type string into a structured list of components."""
|
48
52
|
from .artifact import Artifact
|
49
53
|
|
@@ -92,35 +96,14 @@ def parse_cat_dtype(
|
|
92
96
|
assert isinstance(dtype_str, str) # noqa: S101
|
93
97
|
if related_registries is None:
|
94
98
|
related_registries = dict_module_name_to_model_name(Artifact)
|
95
|
-
|
96
|
-
#
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
sub_type_str = sub_type_field_split[0].strip("]")
|
104
|
-
field_str = ""
|
105
|
-
else:
|
106
|
-
sub_type_str = sub_type_field_split[0]
|
107
|
-
field_str = sub_type_field_split[1]
|
108
|
-
elif len(split_result) == 1:
|
109
|
-
registry_field_split = split_result[0].split(".")
|
110
|
-
if (
|
111
|
-
len(registry_field_split) == 2 and registry_field_split[1][0].isupper()
|
112
|
-
) or len(registry_field_split) == 3:
|
113
|
-
# bionty.CellType or bionty.CellType.name
|
114
|
-
registry_str = f"{registry_field_split[0]}.{registry_field_split[1]}"
|
115
|
-
field_str = (
|
116
|
-
"" if len(registry_field_split) == 2 else registry_field_split[2]
|
117
|
-
)
|
118
|
-
else:
|
119
|
-
# ULabel or ULabel.name
|
120
|
-
registry_str = registry_field_split[0]
|
121
|
-
field_str = (
|
122
|
-
"" if len(registry_field_split) == 1 else registry_field_split[1]
|
123
|
-
)
|
99
|
+
|
100
|
+
# Parse the string considering nested brackets
|
101
|
+
parsed = parse_nested_brackets(dtype_str)
|
102
|
+
|
103
|
+
registry_str = parsed["registry"]
|
104
|
+
sub_type_str = parsed["subtype"]
|
105
|
+
field_str = parsed["field"]
|
106
|
+
|
124
107
|
if not is_itype:
|
125
108
|
if registry_str not in related_registries:
|
126
109
|
raise ValidationError(
|
@@ -136,16 +119,14 @@ def parse_cat_dtype(
|
|
136
119
|
module_name_attempt, raise_import_error=False
|
137
120
|
)
|
138
121
|
if module_name is None:
|
139
|
-
raise
|
140
|
-
|
141
|
-
f"was not found.\nInstall the module with `pip install {module_name_attempt}`\n"
|
142
|
-
"and also add the module to this instance via instance settings page "
|
143
|
-
"under 'schema modules'."
|
122
|
+
raise ModuleWasntConfigured(
|
123
|
+
MODULE_WASNT_CONFIGURED_MESSAGE_TEMPLATE.format(module_name_attempt)
|
144
124
|
)
|
145
125
|
else:
|
146
126
|
module_name, class_name = "lamindb", registry_str
|
147
127
|
module = importlib.import_module(module_name)
|
148
128
|
registry = getattr(module, class_name)
|
129
|
+
|
149
130
|
if sub_type_str != "":
|
150
131
|
pass
|
151
132
|
# validate that the subtype is a record in the registry with is_type = True
|
@@ -154,7 +135,8 @@ def parse_cat_dtype(
|
|
154
135
|
# validate that field_str is an actual field of the module
|
155
136
|
else:
|
156
137
|
field_str = registry._name_field if hasattr(registry, "_name_field") else "name"
|
157
|
-
|
138
|
+
|
139
|
+
result = {
|
158
140
|
"registry": registry, # should be typed as CanCurate
|
159
141
|
"registry_str": registry_str,
|
160
142
|
"subtype_str": sub_type_str,
|
@@ -162,6 +144,138 @@ def parse_cat_dtype(
|
|
162
144
|
"field": getattr(registry, field_str),
|
163
145
|
}
|
164
146
|
|
147
|
+
# Add nested subtype information if present
|
148
|
+
if parsed.get("nested_subtypes"):
|
149
|
+
result["nested_subtypes"] = parsed["nested_subtypes"]
|
150
|
+
|
151
|
+
return result
|
152
|
+
|
153
|
+
|
154
|
+
def parse_nested_brackets(dtype_str: str) -> dict[str, str]:
    """Parse a dtype string with potentially nested brackets.

    Examples:
        "A" -> {"registry": "A", "subtype": "", "field": ""}
        "A.field" -> {"registry": "A", "subtype": "", "field": "field"}
        "A[B]" -> {"registry": "A", "subtype": "B", "field": ""}
        "A[B].field" -> {"registry": "A", "subtype": "B", "field": "field"}
        "A[B[C]]" -> {"registry": "A", "subtype": "B[C]", "field": "", "nested_subtypes": ["B", "C"]}
        "A[B[C]].field" -> {"registry": "A", "subtype": "B[C]", "field": "field", "nested_subtypes": ["B", "C"]}

    Args:
        dtype_str: The dtype string to parse

    Returns:
        Dictionary with the string entries "registry", "subtype" and "field".
        When the subtype itself contains brackets, an additional
        "nested_subtypes" entry holds the list produced by
        `extract_nested_subtypes`.

    Raises:
        ValueError: If the first opening bracket has no matching closing
            bracket, or if a two-part dotted string ends with the dot
            (e.g. "A."), which previously surfaced as a bare IndexError.
    """
    if "[" not in dtype_str:
        parts = dtype_str.split(".")
        if len(parts) == 1:
            # Simple registry name
            return {"registry": dtype_str, "subtype": "", "field": ""}
        if len(parts) == 2:
            if not parts[1]:
                # Nothing follows the dot, so there is no first character to
                # inspect; fail with a descriptive error instead of indexing
                # into an empty string.
                raise ValueError(
                    f"Empty component after '.' in dtype string: {dtype_str}"
                )
            if parts[1][0].isupper():
                # bionty.CellType: a capitalized second part is a class name,
                # so the whole string is the registry
                return {"registry": dtype_str, "subtype": "", "field": ""}
        elif len(parts) == 3:
            # bionty.CellType.name
            return {
                "registry": f"{parts[0]}.{parts[1]}",
                "subtype": "",
                "field": parts[2],
            }
        # ULabel.name (strings with more than three components also land
        # here and keep only their first two parts)
        return {"registry": parts[0], "subtype": "", "field": parts[1]}

    # Everything before the first opening bracket is the registry
    first_bracket = dtype_str.index("[")
    registry_part = dtype_str[:first_bracket]

    # Walk forward from the first "[" tracking nesting depth; the "]" that
    # brings the depth back to zero is its match
    depth = 0
    closing_bracket_pos = -1
    for pos in range(first_bracket, len(dtype_str)):
        char = dtype_str[pos]
        if char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
            if depth == 0:
                closing_bracket_pos = pos
                break

    if closing_bracket_pos == -1:
        raise ValueError(f"Unmatched brackets in dtype string: {dtype_str}")

    # Subtype is everything between the first "[" and its matching "]"
    subtype_part = dtype_str[first_bracket + 1 : closing_bracket_pos]

    # A field may follow the closing bracket, introduced by a dot
    remainder = dtype_str[closing_bracket_pos + 1 :]
    field_part = remainder[1:] if remainder.startswith(".") else ""

    result = {"registry": registry_part, "subtype": subtype_part, "field": field_part}

    # If subtype contains brackets, extract nested subtypes for reference
    if "[" in subtype_part:
        nested_subtypes = extract_nested_subtypes(subtype_part)
        if nested_subtypes:
            result["nested_subtypes"] = nested_subtypes  # type: ignore

    return result
|
230
|
+
|
231
|
+
|
232
|
+
def extract_nested_subtypes(subtype_str: str) -> list[str]:
    """Collect every nesting level of a bracketed subtype string.

    Examples:
        "B[C]" -> ["B", "C"]
        "B[C[D]]" -> ["B", "C", "D"]
        "B[C[D[E]]]" -> ["B", "C", "D", "E"]

    Args:
        subtype_str: The subtype string with potential nesting

    Returns:
        List of subtype levels from outermost to innermost. If an opening
        bracket has no matching closing bracket, descent stops there and the
        not-yet-descended text is appended as the last entry.
    """
    levels = []
    remaining = subtype_str

    while True:
        open_at = remaining.find("[")
        if open_at < 0:
            break

        # The text in front of the bracket names the current level
        levels.append(remaining[:open_at])

        # Locate the "]" that balances this "[" by tracking nesting depth
        depth = 0
        close_at = -1
        for offset, char in enumerate(remaining[open_at:], start=open_at):
            if char == "[":
                depth += 1
            elif char == "]":
                depth -= 1
                if depth == 0:
                    close_at = offset
                    break

        if close_at < 0:
            # Unbalanced input: stop descending
            break

        # Descend into the bracket contents
        remaining = remaining[open_at + 1 : close_at]

    # Whatever is left is the innermost level (skipped when empty)
    if remaining:
        levels.append(remaining)

    return levels
|
278
|
+
|
165
279
|
|
166
280
|
def serialize_dtype(
|
167
281
|
dtype: Registry
|
@@ -237,6 +351,7 @@ def serialize_dtype(
|
|
237
351
|
|
238
352
|
|
239
353
|
def serialize_pandas_dtype(pandas_dtype: ExtensionDtype) -> str:
|
354
|
+
"""Convert pandas ExtensionDtype to simplified string representation."""
|
240
355
|
if is_string_dtype(pandas_dtype):
|
241
356
|
if not isinstance(pandas_dtype, CategoricalDtype):
|
242
357
|
dtype = "str"
|
@@ -256,6 +371,76 @@ def serialize_pandas_dtype(pandas_dtype: ExtensionDtype) -> str:
|
|
256
371
|
return dtype
|
257
372
|
|
258
373
|
|
374
|
+
def parse_filter_string(filter_str: str) -> dict[str, tuple[str, str | None, str]]:
|
375
|
+
"""Parse comma-separated Django filter expressions into structured components.
|
376
|
+
|
377
|
+
Args:
|
378
|
+
filter_str: Comma-separated filters like 'name=value, relation__field=value'
|
379
|
+
|
380
|
+
Returns:
|
381
|
+
Dict mapping original filter key to (relation_name, field_name, value) tuple.
|
382
|
+
For direct fields: field_name is None.
|
383
|
+
For relations: field_name contains the lookup field.
|
384
|
+
"""
|
385
|
+
filters = {}
|
386
|
+
|
387
|
+
filter_parts = [part.strip() for part in filter_str.split(",")]
|
388
|
+
for part in filter_parts:
|
389
|
+
if "=" not in part:
|
390
|
+
raise ValueError(f"Invalid filter expression: '{part}' (missing '=' sign)")
|
391
|
+
|
392
|
+
key, value = part.split("=", 1)
|
393
|
+
key = key.strip()
|
394
|
+
value = value.strip().strip("'\"")
|
395
|
+
|
396
|
+
if not key:
|
397
|
+
raise ValueError(f"Invalid filter expression: '{part}' (empty key)")
|
398
|
+
if not value:
|
399
|
+
raise ValueError(f"Invalid filter expression: '{part}' (empty value)")
|
400
|
+
|
401
|
+
if "__" in key:
|
402
|
+
relation_name, field_name = key.split("__", 1)
|
403
|
+
filters[key] = (relation_name, field_name, value)
|
404
|
+
else:
|
405
|
+
filters[key] = (key, None, value)
|
406
|
+
|
407
|
+
return filters
|
408
|
+
|
409
|
+
|
410
|
+
def resolve_relation_filters(
    parsed_filters: dict[str, tuple[str, str | None, str]], registry: SQLRecord
) -> dict[str, str | SQLRecord]:
    """Resolve relation filters to actual model objects.

    Args:
        parsed_filters: Django filters like output from :func:`lamindb.models.feature.parse_filter_string`
        registry: Model class to resolve relationships against

    Returns:
        Dict in which each successfully resolved relation filter is stored
        under its relation name with the looked-up object as value. Direct
        field filters, and relation filters that could not be resolved, are
        stored under their original filter key with the original value.
    """
    resolved = {}

    for filter_key, (relation_name, field_name, value) in parsed_filters.items():
        # Only relation filters (those carrying a lookup field) are candidates
        # for resolution, and only if the registry exposes that relation
        if field_name is not None and hasattr(registry, relation_name):
            descriptor = getattr(registry, relation_name)
            if hasattr(descriptor, "field") and descriptor.field.is_relation:
                try:
                    target = descriptor.field.related_model.get(
                        **{field_name: value}
                    )
                except (DoesNotExist, AttributeError):
                    pass  # Fall back to original filter
                else:
                    resolved[relation_name] = target
                    continue

        resolved[filter_key] = value

    return resolved
|
442
|
+
|
443
|
+
|
259
444
|
def process_init_feature_param(args, kwargs, is_param: bool = False):
|
260
445
|
# now we proceed with the user-facing constructor
|
261
446
|
if len(args) != 0:
|
@@ -265,12 +450,22 @@ def process_init_feature_param(args, kwargs, is_param: bool = False):
|
|
265
450
|
is_type: bool = kwargs.pop("is_type", None)
|
266
451
|
type_: Feature | str | None = kwargs.pop("type", None)
|
267
452
|
description: str | None = kwargs.pop("description", None)
|
453
|
+
branch = kwargs.pop("branch", None)
|
454
|
+
branch_id = kwargs.pop("branch_id", 1)
|
455
|
+
space = kwargs.pop("space", None)
|
456
|
+
space_id = kwargs.pop("space_id", 1)
|
457
|
+
_skip_validation = kwargs.pop("_skip_validation", False)
|
268
458
|
if kwargs:
|
269
459
|
valid_keywords = ", ".join([val[0] for val in _get_record_kwargs(Feature)])
|
270
460
|
raise FieldValidationError(f"Only {valid_keywords} are valid keyword arguments")
|
271
461
|
kwargs["name"] = name
|
272
462
|
kwargs["type"] = type_
|
273
463
|
kwargs["is_type"] = is_type
|
464
|
+
kwargs["branch"] = branch
|
465
|
+
kwargs["branch_id"] = branch_id
|
466
|
+
kwargs["space"] = space
|
467
|
+
kwargs["space_id"] = space_id
|
468
|
+
kwargs["_skip_validation"] = _skip_validation
|
274
469
|
if not is_param:
|
275
470
|
kwargs["description"] = description
|
276
471
|
# cast dtype
|
@@ -338,33 +533,40 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
338
533
|
|
339
534
|
Example:
|
340
535
|
|
341
|
-
A simple `"str"` feature
|
536
|
+
A simple `"str"` feature.::
|
537
|
+
|
538
|
+
ln.Feature(
|
539
|
+
name="sample_note",
|
540
|
+
dtype="str",
|
541
|
+
).save()
|
342
542
|
|
343
|
-
|
344
|
-
... name="sample_note",
|
345
|
-
... dtype="str",
|
346
|
-
... ).save()
|
543
|
+
A dtype `"cat[ULabel]"` can be more easily passed as below.::
|
347
544
|
|
348
|
-
|
545
|
+
ln.Feature(
|
546
|
+
name="project",
|
547
|
+
dtype=ln.ULabel,
|
548
|
+
).save()
|
349
549
|
|
350
|
-
|
351
|
-
... name="project",
|
352
|
-
... dtype=ln.ULabel,
|
353
|
-
... ).save()
|
550
|
+
A dtype `"cat[ULabel|bionty.CellType]"` can be more easily passed as below.::
|
354
551
|
|
355
|
-
|
552
|
+
ln.Feature(
|
553
|
+
name="cell_type",
|
554
|
+
dtype=[ln.ULabel, bt.CellType],
|
555
|
+
).save()
|
356
556
|
|
357
|
-
|
358
|
-
... name="cell_type",
|
359
|
-
... dtype=[ln.ULabel, bt.CellType],
|
360
|
-
... ).save()
|
557
|
+
A multivalue feature with a list of cell types.::
|
361
558
|
|
362
|
-
|
559
|
+
ln.Feature(
|
560
|
+
name="cell_types",
|
561
|
+
dtype=list[bt.CellType], # or list[str] for a list of strings
|
562
|
+
).save()
|
363
563
|
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
564
|
+
A path feature.::
|
565
|
+
|
566
|
+
ln.Feature(
|
567
|
+
name="image_path",
|
568
|
+
dtype="path", # will be validated as `str`
|
569
|
+
).save()
|
368
570
|
|
369
571
|
Hint:
|
370
572
|
|
@@ -383,7 +585,6 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
383
585
|
happened, ask yourself what the joint measurement was: a feature
|
384
586
|
qualifies variables in a joint measurement. The canonical data matrix
|
385
587
|
lists jointly measured variables in the columns.
|
386
|
-
|
387
588
|
"""
|
388
589
|
|
389
590
|
class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
|
@@ -503,7 +704,6 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
503
704
|
if len(args) == len(self._meta.concrete_fields):
|
504
705
|
super().__init__(*args, **kwargs)
|
505
706
|
return None
|
506
|
-
dtype = kwargs.get("dtype", None)
|
507
707
|
default_value = kwargs.pop("default_value", None)
|
508
708
|
nullable = kwargs.pop("nullable", True) # default value of nullable
|
509
709
|
cat_filters = kwargs.pop("cat_filters", None)
|
@@ -517,6 +717,32 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
517
717
|
if cat_filters:
|
518
718
|
assert "|" not in dtype_str # noqa: S101
|
519
719
|
assert "]]" not in dtype_str # noqa: S101
|
720
|
+
|
721
|
+
# Validate filter values and SQLRecord attributes
|
722
|
+
for filter_key, filter_value in cat_filters.items():
|
723
|
+
if not filter_value or (
|
724
|
+
isinstance(filter_value, str) and not filter_value.strip()
|
725
|
+
):
|
726
|
+
raise ValidationError(f"Empty value in filter {filter_key}")
|
727
|
+
# Check SQLRecord attributes for relation lookups
|
728
|
+
if isinstance(filter_value, SQLRecord) and "__" in filter_key:
|
729
|
+
field_name = filter_key.split("__", 1)[1]
|
730
|
+
if not hasattr(filter_value, field_name):
|
731
|
+
raise ValidationError(
|
732
|
+
f"SQLRecord {filter_value.__class__.__name__} has no attribute '{field_name}' in filter {filter_key}"
|
733
|
+
)
|
734
|
+
|
735
|
+
# If a SQLRecord is passed, we access its uid to apply a standard filter
|
736
|
+
cat_filters = {
|
737
|
+
f"{key}__uid"
|
738
|
+
if (
|
739
|
+
is_sqlrecord := isinstance(filter, SQLRecord)
|
740
|
+
and hasattr(filter, "uid")
|
741
|
+
)
|
742
|
+
else key: filter.uid if is_sqlrecord else filter
|
743
|
+
for key, filter in cat_filters.items()
|
744
|
+
}
|
745
|
+
|
520
746
|
fill_in = ", ".join(
|
521
747
|
f"{key}='{value}'" for (key, value) in cat_filters.items()
|
522
748
|
)
|
@@ -525,7 +751,9 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
|
|
525
751
|
if not self._state.adding:
|
526
752
|
if not (
|
527
753
|
self.dtype.startswith("cat")
|
528
|
-
if
|
754
|
+
if dtype_str == "cat"
|
755
|
+
else dtype_str.startswith("cat")
|
756
|
+
if self.dtype == "cat"
|
529
757
|
else self.dtype == dtype_str
|
530
758
|
):
|
531
759
|
raise ValidationError(
|
lamindb/models/has_parents.py
CHANGED
@@ -39,15 +39,15 @@ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
|
39
39
|
# also len of QuerySet can be costly at times
|
40
40
|
def _query_relatives(
|
41
41
|
records: QuerySet | list[SQLRecord],
|
42
|
-
|
42
|
+
attr: str,
|
43
43
|
cls: type[HasParents],
|
44
44
|
) -> QuerySet:
|
45
45
|
relatives = cls.objects.none() # type: ignore
|
46
46
|
if len(records) == 0:
|
47
47
|
return relatives
|
48
48
|
for record in records:
|
49
|
-
relatives = relatives.union(getattr(record,
|
50
|
-
relatives = relatives.union(_query_relatives(relatives,
|
49
|
+
relatives = relatives.union(getattr(record, attr).all())
|
50
|
+
relatives = relatives.union(_query_relatives(relatives, attr, cls))
|
51
51
|
return relatives
|
52
52
|
|
53
53
|
|
lamindb/models/project.py
CHANGED
@@ -24,7 +24,7 @@ from .artifact import Artifact
|
|
24
24
|
from .can_curate import CanCurate
|
25
25
|
from .collection import Collection
|
26
26
|
from .feature import Feature
|
27
|
-
from .record import Record
|
27
|
+
from .record import Record
|
28
28
|
from .run import Run, TracksRun, TracksUpdates, User
|
29
29
|
from .schema import Schema
|
30
30
|
from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, ValidateFields
|
@@ -66,6 +66,10 @@ class Person(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
66
66
|
"""Email of the person."""
|
67
67
|
external: bool = BooleanField(default=True, db_index=True)
|
68
68
|
"""Whether the person is external to the organization."""
|
69
|
+
records: Record = models.ManyToManyField(
|
70
|
+
Record, through="RecordPerson", related_name="linked_people"
|
71
|
+
)
|
72
|
+
"""Linked records."""
|
69
73
|
|
70
74
|
@overload
|
71
75
|
def __init__(
|
@@ -164,6 +168,10 @@ class Reference(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
164
168
|
Collection, through="CollectionReference", related_name="references"
|
165
169
|
)
|
166
170
|
"""Collections associated with this reference."""
|
171
|
+
records: Record = models.ManyToManyField(
|
172
|
+
Record, through="RecordReference", related_name="linked_references"
|
173
|
+
)
|
174
|
+
"""Linked records."""
|
167
175
|
|
168
176
|
@overload
|
169
177
|
def __init__(
|
@@ -279,13 +287,9 @@ class Project(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
|
|
279
287
|
)
|
280
288
|
"""Linked schemas."""
|
281
289
|
records: Record = models.ManyToManyField(
|
282
|
-
Record, through="RecordProject", related_name="
|
290
|
+
Record, through="RecordProject", related_name="linked_projects"
|
283
291
|
)
|
284
292
|
"""Linked records."""
|
285
|
-
sheets: Sheet = models.ManyToManyField(
|
286
|
-
Sheet, through="SheetProject", related_name="projects"
|
287
|
-
)
|
288
|
-
"""Linked sheets."""
|
289
293
|
collections: Collection = models.ManyToManyField(
|
290
294
|
Collection, through="CollectionProject", related_name="projects"
|
291
295
|
)
|
@@ -414,23 +418,36 @@ class SchemaProject(BaseSQLRecord, IsLink, TracksRun):
|
|
414
418
|
unique_together = ("schema", "project")
|
415
419
|
|
416
420
|
|
417
|
-
class
|
421
|
+
class RecordPerson(BaseSQLRecord, IsLink):
|
418
422
|
id: int = models.BigAutoField(primary_key=True)
|
419
|
-
record: Record = ForeignKey(Record, CASCADE, related_name="
|
420
|
-
feature: Feature = ForeignKey(Feature,
|
421
|
-
value:
|
423
|
+
record: Record = ForeignKey(Record, CASCADE, related_name="values_person")
|
424
|
+
feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recordperson")
|
425
|
+
value: Person = ForeignKey(Person, PROTECT, related_name="links_record")
|
422
426
|
|
423
427
|
class Meta:
|
424
|
-
unique_together = ("record", "feature")
|
428
|
+
unique_together = ("record", "feature", "value")
|
425
429
|
|
426
430
|
|
427
|
-
class
|
431
|
+
class RecordReference(BaseSQLRecord, IsLink):
|
428
432
|
id: int = models.BigAutoField(primary_key=True)
|
429
|
-
|
430
|
-
|
433
|
+
record: Record = ForeignKey(Record, CASCADE, related_name="values_reference")
|
434
|
+
feature: Feature = ForeignKey(
|
435
|
+
Feature, PROTECT, related_name="links_recordreference"
|
436
|
+
)
|
437
|
+
value: Reference = ForeignKey(Reference, PROTECT, related_name="links_record")
|
438
|
+
|
439
|
+
class Meta:
|
440
|
+
unique_together = ("record", "feature", "value")
|
441
|
+
|
442
|
+
|
443
|
+
class RecordProject(BaseSQLRecord, IsLink):
|
444
|
+
id: int = models.BigAutoField(primary_key=True)
|
445
|
+
record: Record = ForeignKey(Record, CASCADE, related_name="values_project")
|
446
|
+
feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recordproject")
|
447
|
+
value: Project = ForeignKey(Project, PROTECT, related_name="links_record")
|
431
448
|
|
432
449
|
class Meta:
|
433
|
-
unique_together = ("
|
450
|
+
unique_together = ("record", "feature", "value")
|
434
451
|
|
435
452
|
|
436
453
|
class ArtifactReference(BaseSQLRecord, IsLink, TracksRun):
|
lamindb/models/query_manager.py
CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import re
|
4
4
|
from functools import reduce
|
5
|
-
from typing import TYPE_CHECKING, NamedTuple
|
5
|
+
from typing import TYPE_CHECKING, Literal, NamedTuple
|
6
6
|
|
7
7
|
from django.db.models import (
|
8
8
|
IntegerField,
|
@@ -173,12 +173,17 @@ def _lookup(
|
|
173
173
|
field: StrField | None = None,
|
174
174
|
return_field: StrField | None = None,
|
175
175
|
using_key: str | None = None,
|
176
|
+
keep: Literal["first", "last", False] = "first",
|
176
177
|
) -> NamedTuple:
|
177
178
|
"""Return an auto-complete object for a field.
|
178
179
|
|
179
180
|
Args:
|
180
181
|
field: The field to look up the values for. Defaults to first string field.
|
181
182
|
return_field: The field to return. If `None`, returns the whole record.
|
183
|
+
keep: When multiple records are found for a lookup, how to return the records.
|
184
|
+
- `"first"`: return the first record.
|
185
|
+
- `"last"`: return the last record.
|
186
|
+
- `False`: return all records.
|
182
187
|
|
183
188
|
Returns:
|
184
189
|
A `NamedTuple` of lookup information of the field values with a
|
@@ -209,6 +214,7 @@ def _lookup(
|
|
209
214
|
values=[i.get(field) for i in queryset.values()],
|
210
215
|
tuple_name=cls.__class__.__name__,
|
211
216
|
prefix="ln",
|
217
|
+
keep=keep,
|
212
218
|
).lookup(
|
213
219
|
return_field=(
|
214
220
|
get_name_field(registry=queryset.model, field=return_field)
|