lamindb 1.6.2__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lamindb/__init__.py +1 -3
  2. lamindb/_finish.py +32 -16
  3. lamindb/base/types.py +6 -4
  4. lamindb/core/_context.py +127 -57
  5. lamindb/core/_mapped_collection.py +1 -1
  6. lamindb/core/_settings.py +44 -4
  7. lamindb/core/_track_environment.py +5 -2
  8. lamindb/core/loaders.py +1 -1
  9. lamindb/core/storage/_anndata_accessor.py +1 -1
  10. lamindb/core/storage/_tiledbsoma.py +14 -8
  11. lamindb/core/storage/_valid_suffixes.py +0 -1
  12. lamindb/core/storage/_zarr.py +1 -1
  13. lamindb/core/storage/objects.py +13 -8
  14. lamindb/core/storage/paths.py +9 -6
  15. lamindb/core/types.py +1 -1
  16. lamindb/curators/_legacy.py +2 -1
  17. lamindb/curators/core.py +106 -105
  18. lamindb/errors.py +9 -0
  19. lamindb/examples/fixtures/__init__.py +0 -0
  20. lamindb/examples/fixtures/sheets.py +224 -0
  21. lamindb/migrations/0103_remove_writelog_migration_state_and_more.py +1 -1
  22. lamindb/migrations/0105_record_unique_name.py +20 -0
  23. lamindb/migrations/0106_transfer_data_migration.py +25 -0
  24. lamindb/migrations/0107_add_schema_to_record.py +68 -0
  25. lamindb/migrations/0108_remove_record_sheet_remove_sheetproject_sheet_and_more.py +30 -0
  26. lamindb/migrations/0109_record_input_of_runs_alter_record_run_and_more.py +123 -0
  27. lamindb/migrations/0110_rename_values_artifacts_record_linked_artifacts.py +17 -0
  28. lamindb/migrations/0111_remove_record__sort_order.py +148 -0
  29. lamindb/migrations/0112_alter_recordartifact_feature_and_more.py +105 -0
  30. lamindb/migrations/0113_lower_case_branch_and_space_names.py +62 -0
  31. lamindb/migrations/0114_alter_run__status_code.py +24 -0
  32. lamindb/migrations/0115_alter_space_uid.py +52 -0
  33. lamindb/migrations/{0104_squashed.py → 0115_squashed.py} +261 -257
  34. lamindb/models/__init__.py +4 -3
  35. lamindb/models/_describe.py +88 -31
  36. lamindb/models/_feature_manager.py +627 -658
  37. lamindb/models/_label_manager.py +1 -3
  38. lamindb/models/artifact.py +214 -99
  39. lamindb/models/collection.py +7 -1
  40. lamindb/models/feature.py +288 -60
  41. lamindb/models/has_parents.py +3 -3
  42. lamindb/models/project.py +32 -15
  43. lamindb/models/query_manager.py +7 -1
  44. lamindb/models/query_set.py +118 -41
  45. lamindb/models/record.py +140 -94
  46. lamindb/models/run.py +42 -42
  47. lamindb/models/save.py +102 -16
  48. lamindb/models/schema.py +41 -8
  49. lamindb/models/sqlrecord.py +105 -40
  50. lamindb/models/storage.py +278 -0
  51. lamindb/models/transform.py +10 -2
  52. lamindb/models/ulabel.py +9 -1
  53. lamindb/py.typed +0 -0
  54. lamindb/setup/__init__.py +2 -1
  55. lamindb/setup/_switch.py +16 -0
  56. lamindb/setup/errors/__init__.py +4 -0
  57. lamindb/setup/types/__init__.py +4 -0
  58. {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/METADATA +5 -5
  59. {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/RECORD +61 -44
  60. lamindb/models/core.py +0 -135
  61. {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/LICENSE +0 -0
  62. {lamindb-1.6.2.dist-info → lamindb-1.7.0.dist-info}/WHEEL +0 -0
@@ -270,7 +270,10 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
270
270
  run: Run | None = kwargs.pop("run", None)
271
271
  revises: Collection | None = kwargs.pop("revises", None)
272
272
  version: str | None = kwargs.pop("version", None)
273
- branch_id: int | None = kwargs.pop("branch_id", 1)
273
+ branch = kwargs.pop("branch", None)
274
+ branch_id = kwargs.pop("branch_id", 1)
275
+ space = kwargs.pop("space", None)
276
+ space_id = kwargs.pop("space_id", 1)
274
277
  key: str
275
278
  if "name" in kwargs:
276
279
  key = kwargs.pop("name")
@@ -338,7 +341,10 @@ class Collection(SQLRecord, IsVersioned, TracksRun, TracksUpdates):
338
341
  hash=hash,
339
342
  run=run,
340
343
  version=version,
344
+ branch=branch,
341
345
  branch_id=branch_id,
346
+ space=space,
347
+ space_id=space_id,
342
348
  revises=revises,
343
349
  _skip_validation=_skip_validation,
344
350
  )
lamindb/models/feature.py CHANGED
@@ -12,6 +12,10 @@ from django.db.utils import IntegrityError
12
12
  from lamin_utils import logger
13
13
  from lamindb_setup._init_instance import get_schema_module_name
14
14
  from lamindb_setup.core.hashing import HASH_LENGTH, hash_dict, hash_string
15
+ from lamindb_setup.errors import (
16
+ MODULE_WASNT_CONFIGURED_MESSAGE_TEMPLATE,
17
+ ModuleWasntConfigured,
18
+ )
15
19
  from pandas.api.types import CategoricalDtype, is_string_dtype
16
20
  from pandas.core.dtypes.base import ExtensionDtype
17
21
 
@@ -23,7 +27,7 @@ from lamindb.base.fields import (
23
27
  TextField,
24
28
  )
25
29
  from lamindb.base.types import Dtype, FieldAttr
26
- from lamindb.errors import FieldValidationError, ValidationError
30
+ from lamindb.errors import DoesNotExist, FieldValidationError, ValidationError
27
31
 
28
32
  from ..base.ids import base62_12
29
33
  from ._relations import dict_module_name_to_model_name
@@ -43,7 +47,7 @@ if TYPE_CHECKING:
43
47
  FEATURE_DTYPES = set(get_args(Dtype))
44
48
 
45
49
 
46
- def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, str]]:
50
+ def parse_dtype(dtype_str: str, is_param: bool = False) -> list[dict[str, Any]]:
47
51
  """Parses feature data type string into a structured list of components."""
48
52
  from .artifact import Artifact
49
53
 
@@ -92,35 +96,14 @@ def parse_cat_dtype(
92
96
  assert isinstance(dtype_str, str) # noqa: S101
93
97
  if related_registries is None:
94
98
  related_registries = dict_module_name_to_model_name(Artifact)
95
- split_result = dtype_str.split("[")
96
- # has sub type
97
- sub_type_str = ""
98
- if len(split_result) == 2:
99
- registry_str = split_result[0]
100
- assert "]" in split_result[1] # noqa: S101
101
- sub_type_field_split = split_result[1].split("].")
102
- if len(sub_type_field_split) == 1:
103
- sub_type_str = sub_type_field_split[0].strip("]")
104
- field_str = ""
105
- else:
106
- sub_type_str = sub_type_field_split[0]
107
- field_str = sub_type_field_split[1]
108
- elif len(split_result) == 1:
109
- registry_field_split = split_result[0].split(".")
110
- if (
111
- len(registry_field_split) == 2 and registry_field_split[1][0].isupper()
112
- ) or len(registry_field_split) == 3:
113
- # bionty.CellType or bionty.CellType.name
114
- registry_str = f"{registry_field_split[0]}.{registry_field_split[1]}"
115
- field_str = (
116
- "" if len(registry_field_split) == 2 else registry_field_split[2]
117
- )
118
- else:
119
- # ULabel or ULabel.name
120
- registry_str = registry_field_split[0]
121
- field_str = (
122
- "" if len(registry_field_split) == 1 else registry_field_split[1]
123
- )
99
+
100
+ # Parse the string considering nested brackets
101
+ parsed = parse_nested_brackets(dtype_str)
102
+
103
+ registry_str = parsed["registry"]
104
+ sub_type_str = parsed["subtype"]
105
+ field_str = parsed["field"]
106
+
124
107
  if not is_itype:
125
108
  if registry_str not in related_registries:
126
109
  raise ValidationError(
@@ -136,16 +119,14 @@ def parse_cat_dtype(
136
119
  module_name_attempt, raise_import_error=False
137
120
  )
138
121
  if module_name is None:
139
- raise ImportError(
140
- f"Can not parse dtype {dtype_str} because {module_name_attempt} "
141
- f"was not found.\nInstall the module with `pip install {module_name_attempt}`\n"
142
- "and also add the module to this instance via instance settings page "
143
- "under 'schema modules'."
122
+ raise ModuleWasntConfigured(
123
+ MODULE_WASNT_CONFIGURED_MESSAGE_TEMPLATE.format(module_name_attempt)
144
124
  )
145
125
  else:
146
126
  module_name, class_name = "lamindb", registry_str
147
127
  module = importlib.import_module(module_name)
148
128
  registry = getattr(module, class_name)
129
+
149
130
  if sub_type_str != "":
150
131
  pass
151
132
  # validate that the subtype is a record in the registry with is_type = True
@@ -154,7 +135,8 @@ def parse_cat_dtype(
154
135
  # validate that field_str is an actual field of the module
155
136
  else:
156
137
  field_str = registry._name_field if hasattr(registry, "_name_field") else "name"
157
- return {
138
+
139
+ result = {
158
140
  "registry": registry, # should be typed as CanCurate
159
141
  "registry_str": registry_str,
160
142
  "subtype_str": sub_type_str,
@@ -162,6 +144,138 @@ def parse_cat_dtype(
162
144
  "field": getattr(registry, field_str),
163
145
  }
164
146
 
147
+ # Add nested subtype information if present
148
+ if parsed.get("nested_subtypes"):
149
+ result["nested_subtypes"] = parsed["nested_subtypes"]
150
+
151
+ return result
152
+
153
+
154
def _matching_bracket_index(text: str, open_pos: int) -> int:
    """Return the index of the "]" closing the "[" at `open_pos`, or -1 if there is none."""
    depth = 0
    for index, char in enumerate(text[open_pos:], start=open_pos):
        if char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
            if depth == 0:
                return index
    return -1


def parse_nested_brackets(dtype_str: str) -> dict[str, Any]:
    """Parse dtype string with potentially nested brackets.

    Examples:
        "A" -> {"registry": "A", "subtype": "", "field": ""}
        "A.field" -> {"registry": "A", "subtype": "", "field": "field"}
        "A[B]" -> {"registry": "A", "subtype": "B", "field": ""}
        "A[B].field" -> {"registry": "A", "subtype": "B", "field": "field"}
        "A[B[C]]" -> {"registry": "A", "subtype": "B[C]", "field": "", "nested_subtypes": ["B", "C"]}
        "A[B[C]].field" -> {"registry": "A", "subtype": "B[C]", "field": "field", "nested_subtypes": ["B", "C"]}

    Args:
        dtype_str: The dtype string to parse

    Returns:
        Dictionary with the string entries "registry", "subtype" and "field",
        plus a "nested_subtypes" list when the subtype itself contains brackets.

    Raises:
        ValueError: If the first opening bracket has no matching closing bracket.
    """
    if "[" not in dtype_str:
        parts = dtype_str.split(".")
        if len(parts) == 1:
            # Simple registry name
            return {"registry": dtype_str, "subtype": "", "field": ""}
        # `[:1]` rather than `[0]`: a trailing dot ("A.") produces an empty
        # second segment, which must not raise IndexError
        if len(parts) == 2 and parts[1][:1].isupper():
            # bionty.CellType
            return {"registry": dtype_str, "subtype": "", "field": ""}
        if len(parts) == 3:
            # bionty.CellType.name
            return {
                "registry": f"{parts[0]}.{parts[1]}",
                "subtype": "",
                "field": parts[2],
            }
        # ULabel.name
        return {"registry": parts[0], "subtype": "", "field": parts[1]}

    # Locate the first opening bracket and the bracket that closes it
    first_bracket = dtype_str.index("[")
    closing_bracket_pos = _matching_bracket_index(dtype_str, first_bracket)
    if closing_bracket_pos == -1:
        raise ValueError(f"Unmatched brackets in dtype string: {dtype_str}")

    registry_part = dtype_str[:first_bracket]
    # Subtype is everything between the first "[" and its matching "]"
    subtype_part = dtype_str[first_bracket + 1 : closing_bracket_pos]

    # A field may follow the closing bracket, introduced by a dot
    remainder = dtype_str[closing_bracket_pos + 1 :]
    field_part = remainder[1:] if remainder.startswith(".") else ""

    result: dict[str, Any] = {
        "registry": registry_part,
        "subtype": subtype_part,
        "field": field_part,
    }

    # If subtype contains brackets, extract nested subtypes for reference
    if "[" in subtype_part:
        nested_subtypes = extract_nested_subtypes(subtype_part)
        if nested_subtypes:
            result["nested_subtypes"] = nested_subtypes

    return result


def extract_nested_subtypes(subtype_str: str) -> list[str]:
    """Extract all nested subtype levels from a nested subtype string.

    Examples:
        "B[C]" -> ["B", "C"]
        "B[C[D]]" -> ["B", "C", "D"]
        "B[C[D[E]]]" -> ["B", "C", "D", "E"]

    Args:
        subtype_str: The subtype string with potential nesting

    Returns:
        List of subtype levels from outermost to innermost. An unterminated
        bracket does not raise: the text after it is treated as the inner
        content, so "B[C" yields ["B", "C"].
    """
    subtypes = []
    current = subtype_str

    while "[" in current:
        # Everything before the bracket names the current level
        bracket_pos = current.index("[")
        subtypes.append(current[:bracket_pos])

        closing_pos = _matching_bracket_index(current, bracket_pos)
        if closing_pos == -1:
            # Unterminated bracket: descend into whatever follows it instead
            # of re-appending the unparsed remainder as a bogus subtype
            current = current[bracket_pos + 1 :]
        else:
            # Move to the content inside the brackets
            current = current[bracket_pos + 1 : closing_pos]

    # Add the final innermost subtype
    if current:
        subtypes.append(current)

    return subtypes
278
+
165
279
 
166
280
  def serialize_dtype(
167
281
  dtype: Registry
@@ -237,6 +351,7 @@ def serialize_dtype(
237
351
 
238
352
 
239
353
  def serialize_pandas_dtype(pandas_dtype: ExtensionDtype) -> str:
354
+ """Convert pandas ExtensionDtype to simplified string representation."""
240
355
  if is_string_dtype(pandas_dtype):
241
356
  if not isinstance(pandas_dtype, CategoricalDtype):
242
357
  dtype = "str"
@@ -256,6 +371,76 @@ def serialize_pandas_dtype(pandas_dtype: ExtensionDtype) -> str:
256
371
  return dtype
257
372
 
258
373
 
374
+ def parse_filter_string(filter_str: str) -> dict[str, tuple[str, str | None, str]]:
375
+ """Parse comma-separated Django filter expressions into structured components.
376
+
377
+ Args:
378
+ filter_str: Comma-separated filters like 'name=value, relation__field=value'
379
+
380
+ Returns:
381
+ Dict mapping original filter key to (relation_name, field_name, value) tuple.
382
+ For direct fields: field_name is None.
383
+ For relations: field_name contains the lookup field.
384
+ """
385
+ filters = {}
386
+
387
+ filter_parts = [part.strip() for part in filter_str.split(",")]
388
+ for part in filter_parts:
389
+ if "=" not in part:
390
+ raise ValueError(f"Invalid filter expression: '{part}' (missing '=' sign)")
391
+
392
+ key, value = part.split("=", 1)
393
+ key = key.strip()
394
+ value = value.strip().strip("'\"")
395
+
396
+ if not key:
397
+ raise ValueError(f"Invalid filter expression: '{part}' (empty key)")
398
+ if not value:
399
+ raise ValueError(f"Invalid filter expression: '{part}' (empty value)")
400
+
401
+ if "__" in key:
402
+ relation_name, field_name = key.split("__", 1)
403
+ filters[key] = (relation_name, field_name, value)
404
+ else:
405
+ filters[key] = (key, None, value)
406
+
407
+ return filters
408
+
409
+
410
def resolve_relation_filters(
    parsed_filters: dict[str, tuple[str, str | None, str]], registry: SQLRecord
) -> dict[str, str | SQLRecord]:
    """Resolve relation filters to actual model objects.

    Args:
        parsed_filters: Parsed filters in the shape returned by
            :func:`lamindb.models.feature.parse_filter_string`
        registry: Model class to resolve relationships against

    Returns:
        Dict keyed by relation name holding the fetched object for every
        relation filter that could be resolved. Direct-field filters and
        relation filters that could not be resolved keep their original key
        and string value.
    """
    resolved = {}

    for filter_key, (relation_name, field_name, value) in parsed_filters.items():
        is_relation_lookup = field_name is not None and hasattr(
            registry, relation_name
        )
        if is_relation_lookup:
            descriptor = getattr(registry, relation_name)
            if hasattr(descriptor, "field") and descriptor.field.is_relation:
                try:
                    target_model = descriptor.field.related_model
                    resolved[relation_name] = target_model.get(**{field_name: value})
                except (DoesNotExist, AttributeError):
                    pass  # Fall back to original filter
                else:
                    # Resolved: the object replaces the raw filter entry
                    continue
        resolved[filter_key] = value

    return resolved
442
+
443
+
259
444
  def process_init_feature_param(args, kwargs, is_param: bool = False):
260
445
  # now we proceed with the user-facing constructor
261
446
  if len(args) != 0:
@@ -265,12 +450,22 @@ def process_init_feature_param(args, kwargs, is_param: bool = False):
265
450
  is_type: bool = kwargs.pop("is_type", None)
266
451
  type_: Feature | str | None = kwargs.pop("type", None)
267
452
  description: str | None = kwargs.pop("description", None)
453
+ branch = kwargs.pop("branch", None)
454
+ branch_id = kwargs.pop("branch_id", 1)
455
+ space = kwargs.pop("space", None)
456
+ space_id = kwargs.pop("space_id", 1)
457
+ _skip_validation = kwargs.pop("_skip_validation", False)
268
458
  if kwargs:
269
459
  valid_keywords = ", ".join([val[0] for val in _get_record_kwargs(Feature)])
270
460
  raise FieldValidationError(f"Only {valid_keywords} are valid keyword arguments")
271
461
  kwargs["name"] = name
272
462
  kwargs["type"] = type_
273
463
  kwargs["is_type"] = is_type
464
+ kwargs["branch"] = branch
465
+ kwargs["branch_id"] = branch_id
466
+ kwargs["space"] = space
467
+ kwargs["space_id"] = space_id
468
+ kwargs["_skip_validation"] = _skip_validation
274
469
  if not is_param:
275
470
  kwargs["description"] = description
276
471
  # cast dtype
@@ -338,33 +533,40 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
338
533
 
339
534
  Example:
340
535
 
341
- A simple `"str"` feature.
536
+ A simple `"str"` feature.::
537
+
538
+ ln.Feature(
539
+ name="sample_note",
540
+ dtype="str",
541
+ ).save()
342
542
 
343
- >>> ln.Feature(
344
- ... name="sample_note",
345
- ... dtype="str",
346
- ... ).save()
543
+ A dtype `"cat[ULabel]"` can be more easily passed as below.::
347
544
 
348
- A dtype `"cat[ULabel]"` can be more easily passed as below.
545
+ ln.Feature(
546
+ name="project",
547
+ dtype=ln.ULabel,
548
+ ).save()
349
549
 
350
- >>> ln.Feature(
351
- ... name="project",
352
- ... dtype=ln.ULabel,
353
- ... ).save()
550
+ A dtype `"cat[ULabel|bionty.CellType]"` can be more easily passed as below.::
354
551
 
355
- A dtype `"cat[ULabel|bionty.CellType]"` can be more easily passed as below.
552
+ ln.Feature(
553
+ name="cell_type",
554
+ dtype=[ln.ULabel, bt.CellType],
555
+ ).save()
356
556
 
357
- >>> ln.Feature(
358
- ... name="cell_type",
359
- ... dtype=[ln.ULabel, bt.CellType],
360
- ... ).save()
557
+ A multivalue feature with a list of cell types.::
361
558
 
362
- A multivalue feature with a list of cell types.
559
+ ln.Feature(
560
+ name="cell_types",
561
+ dtype=list[bt.CellType], # or list[str] for a list of strings
562
+ ).save()
363
563
 
364
- >>> ln.Feature(
365
- ... name="cell_types",
366
- ... dtype=list[bt.CellType], # or list[str] for a list of strings
367
- ... ).save()
564
+ A path feature.::
565
+
566
+ ln.Feature(
567
+ name="image_path",
568
+ dtype="path", # will be validated as `str`
569
+ ).save()
368
570
 
369
571
  Hint:
370
572
 
@@ -383,7 +585,6 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
383
585
  happened, ask yourself what the joint measurement was: a feature
384
586
  qualifies variables in a joint measurement. The canonical data matrix
385
587
  lists jointly measured variables in the columns.
386
-
387
588
  """
388
589
 
389
590
  class Meta(SQLRecord.Meta, TracksRun.Meta, TracksUpdates.Meta):
@@ -503,7 +704,6 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
503
704
  if len(args) == len(self._meta.concrete_fields):
504
705
  super().__init__(*args, **kwargs)
505
706
  return None
506
- dtype = kwargs.get("dtype", None)
507
707
  default_value = kwargs.pop("default_value", None)
508
708
  nullable = kwargs.pop("nullable", True) # default value of nullable
509
709
  cat_filters = kwargs.pop("cat_filters", None)
@@ -517,6 +717,32 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
517
717
  if cat_filters:
518
718
  assert "|" not in dtype_str # noqa: S101
519
719
  assert "]]" not in dtype_str # noqa: S101
720
+
721
+ # Validate filter values and SQLRecord attributes
722
+ for filter_key, filter_value in cat_filters.items():
723
+ if not filter_value or (
724
+ isinstance(filter_value, str) and not filter_value.strip()
725
+ ):
726
+ raise ValidationError(f"Empty value in filter {filter_key}")
727
+ # Check SQLRecord attributes for relation lookups
728
+ if isinstance(filter_value, SQLRecord) and "__" in filter_key:
729
+ field_name = filter_key.split("__", 1)[1]
730
+ if not hasattr(filter_value, field_name):
731
+ raise ValidationError(
732
+ f"SQLRecord {filter_value.__class__.__name__} has no attribute '{field_name}' in filter {filter_key}"
733
+ )
734
+
735
+ # If a SQLRecord is passed, we access its uid to apply a standard filter
736
+ cat_filters = {
737
+ f"{key}__uid"
738
+ if (
739
+ is_sqlrecord := isinstance(filter, SQLRecord)
740
+ and hasattr(filter, "uid")
741
+ )
742
+ else key: filter.uid if is_sqlrecord else filter
743
+ for key, filter in cat_filters.items()
744
+ }
745
+
520
746
  fill_in = ", ".join(
521
747
  f"{key}='{value}'" for (key, value) in cat_filters.items()
522
748
  )
@@ -525,7 +751,9 @@ class Feature(SQLRecord, CanCurate, TracksRun, TracksUpdates):
525
751
  if not self._state.adding:
526
752
  if not (
527
753
  self.dtype.startswith("cat")
528
- if dtype == "cat"
754
+ if dtype_str == "cat"
755
+ else dtype_str.startswith("cat")
756
+ if self.dtype == "cat"
529
757
  else self.dtype == dtype_str
530
758
  ):
531
759
  raise ValidationError(
@@ -39,15 +39,15 @@ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
39
39
  # also len of QuerySet can be costly at times
40
40
  def _query_relatives(
41
41
  records: QuerySet | list[SQLRecord],
42
- kind: Literal["parents", "children"],
42
+ attr: str,
43
43
  cls: type[HasParents],
44
44
  ) -> QuerySet:
45
45
  relatives = cls.objects.none() # type: ignore
46
46
  if len(records) == 0:
47
47
  return relatives
48
48
  for record in records:
49
- relatives = relatives.union(getattr(record, kind).all())
50
- relatives = relatives.union(_query_relatives(relatives, kind, cls))
49
+ relatives = relatives.union(getattr(record, attr).all())
50
+ relatives = relatives.union(_query_relatives(relatives, attr, cls))
51
51
  return relatives
52
52
 
53
53
 
lamindb/models/project.py CHANGED
@@ -24,7 +24,7 @@ from .artifact import Artifact
24
24
  from .can_curate import CanCurate
25
25
  from .collection import Collection
26
26
  from .feature import Feature
27
- from .record import Record, Sheet
27
+ from .record import Record
28
28
  from .run import Run, TracksRun, TracksUpdates, User
29
29
  from .schema import Schema
30
30
  from .sqlrecord import BaseSQLRecord, IsLink, SQLRecord, ValidateFields
@@ -66,6 +66,10 @@ class Person(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
66
66
  """Email of the person."""
67
67
  external: bool = BooleanField(default=True, db_index=True)
68
68
  """Whether the person is external to the organization."""
69
+ records: Record = models.ManyToManyField(
70
+ Record, through="RecordPerson", related_name="linked_people"
71
+ )
72
+ """Linked records."""
69
73
 
70
74
  @overload
71
75
  def __init__(
@@ -164,6 +168,10 @@ class Reference(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
164
168
  Collection, through="CollectionReference", related_name="references"
165
169
  )
166
170
  """Collections associated with this reference."""
171
+ records: Record = models.ManyToManyField(
172
+ Record, through="RecordReference", related_name="linked_references"
173
+ )
174
+ """Linked records."""
167
175
 
168
176
  @overload
169
177
  def __init__(
@@ -279,13 +287,9 @@ class Project(SQLRecord, CanCurate, TracksRun, TracksUpdates, ValidateFields):
279
287
  )
280
288
  """Linked schemas."""
281
289
  records: Record = models.ManyToManyField(
282
- Record, through="RecordProject", related_name="projects"
290
+ Record, through="RecordProject", related_name="linked_projects"
283
291
  )
284
292
  """Linked records."""
285
- sheets: Sheet = models.ManyToManyField(
286
- Sheet, through="SheetProject", related_name="projects"
287
- )
288
- """Linked sheets."""
289
293
  collections: Collection = models.ManyToManyField(
290
294
  Collection, through="CollectionProject", related_name="projects"
291
295
  )
@@ -414,23 +418,36 @@ class SchemaProject(BaseSQLRecord, IsLink, TracksRun):
414
418
  unique_together = ("schema", "project")
415
419
 
416
420
 
417
- class RecordProject(BaseSQLRecord, IsLink):
421
+ class RecordPerson(BaseSQLRecord, IsLink):
418
422
  id: int = models.BigAutoField(primary_key=True)
419
- record: Record = ForeignKey(Record, CASCADE, related_name="values_project")
420
- feature: Feature = ForeignKey(Feature, CASCADE, related_name="links_recordproject")
421
- value: Project = ForeignKey(Project, PROTECT, related_name="links_record")
423
+ record: Record = ForeignKey(Record, CASCADE, related_name="values_person")
424
+ feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recordperson")
425
+ value: Person = ForeignKey(Person, PROTECT, related_name="links_record")
422
426
 
423
427
  class Meta:
424
- unique_together = ("record", "feature")
428
+ unique_together = ("record", "feature", "value")
425
429
 
426
430
 
427
- class SheetProject(BaseSQLRecord, IsLink, TracksRun):
431
+ class RecordReference(BaseSQLRecord, IsLink):
428
432
  id: int = models.BigAutoField(primary_key=True)
429
- sheet: Sheet = ForeignKey(Sheet, CASCADE, related_name="links_project")
430
- project: Project = ForeignKey(Project, PROTECT, related_name="links_sheet")
433
+ record: Record = ForeignKey(Record, CASCADE, related_name="values_reference")
434
+ feature: Feature = ForeignKey(
435
+ Feature, PROTECT, related_name="links_recordreference"
436
+ )
437
+ value: Reference = ForeignKey(Reference, PROTECT, related_name="links_record")
438
+
439
+ class Meta:
440
+ unique_together = ("record", "feature", "value")
441
+
442
+
443
+ class RecordProject(BaseSQLRecord, IsLink):
444
+ id: int = models.BigAutoField(primary_key=True)
445
+ record: Record = ForeignKey(Record, CASCADE, related_name="values_project")
446
+ feature: Feature = ForeignKey(Feature, PROTECT, related_name="links_recordproject")
447
+ value: Project = ForeignKey(Project, PROTECT, related_name="links_record")
431
448
 
432
449
  class Meta:
433
- unique_together = ("sheet", "project")
450
+ unique_together = ("record", "feature", "value")
434
451
 
435
452
 
436
453
  class ArtifactReference(BaseSQLRecord, IsLink, TracksRun):
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import re
4
4
  from functools import reduce
5
- from typing import TYPE_CHECKING, NamedTuple
5
+ from typing import TYPE_CHECKING, Literal, NamedTuple
6
6
 
7
7
  from django.db.models import (
8
8
  IntegerField,
@@ -173,12 +173,17 @@ def _lookup(
173
173
  field: StrField | None = None,
174
174
  return_field: StrField | None = None,
175
175
  using_key: str | None = None,
176
+ keep: Literal["first", "last", False] = "first",
176
177
  ) -> NamedTuple:
177
178
  """Return an auto-complete object for a field.
178
179
 
179
180
  Args:
180
181
  field: The field to look up the values for. Defaults to first string field.
181
182
  return_field: The field to return. If `None`, returns the whole record.
183
+ keep: When multiple records are found for a lookup, how to return the records.
184
+ - `"first"`: return the first record.
185
+ - `"last"`: return the last record.
186
+ - `False`: return all records.
182
187
 
183
188
  Returns:
184
189
  A `NamedTuple` of lookup information of the field values with a
@@ -209,6 +214,7 @@ def _lookup(
209
214
  values=[i.get(field) for i in queryset.values()],
210
215
  tuple_name=cls.__class__.__name__,
211
216
  prefix="ln",
217
+ keep=keep,
212
218
  ).lookup(
213
219
  return_field=(
214
220
  get_name_field(registry=queryset.model, field=return_field)