lamindb 1.10.2__py3-none-any.whl → 1.11a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. lamindb/__init__.py +89 -49
  2. lamindb/_finish.py +14 -12
  3. lamindb/_tracked.py +2 -4
  4. lamindb/_view.py +1 -1
  5. lamindb/base/__init__.py +2 -1
  6. lamindb/base/dtypes.py +76 -0
  7. lamindb/core/_settings.py +2 -2
  8. lamindb/core/storage/_anndata_accessor.py +29 -9
  9. lamindb/curators/_legacy.py +16 -3
  10. lamindb/curators/core.py +432 -186
  11. lamindb/examples/cellxgene/__init__.py +8 -3
  12. lamindb/examples/cellxgene/_cellxgene.py +127 -13
  13. lamindb/examples/cellxgene/{cxg_schema_versions.csv → cellxgene_schema_versions.csv} +11 -0
  14. lamindb/examples/croissant/__init__.py +12 -2
  15. lamindb/examples/datasets/__init__.py +2 -2
  16. lamindb/examples/datasets/_core.py +1 -1
  17. lamindb/examples/datasets/_small.py +66 -22
  18. lamindb/examples/datasets/mini_immuno.py +1 -0
  19. lamindb/migrations/0119_squashed.py +5 -2
  20. lamindb/migrations/0120_add_record_fk_constraint.py +64 -0
  21. lamindb/migrations/0121_recorduser.py +53 -0
  22. lamindb/models/__init__.py +3 -1
  23. lamindb/models/_describe.py +2 -2
  24. lamindb/models/_feature_manager.py +53 -53
  25. lamindb/models/_from_values.py +2 -2
  26. lamindb/models/_is_versioned.py +4 -4
  27. lamindb/models/_label_manager.py +4 -4
  28. lamindb/models/artifact.py +305 -116
  29. lamindb/models/artifact_set.py +36 -1
  30. lamindb/models/can_curate.py +1 -2
  31. lamindb/models/collection.py +3 -34
  32. lamindb/models/feature.py +111 -7
  33. lamindb/models/has_parents.py +11 -11
  34. lamindb/models/project.py +18 -0
  35. lamindb/models/query_manager.py +16 -7
  36. lamindb/models/query_set.py +59 -34
  37. lamindb/models/record.py +25 -4
  38. lamindb/models/run.py +8 -6
  39. lamindb/models/schema.py +54 -26
  40. lamindb/models/sqlrecord.py +123 -25
  41. lamindb/models/storage.py +59 -14
  42. lamindb/models/transform.py +17 -17
  43. lamindb/models/ulabel.py +6 -1
  44. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/METADATA +4 -5
  45. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/RECORD +47 -44
  46. {lamindb-1.10.2.dist-info → lamindb-1.11a1.dist-info}/WHEEL +1 -1
  47. {lamindb-1.10.2.dist-info/licenses → lamindb-1.11a1.dist-info}/LICENSE +0 -0
@@ -3,9 +3,14 @@
3
3
  .. autosummary::
4
4
  :toctree: .
5
5
 
6
- save_cxg_defaults
7
- get_cxg_schema
6
+ save_cellxgene_defaults
7
+ create_cellxgene_schema
8
8
 
9
9
  """
10
10
 
11
- from ._cellxgene import get_cxg_schema, save_cxg_defaults
11
+ from ._cellxgene import (
12
+ create_cellxgene_schema,
13
+ get_cxg_schema,
14
+ save_cellxgene_defaults,
15
+ save_cxg_defaults,
16
+ )
@@ -3,7 +3,9 @@ from __future__ import annotations
3
3
  from typing import TYPE_CHECKING, Collection, Literal, NamedTuple
4
4
 
5
5
  import pandas as pd
6
+ from lamindb_setup.core import deprecated
6
7
  from lamindb_setup.core.upath import UPath
8
+ from packaging import version
7
9
 
8
10
  from lamindb.models._from_values import _format_values
9
11
 
@@ -11,11 +13,25 @@ if TYPE_CHECKING:
11
13
  from lamindb.base.types import FieldAttr
12
14
  from lamindb.models import Schema, SQLRecord
13
15
 
14
- CELLxGENESchemaVersions = Literal["4.0.0", "5.0.0", "5.1.0", "5.2.0", "5.3.0"]
16
+ CELLxGENESchemaVersions = Literal["4.0.0", "5.0.0", "5.1.0", "5.2.0", "5.3.0", "6.0.0"]
17
+ CELLxGENEOrganisms = Literal[
18
+ "human",
19
+ "mouse",
20
+ "zebra danio",
21
+ "rhesus macaquedomestic pig",
22
+ "chimpanzee",
23
+ "white-tufted-ear marmoset",
24
+ "sars-2",
25
+ ]
15
26
  FieldType = Literal["ontology_id", "name"]
16
27
 
17
28
 
29
+ @deprecated(new_name="save_cellxgene_defaults")
18
30
  def save_cxg_defaults() -> None:
31
+ return save_cxg_defaults()
32
+
33
+
34
+ def save_cellxgene_defaults() -> None:
19
35
  """Save default values of the CELLxGENE schema to the instance.
20
36
 
21
37
  Adds CELLxGENE specific (control) values that are not available in the ontologies:
@@ -25,7 +41,6 @@ def save_cxg_defaults() -> None:
25
41
  - "unknown" entries for DevelopmentalStage, Phenotype, and CellType
26
42
  - "tissue", "organoid", and "cell culture" ULabels (tissue_type)
27
43
  - "cell", "nucleus", "na" ULabels (suspension_type)
28
-
29
44
  """
30
45
  import bionty as bt
31
46
 
@@ -47,12 +62,13 @@ def save_cxg_defaults() -> None:
47
62
  # na, unknown
48
63
  for model, name in zip(
49
64
  [
65
+ bt.Ethnicity,
50
66
  bt.Ethnicity,
51
67
  bt.DevelopmentalStage,
52
68
  bt.Phenotype,
53
69
  bt.CellType,
54
70
  ],
55
- ["na", "unknown", "unknown", "unknown"],
71
+ ["na", "unknown", "unknown", "unknown", "unknown"],
56
72
  ):
57
73
  model(ontology_id=name, name=name, description="From CellxGene schema.").save()
58
74
 
@@ -76,8 +92,24 @@ def save_cxg_defaults() -> None:
76
92
  name=name, type=suspension_type, description="From CellxGene schema."
77
93
  ).save()
78
94
 
95
+ # organisms
96
+ taxonomy_ids = [
97
+ "NCBITaxon:9606", # Homo sapiens (Human)
98
+ "NCBITaxon:10090", # Mus musculus (House mouse)
99
+ "NCBITaxon:9544", # Macaca mulatta (Rhesus monkey)
100
+ "NCBITaxon:9825", # Sus scrofa domesticus (Domestic pig)
101
+ "NCBITaxon:9598", # Pan troglodytes (Chimpanzee)
102
+ "NCBITaxon:9483", # Callithrix jacchus (White-tufted-ear marmoset)
103
+ "NCBITaxon:7955", # Danio rerio (Zebrafish)
104
+ ]
105
+ for ontology_id in taxonomy_ids:
106
+ bt.Organism.from_source(
107
+ ontology_id=ontology_id,
108
+ source=bt.Source.get(name="ncbitaxon", currently_used=True),
109
+ ).save()
110
+
79
111
 
80
- def _create_cxg_sources(
112
+ def _create_cellxgene_sources(
81
113
  categoricals: dict[str, FieldAttr], schema_version: str, organism: str
82
114
  ) -> dict[str, SQLRecord]:
83
115
  """Create a source dictionary of CELLxGENE categoricals to Source."""
@@ -105,7 +137,7 @@ def _create_cxg_sources(
105
137
  )
106
138
  return source
107
139
 
108
- sources_df = pd.read_csv(UPath(__file__).parent / "cxg_schema_versions.csv")
140
+ sources_df = pd.read_csv(UPath(__file__).parent / "cellxgene_schema_versions.csv")
109
141
  sources_df = sources_df[sources_df.schema_version == schema_version]
110
142
  if sources_df.empty:
111
143
  raise ValueError(
@@ -126,11 +158,28 @@ def _create_cxg_sources(
126
158
  return key_to_source
127
159
 
128
160
 
161
+ @deprecated(new_name="create_cellxgene_schema")
129
162
  def get_cxg_schema(
130
163
  schema_version: CELLxGENESchemaVersions,
131
164
  *,
132
165
  field_types: FieldType | Collection[FieldType] = "ontology_id",
133
- organism: Literal["human", "mouse"] = "human",
166
+ organism: CELLxGENEOrganisms = "human",
167
+ spatial_library_id: str | None = None,
168
+ ) -> Schema:
169
+ return create_cellxgene_schema(
170
+ schema_version,
171
+ field_types=field_types,
172
+ organism=organism,
173
+ spatial_library_id=spatial_library_id,
174
+ )
175
+
176
+
177
+ def create_cellxgene_schema(
178
+ schema_version: CELLxGENESchemaVersions,
179
+ *,
180
+ field_types: FieldType | Collection[FieldType] = "ontology_id",
181
+ organism: CELLxGENEOrganisms = "human",
182
+ spatial_library_id: str | None = None,
134
183
  ) -> Schema:
135
184
  """Generates a :class:`~lamindb.Schema` for a specific CELLxGENE schema version.
136
185
 
@@ -138,6 +187,8 @@ def get_cxg_schema(
138
187
  schema_version: The CELLxGENE Schema version.
139
188
  field_types: One or several of 'ontology_id', 'name'.
140
189
  organism: The organism of the Schema.
190
+ spatial_library_id: Identifier for the spatial library.
191
+ Specifying this value enables curation against spatial requirements.
141
192
  """
142
193
  import bionty as bt
143
194
 
@@ -168,7 +219,7 @@ def get_cxg_schema(
168
219
  "tissue": CategorySpec(bt.Tissue.name, None),
169
220
  "tissue_ontology_term_id": CategorySpec(bt.Tissue.ontology_id, None),
170
221
  "tissue_type": CategorySpec(ULabel.name, "tissue"),
171
- "organism": CategorySpec(bt.Organism.name, None),
222
+ "organism": CategorySpec(bt.Organism.scientific_name, None),
172
223
  "organism_ontology_term_id": CategorySpec(bt.Organism.ontology_id, None),
173
224
  "donor_id": CategorySpec(str, "unknown"),
174
225
  }
@@ -195,7 +246,17 @@ def get_cxg_schema(
195
246
  f"Invalid field_types: {field_types}. Must contain 'ontology_id', 'name', or both."
196
247
  )
197
248
 
198
- sources = _create_cxg_sources(
249
+ is_version_6_or_later = version.parse(schema_version) >= version.parse("6.0.0")
250
+
251
+ organism_fields = {"organism", "organism_ontology_term_id"}
252
+ if is_version_6_or_later:
253
+ obs_categoricals = {
254
+ k: v for k, v in categoricals.items() if k not in organism_fields
255
+ }
256
+ else:
257
+ obs_categoricals = categoricals
258
+
259
+ sources = _create_cellxgene_sources(
199
260
  categoricals=categoricals,
200
261
  schema_version=schema_version,
201
262
  organism=organism,
@@ -217,30 +278,83 @@ def get_cxg_schema(
217
278
  obs_features = [
218
279
  Feature(
219
280
  name=field,
220
- dtype=categoricals[field],
281
+ dtype=obs_categoricals[field],
221
282
  cat_filters={"source": source},
222
283
  default_value=categoricals_to_spec[field].default,
223
284
  ).save()
224
285
  for field, source in sources.items()
225
- if field != "var_index"
286
+ if field != "var_index" and field in obs_categoricals
226
287
  ]
227
288
  for name in ["is_primary_data", "suspension_type", "tissue_type"]:
228
289
  obs_features.append(Feature(name=name, dtype=ULabel.name).save())
229
290
 
230
291
  obs_schema = Schema(
231
- name=f"obs of CELLxGENE version {schema_version}",
292
+ name=f"obs of CELLxGENE version {schema_version} for {organism} of {field_types}",
232
293
  features=obs_features,
233
294
  otype="DataFrame",
234
295
  minimal_set=True,
235
296
  coerce_dtype=True,
236
297
  ).save()
237
298
 
299
+ slots = {"var": var_schema, "obs": obs_schema}
300
+
301
+ if is_version_6_or_later:
302
+ uns_categoricals = {
303
+ k: v for k, v in categoricals.items() if k in organism_fields
304
+ }
305
+
306
+ uns_features = [
307
+ Feature(
308
+ name=field,
309
+ dtype=uns_categoricals[field],
310
+ cat_filters={"source": sources[field]},
311
+ default_value=categoricals_to_spec[field].default,
312
+ ).save()
313
+ for field in uns_categoricals
314
+ ]
315
+
316
+ uns_schema = Schema(
317
+ name=f"uns of CELLxGENE version {schema_version}",
318
+ features=uns_features,
319
+ otype="DataFrame",
320
+ minimal_set=True,
321
+ coerce_dtype=True,
322
+ ).save()
323
+
324
+ slots["uns"] = uns_schema
325
+
326
+ # Add spatial validation if spatial_library_id is provided
327
+ if spatial_library_id:
328
+ scalefactors_schema = Schema(
329
+ name=f"scalefactors of spatial {spatial_library_id}",
330
+ features=[
331
+ Feature(name="spot_diameter_fullres", dtype=float).save(),
332
+ Feature(name="tissue_hires_scalef", dtype=float).save(),
333
+ ],
334
+ ).save()
335
+
336
+ spatial_schema = Schema(
337
+ name="CELLxGENE spatial metadata",
338
+ features=[
339
+ Feature(
340
+ name="is_single",
341
+ dtype=bool,
342
+ description="True if dataset represents single spatial unit (tissue section for Visium, array for Slide-seqV2)",
343
+ ).save()
344
+ ],
345
+ ).save()
346
+
347
+ slots["uns:spatial"] = spatial_schema
348
+ slots[f"uns:spatial:{spatial_library_id}:scalefactors"] = (
349
+ scalefactors_schema
350
+ )
351
+
238
352
  full_cxg_schema = Schema(
239
- name=f"AnnData of CELLxGENE version {schema_version}",
353
+ name=f"AnnData of CELLxGENE version {schema_version} for {organism} of {', '.join(field_types) if isinstance(field_types, list) else field_types}",
240
354
  otype="AnnData",
241
355
  minimal_set=True,
242
356
  coerce_dtype=True,
243
- slots={"var": var_schema, "obs": obs_schema},
357
+ slots=slots,
244
358
  ).save()
245
359
 
246
360
  return full_cxg_schema
@@ -52,3 +52,14 @@ schema_version,entity,organism,source,version
52
52
  5.3.0,Tissue,all,uberon,2025-01-15
53
53
  5.3.0,Gene,human,ensembl,release-110
54
54
  5.3.0,Gene,mouse,ensembl,release-110
55
+ 6.0.0,CellType,all,cl,2025-04-10
56
+ 6.0.0,ExperimentalFactor,all,efo,3.78.0
57
+ 6.0.0,Ethnicity,human,hancestro,3.0
58
+ 6.0.0,DevelopmentalStage,human,hsapdv,2025-01-23
59
+ 6.0.0,DevelopmentalStage,mouse,mmusdv,2025-01-23
60
+ 6.0.0,Disease,all,mondo,2025-05-06
61
+ 6.0.0,Organism,all,ncbitaxon,2025-03-13
62
+ 6.0.0,Phenotype,all,pato,2025-05-14
63
+ 6.0.0,Tissue,all,uberon,2025-05-28
64
+ 6.0.0,Gene,human,ensembl,release-110
65
+ 6.0.0,Gene,mouse,ensembl,release-110
@@ -1,6 +1,10 @@
1
- """Example Croissant files.
1
+ """Examples for MLCommons Croissant files, which are used to store metadata about datasets.
2
+
3
+ .. autosummary::
4
+ :toctree: .
5
+
6
+ mini_immuno
2
7
 
3
- Examples for MLCommons Croissant files, which are used to store metadata about datasets.
4
8
  """
5
9
 
6
10
  import json
@@ -12,6 +16,12 @@ def mini_immuno(n_files: int = 1) -> list[Path]:
12
16
 
13
17
  Args:
14
18
  n_files: Number of files inside the croissant file. Default is 1.
19
+
20
+ Example
21
+
22
+ ::
23
+
24
+ croissant_path, dataset1_path = ln.examples.croissant.mini_immuno()
15
25
  """
16
26
  from ..datasets import file_mini_csv
17
27
  from ..datasets.mini_immuno import get_dataset1
@@ -41,7 +41,7 @@ Dictionary, Dataframe, AnnData, MuData, SpatialData.
41
41
  .. autosummary::
42
42
  :toctree: .
43
43
 
44
- dict_cxg_uns
44
+ dict_cellxgene_uns
45
45
  df_iris
46
46
  df_iris_in_meter
47
47
  df_iris_in_meter_study1
@@ -78,7 +78,7 @@ from ._core import (
78
78
  df_iris_in_meter,
79
79
  df_iris_in_meter_study1,
80
80
  df_iris_in_meter_study2,
81
- dict_cxg_uns,
81
+ dict_cellxgene_uns,
82
82
  dir_iris_images,
83
83
  dir_scrnaseq_cellranger,
84
84
  file_bam,
@@ -418,7 +418,7 @@ def mudata_papalexi21_subset() -> MuData: # pragma: no cover
418
418
  return mdata
419
419
 
420
420
 
421
- def dict_cxg_uns() -> dict[str, Any]:
421
+ def dict_cellxgene_uns() -> dict[str, Any]:
422
422
  """An example CELLxGENE AnnData `.uns` dictionary."""
423
423
  uns = {
424
424
  "organism_ontology_term_id": "NCBITaxon:9606",
@@ -9,32 +9,36 @@ import pandas as pd
9
9
 
10
10
  def small_dataset3_cellxgene(
11
11
  otype: Literal["DataFrame", "AnnData"] = "AnnData",
12
+ *,
12
13
  with_obs_defaults: bool = False,
14
+ with_var_typo: bool = False,
13
15
  with_obs_typo: bool = False,
16
+ with_uns_organism: bool = False,
17
+ with_uns_spatial: bool = False,
14
18
  ) -> tuple[pd.DataFrame, dict[str, Any]] | ad.AnnData:
15
- # TODO: consider other ids for other organisms
16
- # "ENSMUSG00002076988"
17
- var_ids = ["invalid_ensembl_id", "ENSG00000000419", "ENSG00000139618"]
18
-
19
+ var_id = "invalid_ensembl_id" if with_var_typo else "ENSG00000000457"
20
+ var_ids = [var_id, "ENSG00000000419", "ENSG00000139618"]
19
21
  lung_id = "UBERON:0002048XXX" if with_obs_typo else "UBERON:0002048"
22
+
23
+ obs_data = {
24
+ "disease_ontology_term_id": [
25
+ "MONDO:0004975",
26
+ "MONDO:0004980",
27
+ "MONDO:0004980",
28
+ ],
29
+ "development_stage_ontology_term_id": ["unknown", "unknown", "unknown"],
30
+ "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
31
+ "tissue_ontology_term_id": [lung_id, lung_id, "UBERON:0000948"],
32
+ "cell_type": ["T cell", "B cell", "B cell"],
33
+ "self_reported_ethnicity": ["South Asian", "South Asian", "South Asian"],
34
+ "donor_id": ["-1", "1", "2"],
35
+ "is_primary_data": [False, False, False],
36
+ "suspension_type": ["cell", "cell", "cell"],
37
+ "tissue_type": ["tissue", "tissue", "tissue"],
38
+ }
39
+
20
40
  obs_df = pd.DataFrame(
21
- {
22
- "disease_ontology_term_id": [
23
- "MONDO:0004975",
24
- "MONDO:0004980",
25
- "MONDO:0004980",
26
- ],
27
- "development_stage_ontology_term_id": ["unknown", "unknown", "unknown"],
28
- "organism": ["human", "human", "human"],
29
- "sex_ontology_term_id": ["PATO:0000383", "PATO:0000384", "unknown"],
30
- "tissue_ontology_term_id": [lung_id, lung_id, "UBERON:0000948"],
31
- "cell_type": ["T cell", "B cell", "B cell"],
32
- "self_reported_ethnicity": ["South Asian", "South Asian", "South Asian"],
33
- "donor_id": ["-1", "1", "2"],
34
- "is_primary_data": [False, False, False],
35
- "suspension_type": ["cell", "cell", "cell"],
36
- "tissue_type": ["tissue", "tissue", "tissue"],
37
- },
41
+ obs_data,
38
42
  index=["barcode1", "barcode2", "barcode3"],
39
43
  )
40
44
 
@@ -65,8 +69,38 @@ def small_dataset3_cellxgene(
65
69
  # CELLxGENE requires the `.raw` slot to be set - https://github.com/chanzuckerberg/single-cell-curation/issues/1304
66
70
  adata.raw = adata.copy()
67
71
  adata.raw.var.drop(columns="feature_is_filtered", inplace=True)
72
+
68
73
  if with_obs_defaults:
74
+ adata.obs["cell_type_ontology_term_id"] = [
75
+ "CL:0000084",
76
+ "CL:0000236",
77
+ "CL:0000236",
78
+ ]
79
+ adata.obs["self_reported_ethnicity_ontology_term_id"] = "na"
80
+ adata.obs["assay_ontology_term_id"] = "EFO:1001982"
69
81
  adata.obs["assay"] = "single-cell RNA sequencing"
82
+ if with_uns_organism:
83
+ adata.uns["organism_ontology_term_id"] = "NCBITaxon:9606"
84
+ adata.uns["organism"] = "Homo sapiens"
85
+ else:
86
+ adata.obs["organism_ontology_term_id"] = "NCBITaxon:9606"
87
+ obs_data["organism"] = ["Homo sapiens", "Homo sapiens", "Homo sapiens"]
88
+ if with_uns_spatial:
89
+ adata.uns["spatial"] = {
90
+ "is_single": True,
91
+ "library_123": {
92
+ "scalefactors": {
93
+ "spot_diameter_fullres": 165.0,
94
+ "tissue_hires_scalef": 0.5,
95
+ },
96
+ "images": {
97
+ "hires": np.random.default_rng().integers(
98
+ 0, 255, (2000, 2000, 3), dtype=np.uint8
99
+ )
100
+ },
101
+ },
102
+ }
103
+
70
104
  return adata
71
105
 
72
106
 
@@ -92,6 +126,16 @@ def anndata_with_obs() -> ad.AnnData:
92
126
  df.index = "obs" + df.index.astype(str)
93
127
 
94
128
  adata = ad.AnnData(X=np.zeros(shape=(40, 100), dtype=np.float32), obs=df)
95
- adata.var.index = bionty_base.Gene().df().head(100)["ensembl_gene_id"].values
129
+ bionty_genes = bionty_base.Gene()
130
+ # backwards compatible: fall back to .df() when Gene has no .to_dataframe()
131
+ adata.var.index = (
132
+ (
133
+ bionty_genes.to_dataframe()
134
+ if hasattr(bionty_genes, "to_dataframe")
135
+ else bionty_genes.df()
136
+ )
137
+ .head(100)["ensembl_gene_id"]
138
+ .values
139
+ )
96
140
 
97
141
  return adata
@@ -78,6 +78,7 @@ def get_dataset1(
78
78
  with_outdated_gene: bool = False,
79
79
  with_wrong_subtype: bool = False,
80
80
  with_index_type_mismatch: bool = False,
81
+ with_nested_uns: bool = False,
81
82
  ) -> pd.DataFrame | ad.AnnData:
82
83
  """A small tabular dataset measuring expression & metadata."""
83
84
  # define the data in the dataset
@@ -219,9 +219,8 @@ class Migration(migrations.Migration):
219
219
  "uid",
220
220
  lamindb.base.fields.CharField(
221
221
  blank=True,
222
- db_default="aaaaaaaaaaaa",
223
222
  db_index=True,
224
- default="aaaaaaaaaaaaa",
223
+ default=lamindb.base.uids.base62_12,
225
224
  editable=False,
226
225
  max_length=12,
227
226
  unique=True,
@@ -4582,4 +4581,8 @@ class Migration(migrations.Migration):
4582
4581
  name="unique_artifact_storage_hash_null_key",
4583
4582
  ),
4584
4583
  ),
4584
+ migrations.AlterModelOptions(
4585
+ name="user",
4586
+ options={},
4587
+ ),
4585
4588
  ]
@@ -0,0 +1,64 @@
1
+ # Generated by Django 5.2 on 2025-08-07 18:52
2
+
3
+ from django.db import migrations
4
+
5
+ CREATE_FUNCTION_SQL = """
6
+ CREATE OR REPLACE FUNCTION is_valid_record_type(record_type_id INTEGER, record_is_type BOOLEAN)
7
+ RETURNS BOOLEAN AS $$
8
+ BEGIN
9
+ -- Record with no type is valid
10
+ IF record_type_id IS NULL THEN
11
+ RETURN TRUE;
12
+ END IF;
13
+
14
+ -- If current record is a type, it can only reference schema-less types
15
+ IF record_is_type THEN
16
+ RETURN EXISTS (
17
+ SELECT 1 FROM lamindb_record r
18
+ WHERE r.id = record_type_id AND r.is_type AND r.schema_id IS NULL
19
+ );
20
+ END IF;
21
+
22
+ -- Regular records can reference any type
23
+ RETURN EXISTS (
24
+ SELECT 1 FROM lamindb_record r
25
+ WHERE r.id = record_type_id AND r.is_type
26
+ );
27
+ END;
28
+ $$ LANGUAGE plpgsql;
29
+ """
30
+
31
+ ADD_CONSTRAINT_SQL = """
32
+ ALTER TABLE lamindb_record
33
+ ADD CONSTRAINT record_type_is_valid_fk
34
+ CHECK (is_valid_record_type(type_id, is_type));
35
+ """
36
+
37
+ DROP_CONSTRAINT_SQL = (
38
+ "ALTER TABLE lamindb_record DROP CONSTRAINT IF EXISTS record_type_is_valid_fk;"
39
+ )
40
+ DROP_FUNCTION_SQL = "DROP FUNCTION IF EXISTS is_valid_record_type(INTEGER, BOOLEAN);"
41
+
42
+
43
+ def apply_postgres_constraint(apps, schema_editor):
44
+ if schema_editor.connection.vendor == "postgresql":
45
+ schema_editor.execute(CREATE_FUNCTION_SQL)
46
+ schema_editor.execute(ADD_CONSTRAINT_SQL)
47
+
48
+
49
+ def revert_postgres_constraint(apps, schema_editor):
50
+ if schema_editor.connection.vendor == "postgresql":
51
+ schema_editor.execute(DROP_CONSTRAINT_SQL)
52
+ schema_editor.execute(DROP_FUNCTION_SQL)
53
+
54
+
55
+ class Migration(migrations.Migration):
56
+ dependencies = [
57
+ ("lamindb", "0119_squashed"),
58
+ ]
59
+
60
+ operations = [
61
+ migrations.RunPython(
62
+ apply_postgres_constraint, reverse_code=revert_postgres_constraint
63
+ ),
64
+ ]
@@ -0,0 +1,53 @@
1
+ # Generated by Django 5.2 on 2025-09-05 12:25
2
+
3
+ import django.db.models.deletion
4
+ from django.db import migrations, models
5
+
6
+ import lamindb.base.fields
7
+ import lamindb.models.sqlrecord
8
+
9
+
10
+ class Migration(migrations.Migration):
11
+ dependencies = [
12
+ ("lamindb", "0120_add_record_fk_constraint"),
13
+ ]
14
+
15
+ operations = [
16
+ migrations.CreateModel(
17
+ name="RecordUser",
18
+ fields=[
19
+ ("id", models.BigAutoField(primary_key=True, serialize=False)),
20
+ (
21
+ "feature",
22
+ lamindb.base.fields.ForeignKey(
23
+ blank=True,
24
+ on_delete=django.db.models.deletion.PROTECT,
25
+ related_name="links_recorduser",
26
+ to="lamindb.feature",
27
+ ),
28
+ ),
29
+ (
30
+ "record",
31
+ lamindb.base.fields.ForeignKey(
32
+ blank=True,
33
+ on_delete=django.db.models.deletion.CASCADE,
34
+ related_name="values_user",
35
+ to="lamindb.record",
36
+ ),
37
+ ),
38
+ (
39
+ "value",
40
+ lamindb.base.fields.ForeignKey(
41
+ blank=True,
42
+ on_delete=django.db.models.deletion.PROTECT,
43
+ related_name="links_record",
44
+ to="lamindb.user",
45
+ ),
46
+ ),
47
+ ],
48
+ options={
49
+ "unique_together": {("record", "feature", "value")},
50
+ },
51
+ bases=(models.Model, lamindb.models.sqlrecord.IsLink),
52
+ ),
53
+ ]
@@ -9,6 +9,7 @@
9
9
  BasicQuerySet
10
10
  QuerySet
11
11
  ArtifactSet
12
+ LazyArtifact
12
13
  QueryManager
13
14
  SQLRecordList
14
15
  FeatureManager
@@ -49,7 +50,7 @@ from .schema import Schema
49
50
  from .ulabel import ULabel
50
51
 
51
52
  # should come last as it needs everything else
52
- from .artifact import Artifact
53
+ from .artifact import Artifact, LazyArtifact
53
54
  from ._feature_manager import FeatureManager
54
55
  from ._label_manager import LabelManager
55
56
  from .collection import Collection, CollectionArtifact
@@ -78,6 +79,7 @@ from .project import (
78
79
  PersonProject,
79
80
  RecordPerson,
80
81
  RecordReference,
82
+ ProjectRecord,
81
83
  )
82
84
  from .run import RunFeatureValue
83
85
  from .schema import (
@@ -8,8 +8,6 @@ from lamin_utils import logger
8
8
  from rich.text import Text
9
9
  from rich.tree import Tree
10
10
 
11
- from ..core._context import is_run_from_ipython
12
-
13
11
  if TYPE_CHECKING:
14
12
  from lamindb.models import Artifact, Collection, Run
15
13
 
@@ -41,6 +39,8 @@ def format_rich_tree(
41
39
  ) -> str | None:
42
40
  from rich.console import Console
43
41
 
42
+ from ..core._context import is_run_from_ipython
43
+
44
44
  # If tree has no children, return fallback
45
45
  if not tree.children:
46
46
  return fallback