lamindb 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (36)
  1. lamindb/__init__.py +1 -1
  2. lamindb/core/__init__.py +2 -2
  3. lamindb/core/storage/__init__.py +2 -1
  4. lamindb/core/storage/_anndata_accessor.py +16 -1
  5. lamindb/core/storage/_backed_access.py +4 -0
  6. lamindb/core/storage/_spatialdata_accessor.py +52 -0
  7. lamindb/examples/__init__.py +3 -18
  8. lamindb/examples/cellxgene/_cellxgene.py +11 -3
  9. lamindb/examples/croissant/__init__.py +44 -0
  10. lamindb/examples/croissant/mini_immuno.anndata.zarr_metadata.json +73 -0
  11. lamindb/{core → examples}/datasets/__init__.py +5 -2
  12. lamindb/{core → examples}/datasets/_core.py +33 -1
  13. lamindb/{core → examples}/datasets/mini_immuno.py +19 -8
  14. lamindb/examples/schemas/_anndata.py +26 -16
  15. lamindb/examples/schemas/_simple.py +24 -10
  16. lamindb/integrations/__init__.py +2 -0
  17. lamindb/integrations/_croissant.py +122 -0
  18. lamindb/integrations/_vitessce.py +14 -12
  19. lamindb/migrations/0116_remove_artifact_unique_artifact_storage_key_hash_and_more.py +51 -0
  20. lamindb/migrations/0117_fix_artifact_storage_hash_unique_constraints.py +32 -0
  21. lamindb/migrations/{0115_squashed.py → 0117_squashed.py} +29 -6
  22. lamindb/models/_describe.py +107 -1
  23. lamindb/models/_django.py +63 -6
  24. lamindb/models/_feature_manager.py +0 -1
  25. lamindb/models/artifact.py +41 -11
  26. lamindb/models/collection.py +4 -9
  27. lamindb/models/project.py +2 -2
  28. lamindb/models/record.py +1 -1
  29. lamindb/models/run.py +1 -1
  30. lamindb/models/sqlrecord.py +3 -0
  31. {lamindb-1.9.1.dist-info → lamindb-1.10.1.dist-info}/METADATA +4 -4
  32. {lamindb-1.9.1.dist-info → lamindb-1.10.1.dist-info}/RECORD +36 -30
  33. /lamindb/{core → examples}/datasets/_fake.py +0 -0
  34. /lamindb/{core → examples}/datasets/_small.py +0 -0
  35. {lamindb-1.9.1.dist-info → lamindb-1.10.1.dist-info}/LICENSE +0 -0
  36. {lamindb-1.9.1.dist-info → lamindb-1.10.1.dist-info}/WHEEL +0 -0
lamindb/integrations/_croissant.py ADDED
@@ -0,0 +1,122 @@
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+     import lamindb as ln
+
+
+ def curate_from_croissant(
+     croissant_data: str | Path | dict[str, Any],
+     run: ln.Run | None = None,
+ ) -> ln.Artifact | ln.Collection:
+     """Create annotated artifacts from a CroissantML file.
+
+     Returns a collection if multiple files are found in `croissant_data`, otherwise a single artifact.
+
+     Args:
+         croissant_data: Path to CroissantML JSON file or dictionary.
+
+     Example:
+
+         ::
+
+             artifact = ln.integrations.curate_from_croissant("dataset_metadata.json")
+     """
+     import lamindb as ln
+
+     # Load CroissantML data
+     if isinstance(croissant_data, (str, Path)):
+         if not Path(croissant_data).exists():
+             raise FileNotFoundError(f"File not found: {croissant_data}")
+         with open(croissant_data, encoding="utf-8") as f:
+             data = json.load(f)
+     elif isinstance(croissant_data, dict):
+         data = croissant_data
+     else:
+         raise ValueError(
+             "croissant_data must be a file path, JSON string, or dictionary"
+         )
+
+     # Validate basic structure
+     if data.get("@type") != "Dataset":
+         raise ValueError("CroissantML @type must be 'Dataset'")
+
+     if "name" not in data:
+         raise ValueError("CroissantML must have a 'name' field")
+
+     # Extract basic metadata
+     dataset_name = data["name"]
+     description = data.get("description", "")
+     version = data.get("version", "1.0")
+     license_info = data.get("license", "")
+     project_name = data.get("cr:projectName", "")
+
+     # Create license feature and label if license info exists
+     license_label = None
+     if license_info:
+         license_label_type = ln.ULabel.filter(name="License", is_type=True).first()
+         if not license_label_type:
+             license_label_type = ln.ULabel(name="License", is_type=True).save()
+         license_label = ln.ULabel.filter(name=license_info).first()
+         if not license_label:
+             license_label = ln.ULabel(
+                 name=license_info,
+                 description="Dataset license",
+                 type=license_label_type,
+             ).save()
+     project_label = None
+     if project_name:
+         project_label = ln.Project.filter(name=project_name).first()
+         if not project_label:
+             project_label = ln.Project(name=project_name).save()
+
+     # Extract file distributions
+     artifacts = []
+     file_distributions = data.get("distribution", [])
+     if not file_distributions:
+         raise ValueError("No file distributions found in croissant data")
+     for dist in file_distributions:
+         file_id = dist.get("@id", "")
+         if Path(file_id).exists():
+             file_path = file_id
+         else:
+             content_url = dist.get("contentUrl", "")
+             file_path = content_url or data.get("url", "")
+         if not file_path:
+             raise ValueError(
+                 f"No valid file path found in croissant distribution: {dist}"
+             )
+         if len(file_distributions) == 1:
+             artifact_description = f"{dataset_name}"
+             if file_id != dataset_name:
+                 artifact_description += f" ({file_id})"
+             artifact_description += f" - {description}"
+         else:
+             artifact_description = f"{file_id}"
+         artifact = ln.Artifact(  # type: ignore
+             file_path,
+             description=artifact_description,
+             version=version,
+             kind="dataset",
+             run=run,
+         ).save()
+         if license_label:
+             artifact.ulabels.add(license_label)
+         if project_label:
+             artifact.projects.add(project_label)
+         artifacts.append(artifact)
+
+     if len(artifacts) == 1:
+         return artifacts[0]
+     else:
+         collection = ln.Collection(  # type: ignore
+             artifacts, key=dataset_name, description=description, version=version
+         ).save()
+         if license_label:
+             collection.ulabels.add(license_label)
+         if project_label:
+             collection.projects.add(project_label)
+         return collection
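For orientation, here is a minimal sketch of calling the new function with an in-memory Croissant dictionary instead of a JSON file. The dataset name, file path, and license value are made up; only fields the code above actually reads (`@type`, `name`, `description`, `version`, `license`, `distribution`) are included.

```python
import lamindb as ln

# Hypothetical Croissant-style metadata; with a single distribution entry,
# curate_from_croissant returns one Artifact rather than a Collection.
# Assumes ./mini_immuno.anndata.zarr exists locally.
croissant_dict = {
    "@type": "Dataset",
    "name": "mini-immuno",
    "description": "A small immunology demo dataset",
    "version": "1.0",
    "license": "CC-BY-4.0",
    "distribution": [
        {"@id": "mini_immuno.anndata.zarr", "contentUrl": "mini_immuno.anndata.zarr"},
    ],
}

artifact = ln.integrations.curate_from_croissant(croissant_dict)
# license and project metadata are attached as ULabel / Project links
artifact.describe()
```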
lamindb/integrations/_vitessce.py CHANGED
@@ -28,21 +28,17 @@ def save_vitessce_config(
      If the `VitessceConfig` object references multiple artifacts, automatically
      creates a `Collection` and displays the "Vitessce button" next to it.

+     The `VitessceConfig` artifact has `.suffix = ".vitessce.json"` and `.kind = "__lamindb_config__"`,
+     which is by default hidden on the hub UI.
+
      Guide: :doc:`docs:vitessce`.

      Args:
          vitessce_config: A `VitessceConfig` object.
-         key: A key for the `VitessceConfig` object. Is used as `key` for a
-             `Collection` in case the `VitessceConfig` object references
-             multiple artifacts.
-         description: A description for the `VitessceConfig` object.
-
-     .. versionchanged:: 0.76.12
-         Now assumes `vitessce-python >= 3.4.0`, which allows passing artifacts within `VitessceConfig`.
-     .. versionchanged:: 0.75.1
-         Now displays the "Vitessce button" on the hub next to the dataset. It additionally keeps displaying it next to the configuration file.
-     .. versionchanged:: 0.70.2
-         No longer saves the dataset. It only saves the `VitessceConfig` object.
+         key: A `key` for the `VitessceConfig` artifact.
+         description: A `description` for the `VitessceConfig` aritifact. Is additionally
+             used as `key` for a `Collection` in case the `VitessceConfig` object
+             references multiple artifacts.
      """
      # can only import here because vitessce is not a dependency
      from vitessce import VitessceConfig
@@ -73,6 +69,8 @@ def save_vitessce_config(
      if len(dataset_artifacts) > 1:
          # if we have more datasets, we should create a collection
          # and attach an action to the collection
+         # consicious use of description for key, see here
+         # https://github.com/laminlabs/lamindb/pull/2997
          collection = Collection(dataset_artifacts, key=description).save()

      # create a JSON export
@@ -80,7 +78,11 @@ def save_vitessce_config(
      with open(config_file_local_path, "w") as file:
          json.dump(vc_dict, file)
      vitessce_config_artifact = Artifact(
-         config_file_local_path, key=key, description=description, run=run
+         config_file_local_path,
+         key=key,
+         description=description,
+         run=run,
+         kind="__lamindb_config__",
      ).save()
      slug = ln_setup.settings.instance.slug
      logger.important(
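A hedged usage sketch of the updated signature; the `VitessceConfig` construction is abbreviated and the `key`/`description` values are purely illustrative.

```python
import lamindb as ln
from vitessce import VitessceConfig

# Build a config as usual; per the docstring, artifacts referenced inside the
# config (vitessce-python >= 3.4) are what save_vitessce_config collects.
vc = VitessceConfig(schema_version="1.0.15", name="My dashboard")
# ... add datasets and views that reference lamindb artifacts ...

# Saves only the config: an Artifact with suffix ".vitessce.json" and
# kind "__lamindb_config__"; if the config references multiple artifacts,
# a Collection keyed by `description` is created as well.
ln.integrations.save_vitessce_config(
    vc,
    key="config/my_dashboard.vitessce.json",
    description="My dashboard",
)
```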
lamindb/migrations/0116_remove_artifact_unique_artifact_storage_key_hash_and_more.py ADDED
@@ -0,0 +1,51 @@
+ # Generated by Django 5.2 on 2025-07-26 15:55
+
+ from django.db import migrations, models
+
+ import lamindb.base.fields
+
+
+ class Migration(migrations.Migration):
+     dependencies = [
+         ("lamindb", "0115_alter_space_uid"),
+     ]
+
+     operations = [
+         migrations.RemoveConstraint(
+             model_name="artifact",
+             name="unique_artifact_storage_key_hash",
+         ),
+         migrations.AlterField(
+             model_name="record",
+             name="description",
+             field=lamindb.base.fields.CharField(
+                 blank=True, db_index=True, default=None, max_length=255, null=True
+             ),
+         ),
+         migrations.AlterField(
+             model_name="reference",
+             name="text",
+             field=lamindb.base.fields.TextField(
+                 blank=True, db_index=True, default=None, null=True
+             ),
+         ),
+         migrations.AlterField(
+             model_name="reference",
+             name="url",
+             field=lamindb.base.fields.URLField(blank=True, db_index=True, null=True),
+         ),
+         migrations.AlterField(
+             model_name="run",
+             name="name",
+             field=lamindb.base.fields.CharField(
+                 blank=True, db_index=True, default=None, max_length=150, null=True
+             ),
+         ),
+         migrations.AddConstraint(
+             model_name="artifact",
+             constraint=models.UniqueConstraint(
+                 fields=("storage", "key", "hash"),
+                 name="unique_artifact_storage_key_hash",
+             ),
+         ),
+     ]
lamindb/migrations/0117_fix_artifact_storage_hash_unique_constraints.py ADDED
@@ -0,0 +1,32 @@
+ # Generated by Django 5.2 on 2025-07-26 18:50
+
+ from django.db import migrations, models
+
+
+ class Migration(migrations.Migration):
+     dependencies = [
+         ("lamindb", "0116_remove_artifact_unique_artifact_storage_key_hash_and_more"),
+     ]
+
+     operations = [
+         migrations.RemoveConstraint(
+             model_name="artifact",
+             name="unique_artifact_storage_key_hash",
+         ),
+         migrations.AddConstraint(
+             model_name="artifact",
+             constraint=models.UniqueConstraint(
+                 condition=models.Q(("key__isnull", False)),
+                 fields=("storage", "key", "hash"),
+                 name="unique_artifact_storage_key_hash_not_null",
+             ),
+         ),
+         migrations.AddConstraint(
+             model_name="artifact",
+             constraint=models.UniqueConstraint(
+                 condition=models.Q(("key__isnull", True)),
+                 fields=("storage", "hash"),
+                 name="unique_artifact_storage_hash_null_key",
+             ),
+         ),
+     ]
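Taken together, migrations 0116 and 0117 replace the single `(storage, key, hash)` uniqueness constraint with two partial constraints. At the model level this corresponds roughly to the following `Meta.constraints` declaration; this is a sketch for illustration only (the field stand-ins and lengths are assumptions, not the actual `Artifact` model source in `lamindb.models`):

```python
from django.db import models


class Artifact(models.Model):
    # minimal stand-ins for the real fields; lengths and relations are illustrative
    storage = models.ForeignKey("Storage", models.PROTECT, related_name="+")
    key = models.CharField(max_length=255, null=True, default=None)
    hash = models.CharField(max_length=86, null=True, default=None)

    class Meta:
        constraints = [
            # artifacts that have a key: (storage, key, hash) must be unique
            models.UniqueConstraint(
                condition=models.Q(key__isnull=False),
                fields=["storage", "key", "hash"],
                name="unique_artifact_storage_key_hash_not_null",
            ),
            # artifacts without a key: (storage, hash) must be unique
            models.UniqueConstraint(
                condition=models.Q(key__isnull=True),
                fields=["storage", "hash"],
                name="unique_artifact_storage_hash_null_key",
            ),
        ]
```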
lamindb/migrations/{0115_squashed.py → 0117_squashed.py} RENAMED
@@ -1,4 +1,4 @@
- # Generated by Django 5.2 on 2025-07-06 09:12
+ # Generated by Django 5.2 on 2025-07-26 18:58

  import django.core.validators
  import django.db.models.deletion
@@ -137,6 +137,8 @@ class Migration(migrations.Migration):
          ("lamindb", "0113_lower_case_branch_and_space_names"),
          ("lamindb", "0114_alter_run__status_code"),
          ("lamindb", "0115_alter_space_uid"),
+         ("lamindb", "0116_remove_artifact_unique_artifact_storage_key_hash_and_more"),
+         ("lamindb", "0117_fix_artifact_storage_hash_unique_constraints"),
      ]

      dependencies = []  # type: ignore
@@ -1477,7 +1479,11 @@ class Migration(migrations.Migration):
                  (
                      "description",
                      lamindb.base.fields.CharField(
-                         blank=True, default=None, max_length=255, null=True
+                         blank=True,
+                         db_index=True,
+                         default=None,
+                         max_length=255,
+                         null=True,
                      ),
                  ),
                  (
@@ -1825,7 +1831,10 @@ class Migration(migrations.Migration):
                          null=True,
                      ),
                  ),
-                 ("url", lamindb.base.fields.URLField(blank=True, null=True)),
+                 (
+                     "url",
+                     lamindb.base.fields.URLField(blank=True, db_index=True, null=True),
+                 ),
                  (
                      "pubmed_id",
                      lamindb.base.fields.BigIntegerField(
@@ -1860,7 +1869,9 @@ class Migration(migrations.Migration):
                  ),
                  (
                      "text",
-                     lamindb.base.fields.TextField(blank=True, default=None, null=True),
+                     lamindb.base.fields.TextField(
+                         blank=True, db_index=True, default=None, null=True
+                     ),
                  ),
                  (
                      "date",
@@ -1989,7 +2000,11 @@ class Migration(migrations.Migration):
                  (
                      "name",
                      lamindb.base.fields.CharField(
-                         blank=True, default=None, max_length=150, null=True
+                         blank=True,
+                         db_index=True,
+                         default=None,
+                         max_length=150,
+                         null=True,
                      ),
                  ),
                  (
@@ -4472,7 +4487,15 @@ class Migration(migrations.Migration):
              constraint=models.UniqueConstraint(
                  condition=models.Q(("key__isnull", False)),
                  fields=("storage", "key", "hash"),
-                 name="unique_artifact_storage_key_hash",
+                 name="unique_artifact_storage_key_hash_not_null",
+             ),
+         ),
+         migrations.AddConstraint(
+             model_name="artifact",
+             constraint=models.UniqueConstraint(
+                 condition=models.Q(("key__isnull", True)),
+                 fields=("storage", "hash"),
+                 name="unique_artifact_storage_hash_null_key",
              ),
          ),
      ]
lamindb/models/_describe.py CHANGED
@@ -179,7 +179,7 @@ def describe_artifact_general(
          two_column_items.append(Text.assemble(("branch: ", "dim"), branch_name))
      # actually not name field here, but handle
      created_by_handle = (
-         foreign_key_data["branch"]["name"]
+         foreign_key_data["created_by"]["name"]
          if foreign_key_data
          else self.created_by.handle
      )
@@ -234,3 +234,109 @@ def describe_artifact_general(
              )
          )
      return tree
+
+
+ def describe_collection_general(
+     self: Collection,
+     tree: Tree | None = None,
+     foreign_key_data: dict[str, dict[str, int | str]] | None = None,
+ ) -> Tree:
+     if tree is None:
+         tree = describe_header(self)
+
+     # add general information (order is the same as in API docs)
+     general = tree.add(Text("General", style="bold bright_cyan"))
+
+     if self.key:
+         general.add(Text.assemble(("key: ", "dim"), (f"{self.key}", "cyan3")))
+     if self.description:
+         general.add(
+             Text.assemble(
+                 ("description: ", "dim"),
+                 f"{self.description}",
+             )
+         )
+
+     # Two column items (short content)
+     two_column_items = []
+
+     two_column_items.append(Text.assemble(("uid: ", "dim"), f"{self.uid}"))
+
+     transform_name = (
+         foreign_key_data["transform"]["name"]
+         if foreign_key_data and "transform" in foreign_key_data
+         else self.transform.name
+         if self.transform
+         else None
+     )
+     if transform_name:
+         two_column_items.append(
+             Text.assemble(
+                 ("transform: ", "dim"),
+                 (f"{transform_name}", "cyan3"),
+             )
+         )
+
+     space_name = (
+         foreign_key_data["space"]["name"]
+         if foreign_key_data and "space" in foreign_key_data
+         else self.space.name
+         if self.space
+         else None
+     )
+     if space_name:
+         two_column_items.append(Text.assemble(("space: ", "dim"), space_name))
+
+     branch_name = (
+         foreign_key_data["branch"]["name"]
+         if foreign_key_data and "branch" in foreign_key_data
+         else self.branch.name
+         if self.branch
+         else None
+     )
+     if branch_name:
+         two_column_items.append(Text.assemble(("branch: ", "dim"), branch_name))
+
+     created_by_handle = (
+         foreign_key_data["created_by"]["name"]
+         if foreign_key_data and "created_by" in foreign_key_data
+         else self.created_by.handle
+         if self.created_by
+         else None
+     )
+     if created_by_handle:
+         two_column_items.append(
+             Text.assemble(
+                 ("created_by: ", "dim"),
+                 (created_by_handle),
+             )
+         )
+
+     if self.created_at:
+         two_column_items.append(
+             Text.assemble(("created_at: ", "dim"), highlight_time(str(self.created_at)))
+         )
+
+     if self.version:
+         two_column_items.append(Text.assemble(("version: ", "dim"), f"{self.version}"))
+
+     # Add two-column items in pairs
+     for i in range(0, len(two_column_items), 2):
+         if i + 1 < len(two_column_items):
+             # Two items side by side
+             left_item = two_column_items[i]
+             right_item = two_column_items[i + 1]
+
+             # Create padded version by calculating the plain text length
+             left_plain_text = (
+                 left_item.plain if hasattr(left_item, "plain") else str(left_item)
+             )
+             padding_needed = max(0, 45 - len(left_plain_text))
+             padding = " " * padding_needed
+
+             general.add(Text.assemble(left_item, padding, right_item))
+         else:
+             # Single item (odd number)
+             general.add(two_column_items[i])
+
+     return tree
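The new `describe_collection_general` lays short fields out in two columns by padding the left item to 45 characters of plain text. A self-contained sketch of that pairing logic with `rich`; the item values are made up:

```python
from rich.console import Console
from rich.text import Text
from rich.tree import Tree

tree = Tree(Text("Collection", style="bold"))
general = tree.add(Text("General", style="bold bright_cyan"))

# short items are rendered in pairs; the left column is padded to 45 chars
two_column_items = [
    Text.assemble(("uid: ", "dim"), "mBbVWx7CBNUAe5Wk0000"),           # made-up uid
    Text.assemble(("transform: ", "dim"), ("curate.ipynb", "cyan3")),  # made-up name
    Text.assemble(("created_by: ", "dim"), "testuser1"),
]
for i in range(0, len(two_column_items), 2):
    if i + 1 < len(two_column_items):
        left, right = two_column_items[i], two_column_items[i + 1]
        padding = " " * max(0, 45 - len(left.plain))
        general.add(Text.assemble(left, padding, right))
    else:
        general.add(two_column_items[i])  # odd item out gets its own row

Console().print(tree)
```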
lamindb/models/_django.py CHANGED
@@ -1,6 +1,6 @@
  from __future__ import annotations

- from typing import TYPE_CHECKING
+ from typing import TYPE_CHECKING, Any

  from django.contrib.postgres.aggregates import ArrayAgg
  from django.db import connection
@@ -13,8 +13,7 @@ from ._relations import dict_related_model_to_related_name, get_schema_modules
  from .schema import Schema

  if TYPE_CHECKING:
-     from .artifact import Artifact
-     from .sqlrecord import SQLRecord
+     from .artifact import Artifact, Collection


  def patch_many_to_many_descriptor() -> None:
@@ -32,7 +31,8 @@ def patch_many_to_many_descriptor() -> None:
      def patched_get(self, instance, cls=None):
          if instance is not None and instance.pk is None:
              raise ValueError(
-                 f"You are trying to access the many-to-many relationships of an unsaved {instance.__class__.__name__} object. Please save it first using '.save()'."
+                 f"You are trying to access the many-to-many relationships of an unsaved {instance.__class__.__name__} object. "
+                 f"Please save it first using '.save()'."
              )

          manager = original_get(self, instance, cls)
@@ -77,12 +77,12 @@ def get_related_model(model, field_name):


  def get_artifact_with_related(
-     artifact: SQLRecord,
+     artifact: Artifact,
      include_fk: bool = False,
      include_m2m: bool = False,
      include_feature_link: bool = False,
      include_schema: bool = False,
- ) -> dict:
+ ) -> dict[str, Any]:
      """Fetch an artifact with its related data."""
      from ._label_manager import EXCLUDE_LABELS
      from .can_curate import get_name_field
@@ -234,6 +234,63 @@ def get_artifact_with_related(
      }


+ def get_collection_with_related(
+     collection: Collection,
+     include_fk: bool = False,
+ ) -> dict[str, Any]:
+     """Fetch a collection with its related data."""
+     from .can_curate import get_name_field
+
+     model = collection.__class__
+     schema_modules = get_schema_modules(collection._state.db)
+
+     foreign_key_fields = [
+         f.name
+         for f in model._meta.fields
+         if f.is_relation and f.related_model.__get_module_name__() in schema_modules
+     ]
+
+     # Clear previous queries
+     connection.queries_log.clear()
+
+     annotations = {}
+
+     if include_fk:
+         for fk in foreign_key_fields:
+             name_field = get_name_field(get_related_model(model, fk))
+             if fk == "run":
+                 annotations[f"fkfield_{fk}"] = JSONObject(
+                     id=F(f"{fk}__id"),
+                     name=F(f"{fk}__{name_field}"),
+                     transform_key=F(f"{fk}__transform__key"),
+                 )
+             else:
+                 annotations[f"fkfield_{fk}"] = JSONObject(
+                     id=F(f"{fk}__id"), name=F(f"{fk}__{name_field}")
+                 )
+
+     collection_meta = (
+         model.objects.using(collection._state.db)
+         .filter(uid=collection.uid)
+         .annotate(**annotations)
+         .values(*["id", "uid"], *annotations.keys())
+         .first()
+     )
+
+     if not collection_meta:
+         return None
+
+     related_data: dict = {"fk": {}}
+     for k, v in collection_meta.items():
+         if k.startswith("fkfield_") and v is not None:
+             related_data["fk"][k[8:]] = v
+
+     return {
+         **{name: collection_meta[name] for name in ["id", "uid"]},
+         "related_data": related_data,
+     }
+
  def get_schema_m2m_relations(artifact: Artifact, slot_schema: dict, limit: int = 20):
      """Fetch all many-to-many relationships for given feature sets."""
      from .can_curate import get_name_field
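For reference, a hedged sketch of the dictionary shape `get_collection_with_related` returns when called with `include_fk=True`; the ids, uid, and names below are invented, and which foreign keys appear depends on the collection:

```python
expected_shape = {
    "id": 7,
    "uid": "mBbVWx7CBNUAe5Wk0000",  # invented collection uid
    "related_data": {
        "fk": {
            # one JSON object per non-null foreign key, keyed without the "fkfield_" prefix
            "created_by": {"id": 1, "name": "testuser1"},
            "space": {"id": 1, "name": "all"},
            # for "run", the annotation additionally carries the transform key
            "run": {"id": 3, "name": None, "transform_key": "curate.ipynb"},
        }
    },
}
```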
lamindb/models/_feature_manager.py CHANGED
@@ -969,7 +969,6 @@ class FeatureManager:
                  }
              else:
                  result = parse_dtype(feature.dtype)[0]
-                 print(result["field"])
                  validated = result["registry"].validate(  # type: ignore
                      values, field=result["field"], mute=True
                  )