lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. lamindb/__init__.py +39 -32
  2. lamindb/_artifact.py +95 -64
  3. lamindb/_can_curate.py +19 -10
  4. lamindb/_collection.py +51 -49
  5. lamindb/_feature.py +9 -9
  6. lamindb/_finish.py +99 -86
  7. lamindb/_from_values.py +20 -17
  8. lamindb/_is_versioned.py +2 -1
  9. lamindb/_parents.py +23 -16
  10. lamindb/_query_manager.py +3 -3
  11. lamindb/_query_set.py +85 -18
  12. lamindb/_record.py +121 -46
  13. lamindb/_run.py +3 -3
  14. lamindb/_save.py +14 -8
  15. lamindb/{_feature_set.py → _schema.py} +34 -31
  16. lamindb/_storage.py +2 -1
  17. lamindb/_transform.py +51 -23
  18. lamindb/_ulabel.py +17 -8
  19. lamindb/_view.py +15 -14
  20. lamindb/base/__init__.py +24 -0
  21. lamindb/base/fields.py +281 -0
  22. lamindb/base/ids.py +103 -0
  23. lamindb/base/types.py +51 -0
  24. lamindb/base/users.py +30 -0
  25. lamindb/base/validation.py +67 -0
  26. lamindb/core/__init__.py +19 -14
  27. lamindb/core/_context.py +297 -228
  28. lamindb/core/_data.py +44 -49
  29. lamindb/core/_describe.py +41 -31
  30. lamindb/core/_django.py +59 -44
  31. lamindb/core/_feature_manager.py +192 -168
  32. lamindb/core/_label_manager.py +22 -22
  33. lamindb/core/_mapped_collection.py +17 -14
  34. lamindb/core/_settings.py +1 -12
  35. lamindb/core/_sync_git.py +56 -9
  36. lamindb/core/_track_environment.py +1 -1
  37. lamindb/core/datasets/_core.py +5 -6
  38. lamindb/core/exceptions.py +0 -7
  39. lamindb/core/fields.py +1 -1
  40. lamindb/core/loaders.py +18 -2
  41. lamindb/core/{schema.py → relations.py} +22 -19
  42. lamindb/core/storage/_anndata_accessor.py +1 -2
  43. lamindb/core/storage/_backed_access.py +2 -1
  44. lamindb/core/storage/_tiledbsoma.py +40 -13
  45. lamindb/core/storage/objects.py +1 -1
  46. lamindb/core/storage/paths.py +13 -8
  47. lamindb/core/subsettings/__init__.py +0 -2
  48. lamindb/core/types.py +2 -23
  49. lamindb/core/versioning.py +11 -7
  50. lamindb/{_curate.py → curators/__init__.py} +700 -57
  51. lamindb/curators/_spatial.py +528 -0
  52. lamindb/integrations/_vitessce.py +1 -3
  53. lamindb/migrations/0052_squashed.py +1261 -0
  54. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
  55. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
  56. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
  57. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
  58. lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
  59. lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
  60. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
  61. lamindb/migrations/0060_alter_artifact__actions.py +22 -0
  62. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
  63. lamindb/migrations/0062_add_is_latest_field.py +32 -0
  64. lamindb/migrations/0063_populate_latest_field.py +45 -0
  65. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
  66. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
  67. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
  68. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
  69. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
  70. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
  71. lamindb/migrations/0069_squashed.py +1770 -0
  72. lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
  73. lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
  74. lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
  75. lamindb/migrations/0073_merge_ourprojects.py +945 -0
  76. lamindb/migrations/0074_lamindbv1_part4.py +374 -0
  77. lamindb/migrations/0075_lamindbv1_part5.py +276 -0
  78. lamindb/migrations/0076_lamindbv1_part6.py +621 -0
  79. lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
  80. lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
  81. lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
  82. lamindb/migrations/__init__.py +0 -0
  83. lamindb/models.py +4064 -0
  84. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
  85. lamindb-1.0rc1.dist-info/RECORD +100 -0
  86. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
  87. lamindb/core/subsettings/_transform_settings.py +0 -21
  88. lamindb-0.77.2.dist-info/RECORD +0 -63
  89. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/core/_label_manager.py CHANGED
@@ -5,12 +5,12 @@ from collections import defaultdict
  from typing import TYPE_CHECKING

  from django.db import connections
- from lamin_utils import colors, logger
- from lnschema_core.models import CanCurate, Feature
+ from lamin_utils import logger
  from rich.table import Column, Table
  from rich.text import Text
+ from rich.tree import Tree

- from lamindb._from_values import _print_values
+ from lamindb._from_values import _format_values
  from lamindb._record import (
  REGISTRY_UNIQUE_FIELD,
  get_name_field,
@@ -18,6 +18,7 @@ from lamindb._record import (
  transfer_to_default_db,
  )
  from lamindb._save import save
+ from lamindb.models import CanCurate, Feature

  from ._describe import (
  NAME_WIDTH,
@@ -28,15 +29,13 @@ from ._describe import (
  )
  from ._django import get_artifact_with_related, get_related_model
  from ._settings import settings
- from .schema import dict_related_model_to_related_name
+ from .relations import dict_related_model_to_related_name

  if TYPE_CHECKING:
- from lnschema_core.models import Artifact, Collection, Record
- from rich.tree import Tree
-
  from lamindb._query_set import QuerySet
+ from lamindb.models import Artifact, Collection, Record

- EXCLUDE_LABELS = {"feature_sets"}
+ EXCLUDE_LABELS = {"_schemas_m2m"}


  def _get_labels(
@@ -99,39 +98,40 @@ def describe_labels(
  return tree

  labels_table = Table(
- Column(
- Text.assemble(("Labels", "green_yellow")),
- style="",
- no_wrap=True,
- width=NAME_WIDTH,
- ),
+ Column("", style="", no_wrap=True, width=NAME_WIDTH),
  Column("", style="dim", no_wrap=True, width=TYPE_WIDTH),
  Column("", width=VALUES_WIDTH, no_wrap=True),
- # show_header=True,
+ show_header=False,
  box=None,
  pad_edge=False,
  )
  for related_name, labels in labels_data.items():
- if not labels or related_name == "feature_sets":
+ if not labels or related_name == "_schemas_m2m":
  continue
  if isinstance(labels, dict): # postgres, labels are a dict[id, name]
- print_values = _print_values(labels.values(), n=10)
+ print_values = _format_values(labels.values(), n=10, quotes=False)
  else: # labels are a QuerySet
  field = get_name_field(labels)
- print_values = _print_values(labels.values_list(field, flat=True), n=10)
+ print_values = _format_values(
+ labels.values_list(field, flat=True), n=10, quotes=False
+ )
  if print_values:
  related_model = get_related_model(self, related_name)
- type_str = related_model.__get_name_with_schema__()
+ type_str = related_model.__get_name_with_module__()
  labels_table.add_row(
  f".{related_name}", Text(type_str, style="dim"), print_values
  )

+ labels_header = Text("Labels", style="bold green_yellow")
  if as_subtree:
  if labels_table.rows:
- return labels_table
+ labels_tree = Tree(labels_header, guide_style="dim")
+ labels_tree.add(labels_table)
+ return labels_tree
  else:
  if labels_table.rows:
- tree.add(labels_table)
+ labels_tree = tree.add(labels_header)
+ labels_tree.add(labels_table)
  return tree


@@ -310,7 +310,7 @@ class LabelManager:
  """
  d = dict_related_model_to_related_name(self._host)
  registry = label.__class__
- related_name = d.get(registry.__get_name_with_schema__())
+ related_name = d.get(registry.__get_name_with_module__())
  link_model = getattr(self._host, related_name).through
  link_records = link_model.filter(
  artifact_id=self._host.id, **{f"{registry.__name__.lower()}_id": label.id}
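
The labels section of `describe` is now rendered as a rich `Tree` whose node carries the bold "Labels" title, with a header-less `Table` nested beneath it. A minimal sketch of that nesting pattern (column widths and the example row are illustrative, not lamindb's exact code):

    from rich import print as rprint
    from rich.table import Column, Table
    from rich.text import Text
    from rich.tree import Tree

    # header-less table: the section title now lives on the tree node, not on a column
    table = Table(
        Column("", no_wrap=True, width=25),
        Column("", style="dim", no_wrap=True, width=25),
        show_header=False,
        box=None,
        pad_edge=False,
    )
    table.add_row(".ulabels", Text("ULabel", style="dim"))

    tree = Tree(Text("Labels", style="bold green_yellow"), guide_style="dim")
    tree.add(table)  # Tree.add accepts any renderable as a child
    rprint(tree)
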
lamindb/core/_mapped_collection.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations

  from collections import Counter
  from functools import reduce
- from pathlib import Path
  from typing import TYPE_CHECKING, Literal

  import numpy as np
@@ -86,9 +85,9 @@ class MappedCollection:
  retrieves ``.X``.
  obsm_keys: Keys from the ``.obsm`` slots.
  obs_keys: Keys from the ``.obs`` slots.
- obs_filter: Select only observations with these values for the given obs column.
- Should be a tuple with an obs column name as the first element
- and filtering values (a string or a tuple of strings) as the second element.
+ obs_filter: Select only observations with these values for the given obs columns.
+ Should be a dictionary with obs column names as keys
+ and filtering values (a string or a tuple of strings) as values.
  join: `"inner"` or `"outer"` virtual joins. If ``None`` is passed,
  does not join.
  encode_labels: Encode labels into integers.
@@ -107,7 +106,7 @@ class MappedCollection:
  layers_keys: str | list[str] | None = None,
  obs_keys: str | list[str] | None = None,
  obsm_keys: str | list[str] | None = None,
- obs_filter: tuple[str, str | tuple[str, ...]] | None = None,
+ obs_filter: dict[str, str | tuple[str, ...]] | None = None,
  join: Literal["inner", "outer"] | None = "inner",
  encode_labels: bool | list[str] = True,
  unknown_label: str | dict[str, str] | None = None,
@@ -121,11 +120,11 @@ class MappedCollection:
  )

  self.filtered = obs_filter is not None
- if self.filtered and len(obs_filter) != 2:
- raise ValueError(
- "obs_filter should be a tuple with obs column name "
- "as the first element and filtering values as the second element"
+ if self.filtered and not isinstance(obs_filter, dict):
+ logger.warning(
+ "Passing a tuple to `obs_filter` is deprecated, use a dictionary"
  )
+ obs_filter = {obs_filter[0]: obs_filter[1]}

  if layers_keys is None:
  self.layers_keys = ["X"]
@@ -183,12 +182,16 @@ class MappedCollection:
  store_path = self.path_list[i]
  self._check_csc_raise_error(X, "X", store_path)
  if self.filtered:
- obs_filter_key, obs_filter_values = obs_filter
- indices_storage = np.where(
- np.isin(
+ indices_storage_mask = None
+ for obs_filter_key, obs_filter_values in obs_filter.items():
+ obs_filter_mask = np.isin(
  self._get_labels(store, obs_filter_key), obs_filter_values
  )
- )[0]
+ if indices_storage_mask is None:
+ indices_storage_mask = obs_filter_mask
+ else:
+ indices_storage_mask &= obs_filter_mask
+ indices_storage = np.where(indices_storage_mask)[0]
  n_obs_storage = len(indices_storage)
  else:
  if isinstance(X, ArrayTypes): # type: ignore
@@ -348,7 +351,7 @@ class MappedCollection:

  @property
  def original_shapes(self) -> list[tuple[int, int]]:
- """Shapes of the underlying AnnData objects."""
+ """Shapes of the underlying AnnData objects (with `obs_filter` applied)."""
  if self.n_vars_list is None:
  n_vars_list = [None] * len(self.n_obs_list)
  else:
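
`obs_filter` now accepts a dictionary and AND-combines the per-column masks within each store; tuples still work but emit a deprecation warning. A hedged usage sketch (the collection name and obs columns are illustrative):

    import lamindb as ln

    collection = ln.Collection.get(name="scrna-atlas")  # illustrative name
    ds = collection.mapped(
        obs_keys=["cell_type"],
        # keep only observations matching *all* of the given columns
        obs_filter={"tissue": ("lung", "liver"), "assay": "10x 3' v3"},
    )
    print(len(ds))  # number of observations that pass both filters
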
lamindb/core/_settings.py CHANGED
@@ -1,7 +1,7 @@
  from __future__ import annotations

  import os
- from typing import TYPE_CHECKING, Literal
+ from typing import TYPE_CHECKING

  import lamindb_setup as ln_setup
  from lamin_utils import logger
@@ -10,7 +10,6 @@ from lamindb_setup.core._settings import settings as setup_settings
  from lamindb_setup.core._settings_instance import sanitize_git_repo_url

  from .subsettings._creation_settings import CreationSettings, creation_settings
- from .subsettings._transform_settings import TransformSettings, transform_settings

  if TYPE_CHECKING:
  from collections.abc import Mapping
@@ -80,16 +79,6 @@ class Settings:
  storage_settings = ln_setup.core.StorageSettings(root=self._using_storage)
  return storage_settings

- @property
- def transform(self) -> TransformSettings:
- """Transform settings.
-
- Is deprecated since version 0.76.1.
- """
- # enable warning soon
- # logger.warning("Transform settings are deprecated, please instead set `ln.context.uid`")
- return transform_settings
-
  @property
  def sync_git_repo(self) -> str | None:
  """Sync transforms with scripts in git repository.
lamindb/core/_sync_git.py CHANGED
@@ -53,22 +53,69 @@ def check_local_git_repo() -> bool:


  def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
- command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
+ # Fetch all remote branches so that we can also search them
+ fetch_command = ["git", "fetch", "origin", "+refs/heads/*:refs/remotes/origin/*"]
+ subprocess.run(fetch_command, cwd=repo_dir, check=True)
+
+ # Find the commit containing the blob hash in all branches
+ command = [
+ "git",
+ "log",
+ "--all",
+ f"--find-object={blob_hash}",
+ "--pretty=format:%H",
+ ]
  result = subprocess.run(
  command,
  capture_output=True,
  cwd=repo_dir,
  )
- # we just care to find one commit
- # hence, we split by new line ("\n") and use the first one
+ # We just care to find one commit
+ # Hence, we split by new line ("\n") and use the first one
  commit_hash = result.stdout.decode().split("\n")[0]
- if commit_hash == "" or result.returncode == 1:
+
+ if not commit_hash or result.returncode == 1:
  return None
- else:
- assert ( # noqa: S101
- len(commit_hash) == 40
- ), f"commit hash |{commit_hash}| is not 40 characters long"
- return commit_hash
+
+ default_branch = (
+ subprocess.run(
+ ["git", "rev-parse", "--abbrev-ref", "origin/HEAD"],
+ capture_output=True,
+ cwd=repo_dir,
+ text=True,
+ )
+ .stdout.strip()
+ .split("/")[-1]
+ )
+
+ # Find all branches containing the commit
+ commit_containing_branches = subprocess.run(
+ ["git", "branch", "--all", "--contains", commit_hash],
+ capture_output=True,
+ cwd=repo_dir,
+ text=True,
+ ).stdout.split("\n")
+
+ # Clean up branch names and filter out the default branch
+ commit_containing_branches = [
+ branch.strip().replace("remotes/", "")
+ for branch in commit_containing_branches
+ if branch.strip()
+ ]
+ non_default_branches = [
+ branch for branch in commit_containing_branches if default_branch not in branch
+ ]
+
+ if non_default_branches:
+ logger.warning(
+ f"code blob hash {blob_hash} was found in non-default branch(es): {', '.join(non_default_branches)}"
+ )
+
+ assert ( # noqa: S101
+ len(commit_hash) == 40
+ ), f"commit hash |{commit_hash}| is not 40 characters long"
+
+ return commit_hash


  def get_filepath_within_git_repo(
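
For callers the API is unchanged: with a synced git repo configured, the script's blob hash is now resolved across all fetched branches rather than only the current one, and a warning lists non-default branches containing the matching commit. A hedged sketch of the triggering setup (the repository URL is illustrative):

    import lamindb as ln

    ln.settings.sync_git_repo = "https://github.com/my-org/analyses"  # illustrative URL
    ln.track()  # the script's blob hash is now searched across all branches
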
lamindb/core/_track_environment.py CHANGED
@@ -7,7 +7,7 @@ import lamindb_setup as ln_setup
  from lamin_utils import logger

  if TYPE_CHECKING:
- from lnschema_core.models import Run
+ from lamindb.models import Run


  def track_environment(run: Run) -> None:
lamindb/core/datasets/_core.py CHANGED
@@ -5,11 +5,10 @@ from typing import TYPE_CHECKING
  from urllib.request import urlretrieve

  import anndata as ad
- import numpy as np
  import pandas as pd
- from lnschema_core import ids
  from upath import UPath

+ from lamindb.base.ids import base62
  from lamindb.core._settings import settings

  if TYPE_CHECKING:
@@ -146,7 +145,7 @@ def dir_iris_images() -> UPath: # pragma: no cover
  This is why on the UI, the artifact shows up as output of the downstream
  demo notebook rather than the upstream curation notebook. The lineage
  information should still be captured by
- https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/lnschema_core/models.py#L2050-L2052
+ https://github.com/laminlabs/lnschema-core/blob/a90437e91dfbd6b9002f18c3e978bd0f9c9a632d/lamindb/models.py#L2050-L2052
  but we don't use this in the UI yet.
  """
  return UPath("s3://lamindata/iris_studies")
@@ -481,11 +480,11 @@ def dir_scrnaseq_cellranger(
  fastqdir.mkdir(parents=True, exist_ok=True)
  fastqfile1 = fastqdir / f"{sample_name}_R1_001.fastq.gz"
  with open(fastqfile1, "w") as f:
- f.write(f"{ids.base62(n_char=6)}")
+ f.write(f"{base62(n_char=6)}")
  fastqfile2 = fastqdir / f"{sample_name}_R2_001.fastq.gz"
  fastqfile2.touch(exist_ok=True)
  with open(fastqfile2, "w") as f:
- f.write(f"{ids.base62(n_char=6)}")
+ f.write(f"{base62(n_char=6)}")

  sampledir = basedir / f"{sample_name}"
  for folder in ["raw_feature_bc_matrix", "filtered_feature_bc_matrix", "analysis"]:
@@ -511,7 +510,7 @@ def dir_scrnaseq_cellranger(
  ]:
  file = sampledir / filename
  with open(file, "w") as f:
- f.write(f"{ids.base62(n_char=6)}")
+ f.write(f"{base62(n_char=6)}")

  return sampledir
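
`base62` now ships in `lamindb.base.ids` instead of `lnschema_core.ids`. A self-contained sketch of what such a generator typically looks like (an assumption for illustration; the shipped implementation may differ):

    import secrets
    import string

    def base62(n_char: int) -> str:
        """Return a random base62 string of length `n_char`."""
        alphabet = string.digits + string.ascii_letters  # 10 + 52 = 62 characters
        return "".join(secrets.choice(alphabet) for _ in range(n_char))

    print(base62(n_char=6))  # e.g. 'a3Xk9Q'
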
lamindb/core/exceptions.py CHANGED
@@ -7,7 +7,6 @@
  DoesNotExist
  ValidationError
  NotebookNotSaved
- NoTitleError
  MissingContextUID
  UpdateContext
  IntegrityError
@@ -79,12 +78,6 @@ class IntegrityError(Exception):
  pass


- class NoTitleError(SystemExit):
- """Notebook has no title."""
-
- pass
-
-
  class MissingContextUID(SystemExit):
  """User didn't define transform settings."""
lamindb/core/fields.py CHANGED
@@ -9,4 +9,4 @@ The field accessor of a :class:`~lamindb.core.Record`:

  """

- from lnschema_core.types import FieldAttr
+ from lamindb.base.types import FieldAttr # noqa: F401
lamindb/core/loaders.py CHANGED
@@ -33,7 +33,6 @@ from lamindb_setup.core.upath import (
  from ._settings import settings

  if TYPE_CHECKING:
- import mudata as md
  from lamindb_setup.core.types import UPathStr

  try:
@@ -110,8 +109,23 @@ def load_json(path: UPathStr) -> dict:
  return data


+ def load_yaml(path: UPathStr) -> dict | UPathStr:
+ """Load `.yaml` to `dict`."""
+ try:
+ import yaml # type: ignore
+
+ with open(path) as f:
+ data = yaml.safe_load(f)
+ return data
+ except ImportError:
+ logger.warning(
+ "Please install PyYAML (`pip install PyYAML`) to load `.yaml` files."
+ )
+ return path
+
+
  def load_image(path: UPathStr) -> None | UPathStr:
- """Display `.svg` in ipython, otherwise return path."""
+ """Display `.jpg`, `.gif` or `.png` in ipython, otherwise return path."""
  if is_run_from_ipython:
  from IPython.display import Image, display

@@ -147,7 +161,9 @@ FILE_LOADERS = {
  ".zarr": load_anndata_zarr,
  ".html": load_html,
  ".json": load_json,
+ ".yaml": load_yaml,
  ".h5mu": load_h5mu,
+ ".gif": load_image,
  ".jpg": load_image,
  ".png": load_image,
  ".svg": load_svg,
lamindb/core/{schema.py → relations.py} CHANGED
@@ -7,47 +7,50 @@ from lamindb_setup._connect_instance import (
  load_instance_settings,
  )
  from lamindb_setup.core._settings_store import instance_settings_file
- from lnschema_core.models import Feature, FeatureSet, LinkORM, Record

+ from lamindb.models import LinkORM, Record, Schema

- def get_schemas_modules(instance: str | None) -> set[str]:
+
+ def get_schema_modules(instance: str | None) -> set[str]:
  if instance is None or instance == "default":
- schema_modules = set(ln_setup.settings.instance.schema)
+ schema_modules = set(ln_setup.settings.instance.modules)
  schema_modules.add("core")
  return schema_modules
  owner, name = get_owner_name_from_identifier(instance)
  settings_file = instance_settings_file(name, owner)
  if settings_file.exists():
- schema = set(load_instance_settings(settings_file).schema)
+ modules = set(load_instance_settings(settings_file).modules)
  else:
  cache_filepath = (
  ln_setup.settings.cache_dir / f"instance--{owner}--{name}--uid.txt"
  )
  if cache_filepath.exists():
- schema = set(cache_filepath.read_text().split("\n")[1].split(","))
+ modules = set(cache_filepath.read_text().split("\n")[1].split(","))
  else:
  raise ValueError(f"Instance {instance} not found")
- shared_schema_modules = set(ln_setup.settings.instance.schema).intersection(schema)
+ shared_schema_modules = set(ln_setup.settings.instance.modules).intersection(
+ modules
+ )
  shared_schema_modules.add("core")
  return shared_schema_modules


- def dict_schema_name_to_model_name(
+ def dict_module_name_to_model_name(
  registry: type[Record], instance: str | None = None
  ) -> dict[str, Record]:
- schema_modules = get_schemas_modules(instance)
+ schema_modules = get_schema_modules(instance)
  d: dict = {
- i.related_model.__get_name_with_schema__(): i.related_model
+ i.related_model.__get_name_with_module__(): i.related_model
  for i in registry._meta.related_objects
  if i.related_name is not None
- and i.related_model.__get_schema_name__() in schema_modules
+ and i.related_model.__get_module_name__() in schema_modules
  }
  d.update(
  {
- i.related_model.__get_name_with_schema__(): i.related_model
+ i.related_model.__get_name_with_module__(): i.related_model
  for i in registry._meta.many_to_many
  if i.name is not None
- and i.related_model.__get_schema_name__() in schema_modules
+ and i.related_model.__get_module_name__() in schema_modules
  }
  )
  return d
@@ -59,11 +62,11 @@ def dict_related_model_to_related_name(
  def include(model: Record):
  return not links != issubclass(model, LinkORM)

- schema_modules = get_schemas_modules(instance)
+ schema_modules = get_schema_modules(instance)

  related_objects = registry._meta.related_objects + registry._meta.many_to_many
  d: dict = {
- record.related_model.__get_name_with_schema__(): (
+ record.related_model.__get_name_with_module__(): (
  record.related_name
  if not isinstance(record, ManyToManyField)
  else record.name
@@ -72,7 +75,7 @@ def dict_related_model_to_related_name(
  if (
  record.name is not None
  and include(record.related_model)
- and record.related_model.__get_schema_name__() in schema_modules
+ and record.related_model.__get_module_name__() in schema_modules
  )
  }
  return d
@@ -81,15 +84,15 @@ def dict_related_model_to_related_name(
  def get_related_name(features_type: type[Record]) -> str:
  candidates = [
  field.related_name
- for field in FeatureSet._meta.related_objects
+ for field in Schema._meta.related_objects
  if field.related_model == features_type
  ]
  if not candidates:
  raise ValueError(
  f"Can't create feature sets from {features_type.__name__} because it's not"
- " related to it!\nYou need to create a link model between FeatureSet and"
- " your Record in your custom schema.\nTo do so, add a"
- " line:\nfeature_sets = models.ManyToMany(FeatureSet,"
+ " related to it!\nYou need to create a link model between Schema and"
+ " your Record in your custom module.\nTo do so, add a"
+ " line:\n_schemas_m2m = models.ManyToMany(Schema,"
  " related_name='mythings')\n"
  )
  return candidates[0]
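
The schema → module rename runs through these helpers: registry mappings are now keyed by `__get_name_with_module__()`. A hedged sketch of how the mapping is consumed (the printed shape is an assumption for illustration):

    import lamindb as ln
    from lamindb.core.relations import dict_related_model_to_related_name

    artifact = ln.Artifact.filter().first()
    d = dict_related_model_to_related_name(artifact)
    # maps module-qualified model names to related names,
    # e.g. {"ULabel": "ulabels", "bionty.CellType": "cell_types", ...}
    print(d)
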
lamindb/core/storage/_anndata_accessor.py CHANGED
@@ -17,12 +17,11 @@ from anndata._io.specs.registry import get_spec, read_elem, read_elem_partial
  from anndata.compat import _read_attr
  from fsspec.implementations.local import LocalFileSystem
  from lamin_utils import logger
- from lamindb_setup.core.upath import UPath, create_mapper, infer_filesystem
+ from lamindb_setup.core.upath import create_mapper, infer_filesystem
  from packaging import version

  if TYPE_CHECKING:
  from collections.abc import Mapping
- from pathlib import Path

  from fsspec.core import OpenFile
  from lamindb_setup.core.types import UPathStr
lamindb/core/storage/_backed_access.py CHANGED
@@ -4,7 +4,8 @@ from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any, Callable

  from anndata._io.specs.registry import get_spec
- from lnschema_core import Artifact
+
+ from lamindb.models import Artifact

  from ._anndata_accessor import AnnDataAccessor, StorageType, registry
  from ._pyarrow_dataset import _is_pyarrow_dataset, _open_pyarrow_dataset
lamindb/core/storage/_tiledbsoma.py CHANGED
@@ -2,11 +2,16 @@ from __future__ import annotations

  from typing import TYPE_CHECKING, Literal

+ import pandas as pd
+ import pyarrow as pa
  from anndata import AnnData, read_h5ad
+ from lamin_utils import logger
  from lamindb_setup import settings as setup_settings
  from lamindb_setup.core._settings_storage import get_storage_region
  from lamindb_setup.core.upath import LocalPathClasses, create_path
- from lnschema_core import Artifact, Run
+ from packaging import version
+
+ from lamindb.models import Artifact, Run

  if TYPE_CHECKING:
  from lamindb_setup.core.types import UPathStr
@@ -137,9 +142,17 @@ def save_tiledbsoma_experiment(
  storepath = storepath.as_posix()

  add_run_uid = True
+ run_uid_dtype = "category"
  if appending:
  with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
- add_run_uid = "lamin_run_uid" in store["obs"].schema.names
+ obs_schema = store["obs"].schema
+ add_run_uid = "lamin_run_uid" in obs_schema.names
+ # this is needed to enable backwards compatibility with tiledbsoma stores
+ # created before PR 2300
+ if add_run_uid:
+ column_type = obs_schema.types[obs_schema.names.index("lamin_run_uid")]
+ if not isinstance(column_type, pa.DictionaryType):
+ run_uid_dtype = None

  if add_run_uid and run is None:
  raise ValueError("Pass `run`")
@@ -147,17 +160,16 @@ def save_tiledbsoma_experiment(
  adata_objects = []
  for adata in adatas:
  if isinstance(adata, AnnData):
- if add_run_uid:
- if adata.is_view:
- raise ValueError(
- "Can not write an `AnnData` view, please do `adata.copy()` before passing."
- )
- else:
- adata.obs["lamin_run_uid"] = run.uid
+ if add_run_uid and adata.is_view:
+ raise ValueError(
+ "Can not write an `AnnData` view, please do `adata.copy()` before passing."
+ )
  else:
  adata = _load_h5ad_zarr(create_path(adata))
- if add_run_uid:
- adata.obs["lamin_run_uid"] = run.uid
+ if add_run_uid:
+ adata.obs["lamin_run_uid"] = pd.Series(
+ run.uid, index=adata.obs.index, dtype=run_uid_dtype
+ )
  adata_objects.append(adata)

  registration_mapping = kwargs.get("registration_mapping", None)
@@ -172,13 +184,28 @@ def save_tiledbsoma_experiment(
  context=ctx,
  )

+ resize_experiment = False
  if registration_mapping is not None:
- n_observations = len(registration_mapping.obs_axis.data)
+ if version.parse(soma.__version__) < version.parse("1.15.0rc4"):
+ n_observations = len(registration_mapping.obs_axis.data)
+ else:
+ n_observations = registration_mapping.get_obs_shape()
+ resize_experiment = True
  else: # happens only if not appending and only one adata passed
  assert len(adata_objects) == 1 # noqa: S101
  n_observations = adata_objects[0].n_obs

+ logger.important(f"Writing the tiledbsoma store to {storepath}")
  for adata_obj in adata_objects:
+ if resize_experiment and soma.Experiment.exists(storepath, context=ctx):
+ # can only happen if registration_mapping is not None
+ soma_io.resize_experiment(
+ storepath,
+ nobs=n_observations,
+ nvars=registration_mapping.get_var_shapes(),
+ context=ctx,
+ )
+ resize_experiment = False
  soma_io.from_anndata(
  storepath,
  adata_obj,
@@ -199,6 +226,6 @@ def save_tiledbsoma_experiment(
  _is_internal_call=True,
  )
  artifact.n_observations = n_observations
- artifact._accessor = "tiledbsoma"
+ artifact.otype = "tiledbsoma"

  return artifact.save()
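
A hedged usage sketch of the updated function (import path, file name, and key are illustrative): `lamin_run_uid` is now written as a categorical column unless appending to a pre-PR-2300 store, and on tiledbsoma >= 1.15.0rc4 the experiment is resized before each append.

    import lamindb as ln
    from lamindb.core.storage import save_tiledbsoma_experiment

    ln.track()  # a run is required: every obs row gets a `lamin_run_uid`
    artifact = save_tiledbsoma_experiment(
        ["batch1.h5ad"],  # illustrative input
        key="scrna/experiment.tiledbsoma",
        run=ln.context.run,
    )
    assert artifact.otype == "tiledbsoma"  # renamed from `_accessor`
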
lamindb/core/storage/objects.py CHANGED
@@ -12,7 +12,7 @@ if TYPE_CHECKING:

  def _mudata_is_installed():
  try:
- import mudata
+ import mudata # noqa: F401
  except ImportError:
  return False
  return True