lamindb 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (78)
  1. lamindb/__init__.py +14 -5
  2. lamindb/_artifact.py +174 -57
  3. lamindb/_can_curate.py +27 -8
  4. lamindb/_collection.py +85 -51
  5. lamindb/_feature.py +177 -41
  6. lamindb/_finish.py +222 -81
  7. lamindb/_from_values.py +83 -98
  8. lamindb/_parents.py +4 -4
  9. lamindb/_query_set.py +59 -17
  10. lamindb/_record.py +171 -53
  11. lamindb/_run.py +4 -4
  12. lamindb/_save.py +33 -10
  13. lamindb/_schema.py +135 -38
  14. lamindb/_storage.py +1 -1
  15. lamindb/_tracked.py +106 -0
  16. lamindb/_transform.py +21 -8
  17. lamindb/_ulabel.py +5 -14
  18. lamindb/base/validation.py +2 -6
  19. lamindb/core/__init__.py +13 -14
  20. lamindb/core/_context.py +39 -36
  21. lamindb/core/_data.py +29 -25
  22. lamindb/core/_describe.py +1 -1
  23. lamindb/core/_django.py +1 -1
  24. lamindb/core/_feature_manager.py +54 -44
  25. lamindb/core/_label_manager.py +4 -4
  26. lamindb/core/_mapped_collection.py +20 -7
  27. lamindb/core/datasets/__init__.py +6 -1
  28. lamindb/core/datasets/_core.py +12 -11
  29. lamindb/core/datasets/_small.py +66 -20
  30. lamindb/core/exceptions.py +1 -90
  31. lamindb/core/loaders.py +7 -13
  32. lamindb/core/relations.py +6 -4
  33. lamindb/core/storage/_anndata_accessor.py +41 -0
  34. lamindb/core/storage/_backed_access.py +2 -2
  35. lamindb/core/storage/_pyarrow_dataset.py +25 -15
  36. lamindb/core/storage/_tiledbsoma.py +56 -12
  37. lamindb/core/storage/paths.py +41 -22
  38. lamindb/core/subsettings/_creation_settings.py +4 -16
  39. lamindb/curators/__init__.py +2168 -833
  40. lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
  41. lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
  42. lamindb/errors.py +96 -0
  43. lamindb/integrations/_vitessce.py +3 -3
  44. lamindb/migrations/0069_squashed.py +76 -75
  45. lamindb/migrations/0075_lamindbv1_part5.py +4 -5
  46. lamindb/migrations/0082_alter_feature_dtype.py +21 -0
  47. lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
  48. lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
  49. lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
  50. lamindb/migrations/0086_various.py +95 -0
  51. lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
  52. lamindb/migrations/0088_schema_components.py +273 -0
  53. lamindb/migrations/0088_squashed.py +4372 -0
  54. lamindb/models.py +423 -156
  55. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/METADATA +10 -7
  56. lamindb-1.1.0.dist-info/RECORD +95 -0
  57. lamindb/curators/_spatial.py +0 -528
  58. lamindb/migrations/0052_squashed.py +0 -1261
  59. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
  60. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
  61. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
  62. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
  63. lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
  64. lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
  65. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
  66. lamindb/migrations/0060_alter_artifact__actions.py +0 -22
  67. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
  68. lamindb/migrations/0062_add_is_latest_field.py +0 -32
  69. lamindb/migrations/0063_populate_latest_field.py +0 -45
  70. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
  71. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
  72. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
  73. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
  74. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
  75. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
  76. lamindb-1.0.4.dist-info/RECORD +0 -102
  77. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/LICENSE +0 -0
  78. {lamindb-1.0.4.dist-info → lamindb-1.1.0.dist-info}/WHEEL +0 -0
lamindb/core/storage/_tiledbsoma.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING, Literal
+from urllib.parse import urlparse
 
 import pandas as pd
 import pyarrow as pa
@@ -17,6 +18,7 @@ if TYPE_CHECKING:
     from lamindb_setup.core.types import UPathStr
     from tiledbsoma import Collection as SOMACollection
     from tiledbsoma import Experiment as SOMAExperiment
+    from tiledbsoma import Measurement as SOMAMeasurement
     from upath import UPath
 
 
@@ -36,9 +38,21 @@ def _load_h5ad_zarr(objpath: UPath):
 
 
 def _tiledb_config_s3(storepath: UPath) -> dict:
-    region = get_storage_region(storepath)
-    tiledb_config = {"vfs.s3.region": region}
     storage_options = storepath.storage_options
+    tiledb_config = {}
+
+    endpoint_url = storage_options.get("endpoint_url", None)
+    if endpoint_url is not None:
+        tiledb_config["vfs.s3.region"] = ""
+        tiledb_config["vfs.s3.use_virtual_addressing"] = "false"
+        parsed = urlparse(endpoint_url)
+        tiledb_config["vfs.s3.scheme"] = parsed.scheme
+        tiledb_config["vfs.s3.endpoint_override"] = (
+            parsed._replace(scheme="").geturl().lstrip("/")
+        )
+    else:
+        tiledb_config["vfs.s3.region"] = get_storage_region(storepath)
+
     if "key" in storage_options:
         tiledb_config["vfs.s3.aws_access_key_id"] = storage_options["key"]
     if "secret" in storage_options:
@@ -51,7 +65,7 @@ def _tiledb_config_s3(storepath: UPath) -> dict:
 
 
 def _open_tiledbsoma(
     storepath: UPath, mode: Literal["r", "w"] = "r"
-) -> SOMACollection | SOMAExperiment:
+) -> SOMACollection | SOMAExperiment | SOMAMeasurement:
     try:
         import tiledbsoma as soma
     except ImportError as e:
@@ -71,6 +85,8 @@ def _open_tiledbsoma(
     soma_objects = [obj.name for obj in storepath.iterdir()]
     if "obs" in soma_objects and "ms" in soma_objects:
         SOMAType = soma.Experiment
+    elif "var" in soma_objects:
+        SOMAType = soma.Measurement
     else:
         SOMAType = soma.Collection
     return SOMAType.open(storepath_str, mode=mode, context=ctx)
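
The dispatch above infers the SOMA type from the store's top-level members: "obs" plus "ms" marks an Experiment, a bare "var" marks a Measurement, and anything else is opened as a generic Collection. A self-contained sketch of that heuristic (function name is hypothetical; it merely mirrors the branching above):

    def infer_soma_type(member_names: set[str]) -> str:
        # mirrors the branching in _open_tiledbsoma, for illustration only
        if {"obs", "ms"} <= member_names:
            return "Experiment"
        if "var" in member_names:
            return "Measurement"
        return "Collection"

    assert infer_soma_type({"obs", "ms"}) == "Experiment"
    assert infer_soma_type({"var", "X"}) == "Measurement"
    assert infer_soma_type({"raw"}) == "Collection"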
@@ -134,17 +150,17 @@ def save_tiledbsoma_experiment(
     )
     storepath = setup_settings.storage.root / storage_key
 
-    if storepath.protocol == "s3":
+    if storepath.protocol == "s3":  # type: ignore
         ctx = soma.SOMATileDBContext(tiledb_config=_tiledb_config_s3(storepath))
     else:
         ctx = None
 
-    storepath = storepath.as_posix()
+    storepath_str = storepath.as_posix()
 
     add_run_uid = True
     run_uid_dtype = "category"
     if appending:
-        with soma.Experiment.open(storepath, mode="r", context=ctx) as store:
+        with soma.Experiment.open(storepath_str, mode="r", context=ctx) as store:
             obs_schema = store["obs"].schema
             add_run_uid = "lamin_run_uid" in obs_schema.names
     # this is needed to enable backwards compatibility with tiledbsoma stores
@@ -175,7 +191,7 @@ def save_tiledbsoma_experiment(
     registration_mapping = kwargs.get("registration_mapping", None)
     if registration_mapping is None and (appending or len(adata_objects) > 1):
         registration_mapping = soma_io.register_anndatas(
-            experiment_uri=storepath if appending else None,
+            experiment_uri=storepath_str if appending else None,
             adatas=adata_objects,
             measurement_name=measurement_name,
             obs_field_name=obs_id_name,
@@ -195,19 +211,19 @@ def save_tiledbsoma_experiment(
         assert len(adata_objects) == 1  # noqa: S101
         n_observations = adata_objects[0].n_obs
 
-    logger.important(f"Writing the tiledbsoma store to {storepath}")
+    logger.important(f"Writing the tiledbsoma store to {storepath_str}")
     for adata_obj in adata_objects:
-        if resize_experiment and soma.Experiment.exists(storepath, context=ctx):
+        if resize_experiment and soma.Experiment.exists(storepath_str, context=ctx):
             # can only happen if registration_mapping is not None
             soma_io.resize_experiment(
-                storepath,
+                storepath_str,
                 nobs=n_observations,
                 nvars=registration_mapping.get_var_shapes(),
                 context=ctx,
             )
             resize_experiment = False
         soma_io.from_anndata(
-            storepath,
+            storepath_str,
             adata_obj,
             measurement_name,
             context=ctx,
@@ -217,7 +233,7 @@ def save_tiledbsoma_experiment(
             **kwargs,
         )
 
-    artifact = Artifact(
+    artifact = Artifact(  # type: ignore
         storepath,
         key=key,
         description=description,
@@ -229,3 +245,31 @@ def save_tiledbsoma_experiment(
     artifact.otype = "tiledbsoma"
 
     return artifact.save()
+
+
+# this is less defensive than _anndata_n_observations
+# it doesn't really catch errors and
+# assumes that the tiledbsoma object is well-formed
+def _soma_store_n_observations(obj) -> int:
+    if obj.soma_type in {"SOMADataFrame", "SOMASparseNDArray", "SOMADenseNDArray"}:
+        return obj.non_empty_domain()[0][1] + 1
+    elif obj.soma_type == "SOMAExperiment":
+        return _soma_store_n_observations(obj["obs"])
+    elif obj.soma_type == "SOMAMeasurement":
+        keys = obj.keys()
+        for slot in ("X", "obsm", "obsp"):
+            if slot in keys:
+                return _soma_store_n_observations(next(iter(obj[slot].values())))
+    elif obj.soma_type == "SOMACollection":
+        n_obs = 0
+        for value in obj.values():
+            n_obs += _soma_store_n_observations(value)
+        return n_obs
+    raise ValueError(
+        "Could not infer the number of observations from the tiledbsoma object."
+    )
+
+
+def _soma_n_observations(objectpath: UPath) -> int:
+    with _open_tiledbsoma(objectpath, mode="r") as store:
+        return _soma_store_n_observations(store)
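
For the array and dataframe case, non_empty_domain()[0][1] is the largest written coordinate on the first (obs) dimension, so adding 1 turns it into a count. A hedged usage sketch of the new entry point; the store path is hypothetical:

    from upath import UPath

    # hypothetical store location, for illustration only
    n_obs = _soma_n_observations(UPath("s3://my-bucket/my-store.tiledbsoma"))
    print(f"store holds {n_obs} observations")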
lamindb/core/storage/paths.py
@@ -4,7 +4,6 @@ import shutil
 from typing import TYPE_CHECKING
 
 import fsspec
-from lamin_utils import logger
 from lamindb_setup.core import StorageSettings
 from lamindb_setup.core.upath import (
     LocalPathClasses,
@@ -42,25 +41,27 @@ def auto_storage_key_from_artifact_uid(uid: str, suffix: str, is_dir: bool) -> s
     return storage_key
 
 
-def check_path_is_child_of_root(path: UPathStr, root: UPathStr) -> bool:
-    # str is needed to eliminate UPath storage_options
-    # from the equality checks below
-    # and for fsspec.utils.get_protocol
-    path_str = str(path)
-    root_str = str(root)
-    root_protocol = fsspec.utils.get_protocol(root_str)
-    # check that the protocols are the same first
-    if fsspec.utils.get_protocol(path_str) != root_protocol:
-        return False
-    if root_protocol in {"http", "https"}:
-        # in this case it is a base url, not a file
-        # so formally does not exist
+def _safely_resolve(upath: UPath) -> UPath:
+    if upath.protocol in {"http", "https"}:
         resolve_kwargs = {"follow_redirects": False}
     else:
         resolve_kwargs = {}
-    return (
-        UPath(root_str).resolve(**resolve_kwargs) in UPath(path_str).resolve().parents
-    )
+    return upath.resolve(**resolve_kwargs)
+
+
+def check_path_is_child_of_root(path: UPathStr, root: UPathStr) -> bool:
+    if fsspec.utils.get_protocol(str(path)) != fsspec.utils.get_protocol(str(root)):
+        return False
+    path_upath = _safely_resolve(UPath(path))
+    root_upath = _safely_resolve(UPath(root))
+    if path_upath.protocol == "s3":
+        endpoint_path = path_upath.storage_options.get("endpoint_url", "")
+        endpoint_root = root_upath.storage_options.get("endpoint_url", "")
+        if endpoint_path != endpoint_root:
+            return False
+    # str is needed to eliminate UPath storage_options
+    # which affect equality checks
+    return UPath(str(root_upath)) in UPath(str(path_upath)).parents
 
 
 # returns filepath and root of the storage
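
Illustrative expectations for the refactored check, assuming an s3fs-backed UPath and hypothetical bucket names; note the new early exit when two S3 paths point at different endpoints:

    # same protocol, same endpoint, path below root -> True
    assert check_path_is_child_of_root("s3://bucket/key/file.h5ad", "s3://bucket/key")
    # protocols differ -> False, without touching the network
    assert not check_path_is_child_of_root("gs://bucket/key/file.h5ad", "s3://bucket/key")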
@@ -139,7 +140,20 @@ def store_file_or_folder(
     local_path = UPath(local_path)
     if not isinstance(storage_path, LocalPathClasses):
         # this uploads files and directories
-        create_folder = False if local_path.is_dir() else None
+        if local_path.is_dir():
+            create_folder = False
+            try:
+                # if storage_path already exists we need to delete it
+                # if local_path is a directory
+                # to replace storage_path correctly
+                if storage_path.stat().as_info()["type"] == "directory":
+                    storage_path.rmdir()
+                else:
+                    storage_path.unlink()
+            except (FileNotFoundError, PermissionError):
+                pass
+        else:
+            create_folder = None
         storage_path.upload_from(
             local_path, create_folder=create_folder, print_progress=print_progress
         )
@@ -156,10 +170,15 @@ def store_file_or_folder(
 
 
 def delete_storage_using_key(
-    artifact: Artifact, storage_key: str, using_key: str | None
-):
+    artifact: Artifact,
+    storage_key: str,
+    raise_file_not_found_error: bool = True,
+    using_key: str | None = None,
+) -> None | str:
     filepath, _ = attempt_accessing_path(artifact, storage_key, using_key=using_key)
-    delete_storage(filepath)
+    return delete_storage(
+        filepath, raise_file_not_found_error=raise_file_not_found_error
+    )
 
 
 def delete_storage(
@@ -178,5 +197,5 @@ def delete_storage(
     elif raise_file_not_found_error:
         raise FileNotFoundError(f"{storagepath} is not an existing path!")
     else:
-        logger.warning(f"{storagepath} is not an existing path!")
+        return "did-not-delete"
     return None
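
With the logger.warning gone, a missing path is now reported through the return value rather than a log line. A hedged sketch of a caller honoring the new contract; artifact and storage_key are assumed to be in scope:

    result = delete_storage_using_key(
        artifact, storage_key, raise_file_not_found_error=False
    )
    if result == "did-not-delete":
        # nothing existed at the path; handle or ignore as appropriate
        ...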
lamindb/core/subsettings/_creation_settings.py
@@ -1,13 +1,8 @@
-from typing import Literal
-
-
 class CreationSettings:
-    artifact_if_hash_exists: Literal[
-        "warn_return_existing", "error", "warn_create_new"
-    ] = "warn_return_existing"
-    """Behavior if file hash exists (default `"warn_return_existing"`).
+    search_names: bool = True
+    """Switch off to speed up creating records (default `True`).
 
-    One of `["warn_return_existing", "error", "warn_create_new"]`.
+    If `True`, searches for alternative names and avoids duplicates.
 
     FAQ: :doc:`/faq/idempotency`
     """
@@ -18,15 +13,8 @@ class CreationSettings:
 
     It speeds up file creation by about a factor 100.
     """
-    search_names: bool = True
-    """To speed up creating records (default `True`).
-
-    If `True`, search for alternative names.
-
-    FAQ: :doc:`/faq/idempotency`
-    """
     artifact_silence_missing_run_warning: bool = False
-    """Silence warning about missing run & transform during artifact creation."""
+    """Silence warning about missing run & transform during artifact creation (default `False`)."""
     _artifact_use_virtual_keys: bool = True
    """Treat `key` parameter in :class:`~lamindb.Artifact` as virtual.
 
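
For reference, the surviving search_names switch is what users toggle to speed up bulk record creation; a minimal usage sketch, assuming the standard lamindb settings accessor:

    import lamindb as ln

    # skip the costly duplicate-name search during bulk imports,
    # then restore the default afterwards
    ln.settings.creation.search_names = False
    try:
        ...  # create many records
    finally:
        ln.settings.creation.search_names = True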