lamindb 0.76.2__py3-none-any.whl → 0.76.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_filter.py CHANGED
@@ -1,11 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
- from lnschema_core import Artifact, Collection, Record
4
- from lnschema_core.types import VisibilityChoice
3
+ from typing import TYPE_CHECKING
5
4
 
6
- from lamindb import settings
5
+ from lnschema_core import Artifact, Collection
7
6
 
8
- from ._query_set import QuerySet
7
+ from ._query_set import QuerySet, process_expressions
8
+
9
+ if TYPE_CHECKING:
10
+ from lnschema_core import Record
9
11
 
10
12
 
11
13
  def filter(registry: type[Record], **expressions) -> QuerySet:
@@ -13,23 +15,7 @@ def filter(registry: type[Record], **expressions) -> QuerySet:
13
15
  _using_key = None
14
16
  if "_using_key" in expressions:
15
17
  _using_key = expressions.pop("_using_key")
16
- if registry in {Artifact, Collection}:
17
- # visibility is set to 0 unless expressions contains id or uid equality
18
- if not (
19
- "id" in expressions
20
- or "uid" in expressions
21
- or "uid__startswith" in expressions
22
- ):
23
- visibility = "visibility"
24
- if not any(e.startswith(visibility) for e in expressions):
25
- expressions[visibility] = (
26
- VisibilityChoice.default.value
27
- ) # default visibility
28
- # if visibility is None, do not apply a filter
29
- # otherwise, it would mean filtering for NULL values, which doesn't make
30
- # sense for a non-NULLABLE column
31
- elif visibility in expressions and expressions[visibility] is None:
32
- expressions.pop(visibility)
18
+ expressions = process_expressions(registry, expressions)
33
19
  qs = QuerySet(model=registry, using=_using_key)
34
20
  if len(expressions) > 0:
35
21
  return qs.filter(**expressions)
lamindb/_finish.py CHANGED
@@ -1,8 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import os
4
+ import re
4
5
  import shutil
5
- import subprocess
6
6
  from datetime import datetime, timezone
7
7
  from typing import TYPE_CHECKING
8
8
 
@@ -18,12 +18,81 @@ if TYPE_CHECKING:
18
18
  from ._query_set import QuerySet
19
19
 
20
20
 
21
+ # this is from the get_title function in nbproject
22
+ # should be moved into lamindb sooner or later
23
+ def prepare_notebook(
24
+ nb,
25
+ strip_title: bool = False,
26
+ ) -> str | None:
27
+ """Strip title from the notebook if requested."""
28
+ title_found = False
29
+ for cell in nb.cells:
30
+ cell.metadata.clear() # strip cell metadata
31
+ if not title_found and cell["cell_type"] == "markdown":
32
+ lines = cell["source"].split("\n")
33
+ for i, line in enumerate(lines):
34
+ if line.startswith("# "):
35
+ line.lstrip("#").strip(" .").strip()
36
+ title_found = True
37
+ if strip_title:
38
+ lines.pop(i)
39
+ cell["source"] = "\n".join(lines)
40
+ return None
41
+
42
+
43
+ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
44
+ import nbformat
45
+ import traitlets.config as config
46
+ from nbconvert import HTMLExporter
47
+
48
+ with open(notebook_path, encoding="utf-8") as f:
49
+ notebook = nbformat.read(f, as_version=4)
50
+ prepare_notebook(notebook, strip_title=True)
51
+ notebook.metadata.clear() # strip notebook metadata
52
+ # if we were to export as ipynb, the following two lines would do it
53
+ # with open(output_path, "w", encoding="utf-8") as f:
54
+ # nbformat.write(notebook, f)
55
+ # instead we need all this code
56
+ c = config.Config()
57
+ c.HTMLExporter.preprocessors = []
58
+ c.HTMLExporter.exclude_input_prompt = True
59
+ c.HTMLExporter.exclude_output_prompt = True
60
+ c.HTMLExporter.anchor_link_text = " "
61
+ html_exporter = HTMLExporter(config=c)
62
+ html, _ = html_exporter.from_notebook_node(notebook)
63
+ output_path.write_text(html, encoding="utf-8")
64
+
65
+
66
+ def notebook_to_script(
67
+ transform: Transform, notebook_path: Path, script_path: Path
68
+ ) -> None:
69
+ import jupytext
70
+
71
+ notebook = jupytext.read(notebook_path)
72
+ py_content = jupytext.writes(notebook, fmt="py:percent")
73
+ # remove global metadata header
74
+ py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
75
+ # replace title
76
+ py_content = py_content.replace(f"# # {transform.name}", "# # transform.name")
77
+ script_path.write_text(py_content)
78
+
79
+
80
+ def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
81
+ import jupytext
82
+
83
+ # get title back
84
+ py_content = transform.source_code.replace(
85
+ "# # transform.name", f"# # {transform.name}"
86
+ )
87
+ notebook = jupytext.reads(py_content, fmt="py:percent")
88
+ jupytext.write(notebook, notebook_path)
89
+
90
+
21
91
  def save_context_core(
22
92
  *,
23
93
  run: Run,
24
94
  transform: Transform,
25
95
  filepath: Path,
26
- transform_family: QuerySet | None = None,
27
96
  finished_at: bool = False,
28
97
  from_cli: bool = False,
29
98
  ) -> str | None:
@@ -36,23 +105,21 @@ def save_context_core(
36
105
  # for scripts, things are easy
37
106
  is_consecutive = True
38
107
  is_notebook = transform.type == "notebook"
39
- _source_code_artifact_path = filepath
108
+ source_code_path = filepath
40
109
  # for notebooks, we need more work
41
110
  if is_notebook:
42
111
  try:
43
- import nbstripout
112
+ import jupytext
44
113
  from nbproject.dev import (
45
114
  check_consecutiveness,
46
115
  read_notebook,
47
116
  )
48
117
  except ImportError:
49
- logger.error(
50
- "install nbproject & nbstripout: pip install nbproject nbstripout"
51
- )
118
+ logger.error("install nbproject & jupytext: pip install nbproject jupytext")
52
119
  return None
53
120
  notebook_content = read_notebook(filepath) # type: ignore
54
121
  is_consecutive = check_consecutiveness(
55
- notebook_content, calling_statement="ln.finish()"
122
+ notebook_content, calling_statement=".finish()"
56
123
  )
57
124
  if not is_consecutive:
58
125
  msg = " Do you still want to proceed with finishing? (y/n) "
@@ -62,66 +129,30 @@ def save_context_core(
62
129
  response = "n"
63
130
  if response != "y":
64
131
  return "aborted-non-consecutive"
65
- # convert the notebook file to html
66
- # log_level is set to 40 to silence the nbconvert logging
67
- subprocess.run(
68
- [
69
- "jupyter",
70
- "nbconvert",
71
- "--to",
72
- "html",
73
- filepath.as_posix(),
74
- "--Application.log_level=40",
75
- ],
76
- check=True,
132
+ # write the report
133
+ report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
134
+ ".ipynb", ".html"
77
135
  )
78
- # move the temporary file into the cache dir in case it's accidentally
79
- # in an existing storage location -> we want to move associated
80
- # artifacts into default storage and not register them in an existing
81
- # location
82
- report_path_orig = filepath.with_suffix(".html") # current location
83
- report_path = ln_setup.settings.storage.cache_dir / report_path_orig.name
84
- # don't use Path.rename here because of cross-device link error
85
- # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
86
- shutil.move(
87
- report_path_orig, # type: ignore
88
- report_path,
136
+ notebook_to_report(filepath, report_path)
137
+ # write the source code
138
+ source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
139
+ ".ipynb", ".py"
89
140
  )
90
- # strip the output from the notebook to create the source code file
91
- # first, copy the notebook file to a temporary file in the cache
92
- _source_code_artifact_path = ln_setup.settings.storage.cache_dir / filepath.name
93
- shutil.copy2(filepath, _source_code_artifact_path) # copy
94
- subprocess.run(
95
- [
96
- "nbstripout",
97
- _source_code_artifact_path,
98
- "--extra-keys",
99
- "metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension",
100
- ],
101
- check=True,
102
- )
103
- # find initial versions of source codes and html reports
104
- prev_report = None
105
- prev_source = None
106
- if transform_family is None:
107
- transform_family = transform.versions
108
- if len(transform_family) > 0:
109
- for prev_transform in transform_family.order_by("-created_at"):
110
- if (
111
- prev_transform.latest_run is not None
112
- and prev_transform.latest_run.report_id is not None
113
- ):
114
- prev_report = prev_transform.latest_run.report
115
- if prev_transform._source_code_artifact_id is not None:
116
- prev_source = prev_transform._source_code_artifact
141
+ notebook_to_script(transform, filepath, source_code_path)
117
142
  ln.settings.creation.artifact_silence_missing_run_warning = True
118
-
119
143
  # track source code
120
- if transform._source_code_artifact_id is not None:
144
+ hash, _ = hash_file(source_code_path) # ignore hash_type for now
145
+ if (
146
+ transform._source_code_artifact_id is not None
147
+ or transform.source_code is not None
148
+ ):
121
149
  # check if the hash of the transform source code matches
122
150
  # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
123
- hash, _ = hash_file(_source_code_artifact_path) # ignore hash_type for now
124
- if hash != transform._source_code_artifact.hash:
151
+ if transform.hash is not None:
152
+ condition = hash != transform.hash
153
+ else:
154
+ condition = hash != transform._source_code_artifact.hash
155
+ if condition:
125
156
  if os.getenv("LAMIN_TESTING") is None:
126
157
  # in test, auto-confirm overwrite
127
158
  response = input(
@@ -131,11 +162,8 @@ def save_context_core(
131
162
  else:
132
163
  response = "y"
133
164
  if response == "y":
134
- transform._source_code_artifact.replace(_source_code_artifact_path)
135
- transform._source_code_artifact.save(upload=True)
136
- logger.success(
137
- f"replaced transform._source_code_artifact: {transform._source_code_artifact}"
138
- )
165
+ transform.source_code = source_code_path.read_text()
166
+ transform.hash = hash
139
167
  else:
140
168
  logger.warning(
141
169
  "Please re-run `ln.context.track()` to make a new version"
@@ -144,19 +172,8 @@ def save_context_core(
144
172
  else:
145
173
  logger.important("source code is already saved")
146
174
  else:
147
- _source_code_artifact = ln.Artifact(
148
- _source_code_artifact_path,
149
- description=f"Source of transform {transform.uid}",
150
- version=transform.version,
151
- revises=prev_source,
152
- visibility=0, # hidden file
153
- run=False,
154
- )
155
- _source_code_artifact.save(upload=True, print_progress=False)
156
- transform._source_code_artifact = _source_code_artifact
157
- logger.debug(
158
- f"saved transform._source_code_artifact: {transform._source_code_artifact}"
159
- )
175
+ transform.source_code = source_code_path.read_text()
176
+ transform.hash = hash
160
177
 
161
178
  # track environment
162
179
  env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
@@ -211,7 +228,6 @@ def save_context_core(
211
228
  report_file = ln.Artifact(
212
229
  report_path,
213
230
  description=f"Report of run {run.uid}",
214
- revises=prev_report,
215
231
  visibility=0, # hidden file
216
232
  run=False,
217
233
  )
lamindb/_query_set.py CHANGED
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Iterable, NamedTuple
6
6
  import pandas as pd
7
7
  from django.db import models
8
8
  from django.db.models import F
9
+ from lamin_utils import logger
9
10
  from lamindb_setup.core._docs import doc_args
10
11
  from lnschema_core.models import (
11
12
  Artifact,
@@ -13,8 +14,10 @@ from lnschema_core.models import (
13
14
  Collection,
14
15
  IsVersioned,
15
16
  Record,
17
+ Registry,
16
18
  Run,
17
19
  Transform,
20
+ VisibilityChoice,
18
21
  )
19
22
 
20
23
  from lamindb.core.exceptions import DoesNotExist
@@ -64,6 +67,27 @@ def one_helper(self):
64
67
  return self[0]
65
68
 
66
69
 
70
+ def process_expressions(registry: Registry, expressions: dict) -> dict:
71
+ if registry in {Artifact, Collection}:
72
+ # visibility is set to 0 unless expressions contains id or uid equality
73
+ if not (
74
+ "id" in expressions
75
+ or "uid" in expressions
76
+ or "uid__startswith" in expressions
77
+ ):
78
+ visibility = "visibility"
79
+ if not any(e.startswith(visibility) for e in expressions):
80
+ expressions[visibility] = (
81
+ VisibilityChoice.default.value
82
+ ) # default visibility
83
+ # if visibility is None, do not apply a filter
84
+ # otherwise, it would mean filtering for NULL values, which doesn't make
85
+ # sense for a non-NULLABLE column
86
+ elif visibility in expressions and expressions[visibility] is None:
87
+ expressions.pop(visibility)
88
+ return expressions
89
+
90
+
67
91
  def get(
68
92
  registry_or_queryset: type[Record] | QuerySet,
69
93
  idlike: int | str | None = None,
@@ -88,7 +112,7 @@ def get(
88
112
  return qs.one()
89
113
  else:
90
114
  assert idlike is None # noqa: S101
91
- # below behaves exactly like `.one()`
115
+ expressions = process_expressions(registry, expressions)
92
116
  return registry.objects.get(**expressions)
93
117
 
94
118
 
@@ -108,7 +132,7 @@ class RecordsList(UserList):
108
132
  return one_helper(self)
109
133
 
110
134
 
111
- class QuerySet(models.QuerySet, CanValidate):
135
+ class QuerySet(models.QuerySet):
112
136
  """Sets of records returned by queries.
113
137
 
114
138
  See Also:
@@ -221,6 +245,7 @@ class QuerySet(models.QuerySet, CanValidate):
221
245
  # both Transform & Run might reference artifacts
222
246
  if self.model in {Artifact, Collection, Transform, Run}:
223
247
  for record in self:
248
+ logger.important(f"deleting {record}")
224
249
  record.delete(*args, **kwargs)
225
250
  else:
226
251
  self._delete_base_class(*args, **kwargs)
@@ -276,42 +301,50 @@ class QuerySet(models.QuerySet, CanValidate):
276
301
  else:
277
302
  raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
278
303
 
279
- @doc_args(Record.search.__doc__)
280
- def search(self, string: str, **kwargs):
281
- """{}""" # noqa: D415
282
- from ._record import _search
283
304
 
284
- return _search(cls=self, string=string, **kwargs)
305
+ # -------------------------------------------------------------------------------------
306
+ # CanValidate
307
+ # -------------------------------------------------------------------------------------
285
308
 
286
- @doc_args(Record.lookup.__doc__)
287
- def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
288
- """{}""" # noqa: D415
289
- from ._record import _lookup
290
309
 
291
- return _lookup(cls=self, field=field, **kwargs)
310
+ @doc_args(Record.search.__doc__)
311
+ def search(self, string: str, **kwargs):
312
+ """{}""" # noqa: D415
313
+ from ._record import _search
292
314
 
293
- @doc_args(CanValidate.validate.__doc__)
294
- def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
295
- """{}""" # noqa: D415
296
- from ._can_validate import _validate
315
+ return _search(cls=self, string=string, **kwargs)
297
316
 
298
- return _validate(cls=self, values=values, field=field, **kwargs)
299
317
 
300
- @doc_args(CanValidate.inspect.__doc__)
301
- def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
302
- """{}""" # noqa: D415
303
- from ._can_validate import _inspect
318
+ @doc_args(Record.lookup.__doc__)
319
+ def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
320
+ """{}""" # noqa: D415
321
+ from ._record import _lookup
304
322
 
305
- return _inspect(cls=self, values=values, field=field, **kwargs)
323
+ return _lookup(cls=self, field=field, **kwargs)
306
324
 
307
- @doc_args(CanValidate.standardize.__doc__)
308
- def standardize(
309
- self, values: Iterable, field: str | StrField | None = None, **kwargs
310
- ):
311
- """{}""" # noqa: D415
312
- from ._can_validate import _standardize
313
325
 
314
- return _standardize(cls=self, values=values, field=field, **kwargs)
326
+ @doc_args(CanValidate.validate.__doc__)
327
+ def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
328
+ """{}""" # noqa: D415
329
+ from ._can_validate import _validate
330
+
331
+ return _validate(cls=self, values=values, field=field, **kwargs)
332
+
333
+
334
+ @doc_args(CanValidate.inspect.__doc__)
335
+ def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
336
+ """{}""" # noqa: D415
337
+ from ._can_validate import _inspect
338
+
339
+ return _inspect(cls=self, values=values, field=field, **kwargs)
340
+
341
+
342
+ @doc_args(CanValidate.standardize.__doc__)
343
+ def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
344
+ """{}""" # noqa: D415
345
+ from ._can_validate import _standardize
346
+
347
+ return _standardize(cls=self, values=values, field=field, **kwargs)
315
348
 
316
349
 
317
350
  models.QuerySet.df = QuerySet.df
@@ -320,10 +353,10 @@ models.QuerySet.first = QuerySet.first
320
353
  models.QuerySet.one = QuerySet.one
321
354
  models.QuerySet.one_or_none = QuerySet.one_or_none
322
355
  models.QuerySet.latest_version = QuerySet.latest_version
323
- models.QuerySet.search = QuerySet.search
324
- models.QuerySet.lookup = QuerySet.lookup
325
- models.QuerySet.validate = QuerySet.validate
326
- models.QuerySet.inspect = QuerySet.inspect
327
- models.QuerySet.standardize = QuerySet.standardize
356
+ models.QuerySet.search = search
357
+ models.QuerySet.lookup = lookup
358
+ models.QuerySet.validate = validate
359
+ models.QuerySet.inspect = inspect
360
+ models.QuerySet.standardize = standardize
328
361
  models.QuerySet._delete_base_class = models.QuerySet.delete
329
362
  models.QuerySet.delete = QuerySet.delete
lamindb/_record.py CHANGED
@@ -57,7 +57,7 @@ def suggest_records_with_similar_names(record: Record, kwargs) -> bool:
57
57
  if kwargs.get("name") is None:
58
58
  return False
59
59
  queryset = _search(
60
- record.__class__, kwargs["name"], field="name", truncate_words=True, limit=20
60
+ record.__class__, kwargs["name"], field="name", truncate_words=True, limit=3
61
61
  )
62
62
  if not queryset.exists(): # empty queryset
63
63
  return False
@@ -586,7 +586,8 @@ def delete(self) -> None:
586
586
  # but that's for another time
587
587
  if isinstance(self, IsVersioned) and self.is_latest:
588
588
  new_latest = (
589
- self.__class__.filter(is_latest=False, uid__startswith=self.stem_uid)
589
+ self.__class__.objects.using(self._state.db)
590
+ .filter(is_latest=False, uid__startswith=self.stem_uid)
590
591
  .order_by("-created_at")
591
592
  .first()
592
593
  )
lamindb/_transform.py CHANGED
@@ -37,8 +37,7 @@ def __init__(transform: Transform, *args, **kwargs):
37
37
  "Only name, key, version, type, revises, reference, "
38
38
  f"reference_type can be passed, but you passed: {kwargs}"
39
39
  )
40
- # Transform allows passing a uid, all others don't
41
- if uid is None and key is not None:
40
+ if revises is None and key is not None:
42
41
  revises = Transform.filter(key=key).order_by("-created_at").first()
43
42
  if revises is not None and key is not None and revises.key != key:
44
43
  note = message_update_key_in_version_family(
lamindb/core/__init__.py CHANGED
@@ -10,8 +10,6 @@ Registries:
10
10
  QuerySet
11
11
  QueryManager
12
12
  RecordsList
13
- HasFeatures
14
- HasParams
15
13
  FeatureManager
16
14
  ParamManager
17
15
  LabelManager
@@ -30,6 +28,7 @@ Curators:
30
28
  .. autosummary::
31
29
  :toctree: .
32
30
 
31
+ BaseCurator
33
32
  DataFrameCurator
34
33
  AnnDataCurator
35
34
  MuDataCurator
@@ -80,6 +79,7 @@ from lnschema_core.models import (
80
79
 
81
80
  from lamindb._curate import (
82
81
  AnnDataCurator,
82
+ BaseCurator,
83
83
  CurateLookup,
84
84
  DataFrameCurator,
85
85
  MuDataCurator,
lamindb/core/_context.py CHANGED
@@ -18,7 +18,7 @@ from ._sync_git import get_transform_reference_from_git_repo
18
18
  from ._track_environment import track_environment
19
19
  from .exceptions import (
20
20
  MissingContext,
21
- NotebookNotSaved,
21
+ NotebookFileNotSavedToDisk,
22
22
  NotebookNotSavedError,
23
23
  NoTitleError,
24
24
  TrackNotCalled,
@@ -414,6 +414,10 @@ class Context:
414
414
  if transform is None:
415
415
  if uid is None:
416
416
  uid = f"{stem_uid}{get_uid_ext(version)}"
417
+ # note that here we're not passing revises because we're not querying it
418
+ # hence, we need to do a revision family lookup based on key
419
+ # hence, we need key to be not None
420
+ assert key is not None # noqa: S101
417
421
  transform = Transform(
418
422
  uid=uid,
419
423
  version=version,
@@ -422,8 +426,7 @@ class Context:
422
426
  reference=transform_ref,
423
427
  reference_type=transform_ref_type,
424
428
  type=transform_type,
425
- )
426
- transform.save()
429
+ ).save()
427
430
  self._logging_message += f"created Transform('{transform.uid}')"
428
431
  else:
429
432
  uid = transform.uid
@@ -449,29 +452,36 @@ class Context:
449
452
  "updated transform name, " # white space on purpose
450
453
  )
451
454
  # check whether transform source code was already saved
452
- if transform._source_code_artifact_id is not None:
453
- response = None
455
+ if (
456
+ transform._source_code_artifact_id is not None
457
+ or transform.source_code is not None
458
+ ):
459
+ bump_revision = False
454
460
  if is_run_from_ipython:
455
- response = "y" # auto-bump version
461
+ bump_revision = True
456
462
  else:
457
463
  hash, _ = hash_file(self._path) # ignore hash_type for now
458
- if hash != transform._source_code_artifact.hash:
459
- response = "y" # auto-bump version
464
+ if transform.hash is not None:
465
+ condition = hash != transform.hash
466
+ else:
467
+ condition = hash != transform._source_code_artifact.hash
468
+ if condition:
469
+ bump_revision = True
460
470
  else:
461
471
  self._logging_message += f"loaded Transform('{transform.uid}')"
462
- if response is not None:
472
+ if bump_revision:
463
473
  change_type = (
464
474
  "Re-running saved notebook"
465
475
  if is_run_from_ipython
466
476
  else "Source code changed"
467
477
  )
468
478
  suid, vuid = (
469
- uid[: Transform._len_stem_uid],
470
- uid[Transform._len_stem_uid :],
479
+ uid[:-4],
480
+ uid[-4:],
471
481
  )
472
482
  new_vuid = increment_base62(vuid)
473
483
  raise UpdateContext(
474
- f"{change_type}, bump version by setting:\n\n"
484
+ f"{change_type}, bump revision by setting:\n\n"
475
485
  f'ln.context.uid = "{suid}{new_vuid}"'
476
486
  )
477
487
  else:
@@ -504,8 +514,8 @@ class Context:
504
514
  get_seconds_since_modified(context._path) > 3
505
515
  and os.getenv("LAMIN_TESTING") is None
506
516
  ):
507
- raise NotebookNotSaved(
508
- "Please save the notebook in your editor right before running `ln.finish()`"
517
+ raise NotebookFileNotSavedToDisk(
518
+ "Please save the notebook manually in your editor right before running `ln.finish()`"
509
519
  )
510
520
  save_context_core(
511
521
  run=context.run,
@@ -118,13 +118,11 @@ def validate_labels(labels: QuerySet | list | dict):
118
118
 
119
119
 
120
120
  class LabelManager:
121
- """Label manager (:attr:`~lamindb.core.HasFeatures.labels`).
121
+ """Label manager.
122
122
 
123
123
  This allows to manage untyped labels :class:`~lamindb.ULabel` and arbitrary
124
124
  typed labels (e.g., :class:`~bionty.CellLine`) and associate labels
125
125
  with features.
126
-
127
- See :class:`~lamindb.core.HasFeatures` for more information.
128
126
  """
129
127
 
130
128
  def __init__(self, host: Artifact | Collection):
@@ -17,6 +17,7 @@ from .storage._anndata_accessor import (
17
17
  GroupTypes,
18
18
  StorageType,
19
19
  _safer_read_index,
20
+ get_spec,
20
21
  registry,
21
22
  )
22
23
 
@@ -153,13 +154,30 @@ class MappedCollection:
153
154
  self._make_connections(path_list, parallel)
154
155
 
155
156
  self.n_obs_list = []
156
- for storage in self.storages:
157
+ for i, storage in enumerate(self.storages):
157
158
  with _Connect(storage) as store:
158
159
  X = store["X"]
160
+ store_path = self.path_list[i]
161
+ self._check_csc_raise_error(X, "X", store_path)
159
162
  if isinstance(X, ArrayTypes): # type: ignore
160
163
  self.n_obs_list.append(X.shape[0])
161
164
  else:
162
165
  self.n_obs_list.append(X.attrs["shape"][0])
166
+ for layer_key in self.layers_keys:
167
+ if layer_key == "X":
168
+ continue
169
+ self._check_csc_raise_error(
170
+ store["layers"][layer_key],
171
+ f"layers/{layer_key}",
172
+ store_path,
173
+ )
174
+ if self.obsm_keys is not None:
175
+ for obsm_key in self.obsm_keys:
176
+ self._check_csc_raise_error(
177
+ store["obsm"][obsm_key],
178
+ f"obsm/{obsm_key}",
179
+ store_path,
180
+ )
163
181
  self.n_obs = sum(self.n_obs_list)
164
182
 
165
183
  self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
@@ -281,6 +299,18 @@ class MappedCollection:
281
299
  vars = pd.Index(vars)
282
300
  return [i for i, vrs in enumerate(self.var_list) if not vrs.equals(vars)]
283
301
 
302
+ def _check_csc_raise_error(
303
+ self, elem: GroupType | ArrayType, key: str, path: UPathStr
304
+ ):
305
+ if isinstance(elem, ArrayTypes): # type: ignore
306
+ return
307
+ if get_spec(elem).encoding_type == "csc_matrix":
308
+ if not self.parallel:
309
+ self.close()
310
+ raise ValueError(
311
+ f"{key} in {path} is a csc matrix, `MappedCollection` doesn't support this format yet."
312
+ )
313
+
284
314
  def __len__(self):
285
315
  return self.n_obs
286
316