lamindb 0.76.2__py3-none-any.whl → 0.76.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +7 -9
- lamindb/_artifact.py +43 -24
- lamindb/_can_validate.py +20 -4
- lamindb/_curate.py +120 -40
- lamindb/_filter.py +7 -21
- lamindb/_finish.py +97 -81
- lamindb/_query_set.py +67 -34
- lamindb/_record.py +3 -2
- lamindb/_transform.py +1 -2
- lamindb/core/__init__.py +2 -2
- lamindb/core/_context.py +24 -14
- lamindb/core/_label_manager.py +1 -3
- lamindb/core/_mapped_collection.py +31 -1
- lamindb/core/exceptions.py +1 -1
- lamindb/core/storage/__init__.py +1 -1
- lamindb/core/storage/_anndata_accessor.py +6 -1
- lamindb/core/storage/_tiledbsoma.py +99 -132
- lamindb/core/versioning.py +4 -0
- lamindb/integrations/__init__.py +3 -0
- lamindb/integrations/_vitessce.py +1 -11
- {lamindb-0.76.2.dist-info → lamindb-0.76.4.dist-info}/METADATA +7 -7
- {lamindb-0.76.2.dist-info → lamindb-0.76.4.dist-info}/RECORD +24 -24
- {lamindb-0.76.2.dist-info → lamindb-0.76.4.dist-info}/LICENSE +0 -0
- {lamindb-0.76.2.dist-info → lamindb-0.76.4.dist-info}/WHEEL +0 -0
lamindb/_filter.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
-
from
|
4
|
-
from lnschema_core.types import VisibilityChoice
|
3
|
+
from typing import TYPE_CHECKING
|
5
4
|
|
6
|
-
from
|
5
|
+
from lnschema_core import Artifact, Collection
|
7
6
|
|
8
|
-
from ._query_set import QuerySet
|
7
|
+
from ._query_set import QuerySet, process_expressions
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from lnschema_core import Record
|
9
11
|
|
10
12
|
|
11
13
|
def filter(registry: type[Record], **expressions) -> QuerySet:
|
@@ -13,23 +15,7 @@ def filter(registry: type[Record], **expressions) -> QuerySet:
|
|
13
15
|
_using_key = None
|
14
16
|
if "_using_key" in expressions:
|
15
17
|
_using_key = expressions.pop("_using_key")
|
16
|
-
|
17
|
-
# visibility is set to 0 unless expressions contains id or uid equality
|
18
|
-
if not (
|
19
|
-
"id" in expressions
|
20
|
-
or "uid" in expressions
|
21
|
-
or "uid__startswith" in expressions
|
22
|
-
):
|
23
|
-
visibility = "visibility"
|
24
|
-
if not any(e.startswith(visibility) for e in expressions):
|
25
|
-
expressions[visibility] = (
|
26
|
-
VisibilityChoice.default.value
|
27
|
-
) # default visibility
|
28
|
-
# if visibility is None, do not apply a filter
|
29
|
-
# otherwise, it would mean filtering for NULL values, which doesn't make
|
30
|
-
# sense for a non-NULLABLE column
|
31
|
-
elif visibility in expressions and expressions[visibility] is None:
|
32
|
-
expressions.pop(visibility)
|
18
|
+
expressions = process_expressions(registry, expressions)
|
33
19
|
qs = QuerySet(model=registry, using=_using_key)
|
34
20
|
if len(expressions) > 0:
|
35
21
|
return qs.filter(**expressions)
|
lamindb/_finish.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import os
|
4
|
+
import re
|
4
5
|
import shutil
|
5
|
-
import subprocess
|
6
6
|
from datetime import datetime, timezone
|
7
7
|
from typing import TYPE_CHECKING
|
8
8
|
|
@@ -18,12 +18,81 @@ if TYPE_CHECKING:
|
|
18
18
|
from ._query_set import QuerySet
|
19
19
|
|
20
20
|
|
21
|
+
# this is from the get_title function in nbproject
|
22
|
+
# should be moved into lamindb sooner or later
|
23
|
+
def prepare_notebook(
|
24
|
+
nb,
|
25
|
+
strip_title: bool = False,
|
26
|
+
) -> str | None:
|
27
|
+
"""Strip title from the notebook if requested."""
|
28
|
+
title_found = False
|
29
|
+
for cell in nb.cells:
|
30
|
+
cell.metadata.clear() # strip cell metadata
|
31
|
+
if not title_found and cell["cell_type"] == "markdown":
|
32
|
+
lines = cell["source"].split("\n")
|
33
|
+
for i, line in enumerate(lines):
|
34
|
+
if line.startswith("# "):
|
35
|
+
line.lstrip("#").strip(" .").strip()
|
36
|
+
title_found = True
|
37
|
+
if strip_title:
|
38
|
+
lines.pop(i)
|
39
|
+
cell["source"] = "\n".join(lines)
|
40
|
+
return None
|
41
|
+
|
42
|
+
|
43
|
+
def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
|
44
|
+
import nbformat
|
45
|
+
import traitlets.config as config
|
46
|
+
from nbconvert import HTMLExporter
|
47
|
+
|
48
|
+
with open(notebook_path, encoding="utf-8") as f:
|
49
|
+
notebook = nbformat.read(f, as_version=4)
|
50
|
+
prepare_notebook(notebook, strip_title=True)
|
51
|
+
notebook.metadata.clear() # strip notebook metadata
|
52
|
+
# if we were to export as ipynb, the following two lines would do it
|
53
|
+
# with open(output_path, "w", encoding="utf-8") as f:
|
54
|
+
# nbformat.write(notebook, f)
|
55
|
+
# instead we need all this code
|
56
|
+
c = config.Config()
|
57
|
+
c.HTMLExporter.preprocessors = []
|
58
|
+
c.HTMLExporter.exclude_input_prompt = True
|
59
|
+
c.HTMLExporter.exclude_output_prompt = True
|
60
|
+
c.HTMLExporter.anchor_link_text = " "
|
61
|
+
html_exporter = HTMLExporter(config=c)
|
62
|
+
html, _ = html_exporter.from_notebook_node(notebook)
|
63
|
+
output_path.write_text(html, encoding="utf-8")
|
64
|
+
|
65
|
+
|
66
|
+
def notebook_to_script(
|
67
|
+
transform: Transform, notebook_path: Path, script_path: Path
|
68
|
+
) -> None:
|
69
|
+
import jupytext
|
70
|
+
|
71
|
+
notebook = jupytext.read(notebook_path)
|
72
|
+
py_content = jupytext.writes(notebook, fmt="py:percent")
|
73
|
+
# remove global metadata header
|
74
|
+
py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
|
75
|
+
# replace title
|
76
|
+
py_content = py_content.replace(f"# # {transform.name}", "# # transform.name")
|
77
|
+
script_path.write_text(py_content)
|
78
|
+
|
79
|
+
|
80
|
+
def script_to_notebook(transform: Transform, notebook_path: Path) -> None:
|
81
|
+
import jupytext
|
82
|
+
|
83
|
+
# get title back
|
84
|
+
py_content = transform.source_code.replace(
|
85
|
+
"# # transform.name", f"# # {transform.name}"
|
86
|
+
)
|
87
|
+
notebook = jupytext.reads(py_content, fmt="py:percent")
|
88
|
+
jupytext.write(notebook, notebook_path)
|
89
|
+
|
90
|
+
|
21
91
|
def save_context_core(
|
22
92
|
*,
|
23
93
|
run: Run,
|
24
94
|
transform: Transform,
|
25
95
|
filepath: Path,
|
26
|
-
transform_family: QuerySet | None = None,
|
27
96
|
finished_at: bool = False,
|
28
97
|
from_cli: bool = False,
|
29
98
|
) -> str | None:
|
@@ -36,23 +105,21 @@ def save_context_core(
|
|
36
105
|
# for scripts, things are easy
|
37
106
|
is_consecutive = True
|
38
107
|
is_notebook = transform.type == "notebook"
|
39
|
-
|
108
|
+
source_code_path = filepath
|
40
109
|
# for notebooks, we need more work
|
41
110
|
if is_notebook:
|
42
111
|
try:
|
43
|
-
import
|
112
|
+
import jupytext
|
44
113
|
from nbproject.dev import (
|
45
114
|
check_consecutiveness,
|
46
115
|
read_notebook,
|
47
116
|
)
|
48
117
|
except ImportError:
|
49
|
-
logger.error(
|
50
|
-
"install nbproject & nbstripout: pip install nbproject nbstripout"
|
51
|
-
)
|
118
|
+
logger.error("install nbproject & jupytext: pip install nbproject jupytext")
|
52
119
|
return None
|
53
120
|
notebook_content = read_notebook(filepath) # type: ignore
|
54
121
|
is_consecutive = check_consecutiveness(
|
55
|
-
notebook_content, calling_statement="
|
122
|
+
notebook_content, calling_statement=".finish()"
|
56
123
|
)
|
57
124
|
if not is_consecutive:
|
58
125
|
msg = " Do you still want to proceed with finishing? (y/n) "
|
@@ -62,66 +129,30 @@ def save_context_core(
|
|
62
129
|
response = "n"
|
63
130
|
if response != "y":
|
64
131
|
return "aborted-non-consecutive"
|
65
|
-
#
|
66
|
-
|
67
|
-
|
68
|
-
[
|
69
|
-
"jupyter",
|
70
|
-
"nbconvert",
|
71
|
-
"--to",
|
72
|
-
"html",
|
73
|
-
filepath.as_posix(),
|
74
|
-
"--Application.log_level=40",
|
75
|
-
],
|
76
|
-
check=True,
|
132
|
+
# write the report
|
133
|
+
report_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
|
134
|
+
".ipynb", ".html"
|
77
135
|
)
|
78
|
-
|
79
|
-
#
|
80
|
-
|
81
|
-
|
82
|
-
report_path_orig = filepath.with_suffix(".html") # current location
|
83
|
-
report_path = ln_setup.settings.storage.cache_dir / report_path_orig.name
|
84
|
-
# don't use Path.rename here because of cross-device link error
|
85
|
-
# https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
|
86
|
-
shutil.move(
|
87
|
-
report_path_orig, # type: ignore
|
88
|
-
report_path,
|
136
|
+
notebook_to_report(filepath, report_path)
|
137
|
+
# write the source code
|
138
|
+
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name.replace(
|
139
|
+
".ipynb", ".py"
|
89
140
|
)
|
90
|
-
|
91
|
-
# first, copy the notebook file to a temporary file in the cache
|
92
|
-
_source_code_artifact_path = ln_setup.settings.storage.cache_dir / filepath.name
|
93
|
-
shutil.copy2(filepath, _source_code_artifact_path) # copy
|
94
|
-
subprocess.run(
|
95
|
-
[
|
96
|
-
"nbstripout",
|
97
|
-
_source_code_artifact_path,
|
98
|
-
"--extra-keys",
|
99
|
-
"metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension",
|
100
|
-
],
|
101
|
-
check=True,
|
102
|
-
)
|
103
|
-
# find initial versions of source codes and html reports
|
104
|
-
prev_report = None
|
105
|
-
prev_source = None
|
106
|
-
if transform_family is None:
|
107
|
-
transform_family = transform.versions
|
108
|
-
if len(transform_family) > 0:
|
109
|
-
for prev_transform in transform_family.order_by("-created_at"):
|
110
|
-
if (
|
111
|
-
prev_transform.latest_run is not None
|
112
|
-
and prev_transform.latest_run.report_id is not None
|
113
|
-
):
|
114
|
-
prev_report = prev_transform.latest_run.report
|
115
|
-
if prev_transform._source_code_artifact_id is not None:
|
116
|
-
prev_source = prev_transform._source_code_artifact
|
141
|
+
notebook_to_script(transform, filepath, source_code_path)
|
117
142
|
ln.settings.creation.artifact_silence_missing_run_warning = True
|
118
|
-
|
119
143
|
# track source code
|
120
|
-
|
144
|
+
hash, _ = hash_file(source_code_path) # ignore hash_type for now
|
145
|
+
if (
|
146
|
+
transform._source_code_artifact_id is not None
|
147
|
+
or transform.source_code is not None
|
148
|
+
):
|
121
149
|
# check if the hash of the transform source code matches
|
122
150
|
# (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
|
123
|
-
hash
|
124
|
-
|
151
|
+
if transform.hash is not None:
|
152
|
+
condition = hash != transform.hash
|
153
|
+
else:
|
154
|
+
condition = hash != transform._source_code_artifact.hash
|
155
|
+
if condition:
|
125
156
|
if os.getenv("LAMIN_TESTING") is None:
|
126
157
|
# in test, auto-confirm overwrite
|
127
158
|
response = input(
|
@@ -131,11 +162,8 @@ def save_context_core(
|
|
131
162
|
else:
|
132
163
|
response = "y"
|
133
164
|
if response == "y":
|
134
|
-
transform.
|
135
|
-
transform.
|
136
|
-
logger.success(
|
137
|
-
f"replaced transform._source_code_artifact: {transform._source_code_artifact}"
|
138
|
-
)
|
165
|
+
transform.source_code = source_code_path.read_text()
|
166
|
+
transform.hash = hash
|
139
167
|
else:
|
140
168
|
logger.warning(
|
141
169
|
"Please re-run `ln.context.track()` to make a new version"
|
@@ -144,19 +172,8 @@ def save_context_core(
|
|
144
172
|
else:
|
145
173
|
logger.important("source code is already saved")
|
146
174
|
else:
|
147
|
-
|
148
|
-
|
149
|
-
description=f"Source of transform {transform.uid}",
|
150
|
-
version=transform.version,
|
151
|
-
revises=prev_source,
|
152
|
-
visibility=0, # hidden file
|
153
|
-
run=False,
|
154
|
-
)
|
155
|
-
_source_code_artifact.save(upload=True, print_progress=False)
|
156
|
-
transform._source_code_artifact = _source_code_artifact
|
157
|
-
logger.debug(
|
158
|
-
f"saved transform._source_code_artifact: {transform._source_code_artifact}"
|
159
|
-
)
|
175
|
+
transform.source_code = source_code_path.read_text()
|
176
|
+
transform.hash = hash
|
160
177
|
|
161
178
|
# track environment
|
162
179
|
env_path = ln_setup.settings.storage.cache_dir / f"run_env_pip_{run.uid}.txt"
|
@@ -211,7 +228,6 @@ def save_context_core(
|
|
211
228
|
report_file = ln.Artifact(
|
212
229
|
report_path,
|
213
230
|
description=f"Report of run {run.uid}",
|
214
|
-
revises=prev_report,
|
215
231
|
visibility=0, # hidden file
|
216
232
|
run=False,
|
217
233
|
)
|
lamindb/_query_set.py
CHANGED
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Iterable, NamedTuple
|
|
6
6
|
import pandas as pd
|
7
7
|
from django.db import models
|
8
8
|
from django.db.models import F
|
9
|
+
from lamin_utils import logger
|
9
10
|
from lamindb_setup.core._docs import doc_args
|
10
11
|
from lnschema_core.models import (
|
11
12
|
Artifact,
|
@@ -13,8 +14,10 @@ from lnschema_core.models import (
|
|
13
14
|
Collection,
|
14
15
|
IsVersioned,
|
15
16
|
Record,
|
17
|
+
Registry,
|
16
18
|
Run,
|
17
19
|
Transform,
|
20
|
+
VisibilityChoice,
|
18
21
|
)
|
19
22
|
|
20
23
|
from lamindb.core.exceptions import DoesNotExist
|
@@ -64,6 +67,27 @@ def one_helper(self):
|
|
64
67
|
return self[0]
|
65
68
|
|
66
69
|
|
70
|
+
def process_expressions(registry: Registry, expressions: dict) -> dict:
|
71
|
+
if registry in {Artifact, Collection}:
|
72
|
+
# visibility is set to 0 unless expressions contains id or uid equality
|
73
|
+
if not (
|
74
|
+
"id" in expressions
|
75
|
+
or "uid" in expressions
|
76
|
+
or "uid__startswith" in expressions
|
77
|
+
):
|
78
|
+
visibility = "visibility"
|
79
|
+
if not any(e.startswith(visibility) for e in expressions):
|
80
|
+
expressions[visibility] = (
|
81
|
+
VisibilityChoice.default.value
|
82
|
+
) # default visibility
|
83
|
+
# if visibility is None, do not apply a filter
|
84
|
+
# otherwise, it would mean filtering for NULL values, which doesn't make
|
85
|
+
# sense for a non-NULLABLE column
|
86
|
+
elif visibility in expressions and expressions[visibility] is None:
|
87
|
+
expressions.pop(visibility)
|
88
|
+
return expressions
|
89
|
+
|
90
|
+
|
67
91
|
def get(
|
68
92
|
registry_or_queryset: type[Record] | QuerySet,
|
69
93
|
idlike: int | str | None = None,
|
@@ -88,7 +112,7 @@ def get(
|
|
88
112
|
return qs.one()
|
89
113
|
else:
|
90
114
|
assert idlike is None # noqa: S101
|
91
|
-
|
115
|
+
expressions = process_expressions(registry, expressions)
|
92
116
|
return registry.objects.get(**expressions)
|
93
117
|
|
94
118
|
|
@@ -108,7 +132,7 @@ class RecordsList(UserList):
|
|
108
132
|
return one_helper(self)
|
109
133
|
|
110
134
|
|
111
|
-
class QuerySet(models.QuerySet
|
135
|
+
class QuerySet(models.QuerySet):
|
112
136
|
"""Sets of records returned by queries.
|
113
137
|
|
114
138
|
See Also:
|
@@ -221,6 +245,7 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
221
245
|
# both Transform & Run might reference artifacts
|
222
246
|
if self.model in {Artifact, Collection, Transform, Run}:
|
223
247
|
for record in self:
|
248
|
+
logger.important(f"deleting {record}")
|
224
249
|
record.delete(*args, **kwargs)
|
225
250
|
else:
|
226
251
|
self._delete_base_class(*args, **kwargs)
|
@@ -276,42 +301,50 @@ class QuerySet(models.QuerySet, CanValidate):
|
|
276
301
|
else:
|
277
302
|
raise ValueError("Record isn't subclass of `lamindb.core.IsVersioned`")
|
278
303
|
|
279
|
-
@doc_args(Record.search.__doc__)
|
280
|
-
def search(self, string: str, **kwargs):
|
281
|
-
"""{}""" # noqa: D415
|
282
|
-
from ._record import _search
|
283
304
|
|
284
|
-
|
305
|
+
# -------------------------------------------------------------------------------------
|
306
|
+
# CanValidate
|
307
|
+
# -------------------------------------------------------------------------------------
|
285
308
|
|
286
|
-
@doc_args(Record.lookup.__doc__)
|
287
|
-
def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
|
288
|
-
"""{}""" # noqa: D415
|
289
|
-
from ._record import _lookup
|
290
309
|
|
291
|
-
|
310
|
+
@doc_args(Record.search.__doc__)
|
311
|
+
def search(self, string: str, **kwargs):
|
312
|
+
"""{}""" # noqa: D415
|
313
|
+
from ._record import _search
|
292
314
|
|
293
|
-
|
294
|
-
def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
295
|
-
"""{}""" # noqa: D415
|
296
|
-
from ._can_validate import _validate
|
315
|
+
return _search(cls=self, string=string, **kwargs)
|
297
316
|
|
298
|
-
return _validate(cls=self, values=values, field=field, **kwargs)
|
299
317
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
318
|
+
@doc_args(Record.lookup.__doc__)
|
319
|
+
def lookup(self, field: StrField | None = None, **kwargs) -> NamedTuple:
|
320
|
+
"""{}""" # noqa: D415
|
321
|
+
from ._record import _lookup
|
304
322
|
|
305
|
-
|
323
|
+
return _lookup(cls=self, field=field, **kwargs)
|
306
324
|
|
307
|
-
@doc_args(CanValidate.standardize.__doc__)
|
308
|
-
def standardize(
|
309
|
-
self, values: Iterable, field: str | StrField | None = None, **kwargs
|
310
|
-
):
|
311
|
-
"""{}""" # noqa: D415
|
312
|
-
from ._can_validate import _standardize
|
313
325
|
|
314
|
-
|
326
|
+
@doc_args(CanValidate.validate.__doc__)
|
327
|
+
def validate(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
328
|
+
"""{}""" # noqa: D415
|
329
|
+
from ._can_validate import _validate
|
330
|
+
|
331
|
+
return _validate(cls=self, values=values, field=field, **kwargs)
|
332
|
+
|
333
|
+
|
334
|
+
@doc_args(CanValidate.inspect.__doc__)
|
335
|
+
def inspect(self, values: ListLike, field: str | StrField | None = None, **kwargs):
|
336
|
+
"""{}""" # noqa: D415
|
337
|
+
from ._can_validate import _inspect
|
338
|
+
|
339
|
+
return _inspect(cls=self, values=values, field=field, **kwargs)
|
340
|
+
|
341
|
+
|
342
|
+
@doc_args(CanValidate.standardize.__doc__)
|
343
|
+
def standardize(self, values: Iterable, field: str | StrField | None = None, **kwargs):
|
344
|
+
"""{}""" # noqa: D415
|
345
|
+
from ._can_validate import _standardize
|
346
|
+
|
347
|
+
return _standardize(cls=self, values=values, field=field, **kwargs)
|
315
348
|
|
316
349
|
|
317
350
|
models.QuerySet.df = QuerySet.df
|
@@ -320,10 +353,10 @@ models.QuerySet.first = QuerySet.first
|
|
320
353
|
models.QuerySet.one = QuerySet.one
|
321
354
|
models.QuerySet.one_or_none = QuerySet.one_or_none
|
322
355
|
models.QuerySet.latest_version = QuerySet.latest_version
|
323
|
-
models.QuerySet.search =
|
324
|
-
models.QuerySet.lookup =
|
325
|
-
models.QuerySet.validate =
|
326
|
-
models.QuerySet.inspect =
|
327
|
-
models.QuerySet.standardize =
|
356
|
+
models.QuerySet.search = search
|
357
|
+
models.QuerySet.lookup = lookup
|
358
|
+
models.QuerySet.validate = validate
|
359
|
+
models.QuerySet.inspect = inspect
|
360
|
+
models.QuerySet.standardize = standardize
|
328
361
|
models.QuerySet._delete_base_class = models.QuerySet.delete
|
329
362
|
models.QuerySet.delete = QuerySet.delete
|
lamindb/_record.py
CHANGED
@@ -57,7 +57,7 @@ def suggest_records_with_similar_names(record: Record, kwargs) -> bool:
|
|
57
57
|
if kwargs.get("name") is None:
|
58
58
|
return False
|
59
59
|
queryset = _search(
|
60
|
-
record.__class__, kwargs["name"], field="name", truncate_words=True, limit=
|
60
|
+
record.__class__, kwargs["name"], field="name", truncate_words=True, limit=3
|
61
61
|
)
|
62
62
|
if not queryset.exists(): # empty queryset
|
63
63
|
return False
|
@@ -586,7 +586,8 @@ def delete(self) -> None:
|
|
586
586
|
# but that's for another time
|
587
587
|
if isinstance(self, IsVersioned) and self.is_latest:
|
588
588
|
new_latest = (
|
589
|
-
self.__class__.
|
589
|
+
self.__class__.objects.using(self._state.db)
|
590
|
+
.filter(is_latest=False, uid__startswith=self.stem_uid)
|
590
591
|
.order_by("-created_at")
|
591
592
|
.first()
|
592
593
|
)
|
lamindb/_transform.py
CHANGED
@@ -37,8 +37,7 @@ def __init__(transform: Transform, *args, **kwargs):
|
|
37
37
|
"Only name, key, version, type, revises, reference, "
|
38
38
|
f"reference_type can be passed, but you passed: {kwargs}"
|
39
39
|
)
|
40
|
-
|
41
|
-
if uid is None and key is not None:
|
40
|
+
if revises is None and key is not None:
|
42
41
|
revises = Transform.filter(key=key).order_by("-created_at").first()
|
43
42
|
if revises is not None and key is not None and revises.key != key:
|
44
43
|
note = message_update_key_in_version_family(
|
lamindb/core/__init__.py
CHANGED
@@ -10,8 +10,6 @@ Registries:
|
|
10
10
|
QuerySet
|
11
11
|
QueryManager
|
12
12
|
RecordsList
|
13
|
-
HasFeatures
|
14
|
-
HasParams
|
15
13
|
FeatureManager
|
16
14
|
ParamManager
|
17
15
|
LabelManager
|
@@ -30,6 +28,7 @@ Curators:
|
|
30
28
|
.. autosummary::
|
31
29
|
:toctree: .
|
32
30
|
|
31
|
+
BaseCurator
|
33
32
|
DataFrameCurator
|
34
33
|
AnnDataCurator
|
35
34
|
MuDataCurator
|
@@ -80,6 +79,7 @@ from lnschema_core.models import (
|
|
80
79
|
|
81
80
|
from lamindb._curate import (
|
82
81
|
AnnDataCurator,
|
82
|
+
BaseCurator,
|
83
83
|
CurateLookup,
|
84
84
|
DataFrameCurator,
|
85
85
|
MuDataCurator,
|
lamindb/core/_context.py
CHANGED
@@ -18,7 +18,7 @@ from ._sync_git import get_transform_reference_from_git_repo
|
|
18
18
|
from ._track_environment import track_environment
|
19
19
|
from .exceptions import (
|
20
20
|
MissingContext,
|
21
|
-
|
21
|
+
NotebookFileNotSavedToDisk,
|
22
22
|
NotebookNotSavedError,
|
23
23
|
NoTitleError,
|
24
24
|
TrackNotCalled,
|
@@ -414,6 +414,10 @@ class Context:
|
|
414
414
|
if transform is None:
|
415
415
|
if uid is None:
|
416
416
|
uid = f"{stem_uid}{get_uid_ext(version)}"
|
417
|
+
# note that here we're not passing revises because we're not querying it
|
418
|
+
# hence, we need to do a revision family lookup based on key
|
419
|
+
# hence, we need key to be not None
|
420
|
+
assert key is not None # noqa: S101
|
417
421
|
transform = Transform(
|
418
422
|
uid=uid,
|
419
423
|
version=version,
|
@@ -422,8 +426,7 @@ class Context:
|
|
422
426
|
reference=transform_ref,
|
423
427
|
reference_type=transform_ref_type,
|
424
428
|
type=transform_type,
|
425
|
-
)
|
426
|
-
transform.save()
|
429
|
+
).save()
|
427
430
|
self._logging_message += f"created Transform('{transform.uid}')"
|
428
431
|
else:
|
429
432
|
uid = transform.uid
|
@@ -449,29 +452,36 @@ class Context:
|
|
449
452
|
"updated transform name, " # white space on purpose
|
450
453
|
)
|
451
454
|
# check whether transform source code was already saved
|
452
|
-
if
|
453
|
-
|
455
|
+
if (
|
456
|
+
transform._source_code_artifact_id is not None
|
457
|
+
or transform.source_code is not None
|
458
|
+
):
|
459
|
+
bump_revision = False
|
454
460
|
if is_run_from_ipython:
|
455
|
-
|
461
|
+
bump_revision = True
|
456
462
|
else:
|
457
463
|
hash, _ = hash_file(self._path) # ignore hash_type for now
|
458
|
-
if hash
|
459
|
-
|
464
|
+
if transform.hash is not None:
|
465
|
+
condition = hash != transform.hash
|
466
|
+
else:
|
467
|
+
condition = hash != transform._source_code_artifact.hash
|
468
|
+
if condition:
|
469
|
+
bump_revision = True
|
460
470
|
else:
|
461
471
|
self._logging_message += f"loaded Transform('{transform.uid}')"
|
462
|
-
if
|
472
|
+
if bump_revision:
|
463
473
|
change_type = (
|
464
474
|
"Re-running saved notebook"
|
465
475
|
if is_run_from_ipython
|
466
476
|
else "Source code changed"
|
467
477
|
)
|
468
478
|
suid, vuid = (
|
469
|
-
uid[
|
470
|
-
uid[
|
479
|
+
uid[:-4],
|
480
|
+
uid[-4:],
|
471
481
|
)
|
472
482
|
new_vuid = increment_base62(vuid)
|
473
483
|
raise UpdateContext(
|
474
|
-
f"{change_type}, bump
|
484
|
+
f"{change_type}, bump revision by setting:\n\n"
|
475
485
|
f'ln.context.uid = "{suid}{new_vuid}"'
|
476
486
|
)
|
477
487
|
else:
|
@@ -504,8 +514,8 @@ class Context:
|
|
504
514
|
get_seconds_since_modified(context._path) > 3
|
505
515
|
and os.getenv("LAMIN_TESTING") is None
|
506
516
|
):
|
507
|
-
raise
|
508
|
-
"Please save the notebook in your editor right before running `ln.finish()`"
|
517
|
+
raise NotebookFileNotSavedToDisk(
|
518
|
+
"Please save the notebook manually in your editor right before running `ln.finish()`"
|
509
519
|
)
|
510
520
|
save_context_core(
|
511
521
|
run=context.run,
|
lamindb/core/_label_manager.py
CHANGED
@@ -118,13 +118,11 @@ def validate_labels(labels: QuerySet | list | dict):
|
|
118
118
|
|
119
119
|
|
120
120
|
class LabelManager:
|
121
|
-
"""Label manager
|
121
|
+
"""Label manager.
|
122
122
|
|
123
123
|
This allows to manage untyped labels :class:`~lamindb.ULabel` and arbitrary
|
124
124
|
typed labels (e.g., :class:`~bionty.CellLine`) and associate labels
|
125
125
|
with features.
|
126
|
-
|
127
|
-
See :class:`~lamindb.core.HasFeatures` for more information.
|
128
126
|
"""
|
129
127
|
|
130
128
|
def __init__(self, host: Artifact | Collection):
|
@@ -17,6 +17,7 @@ from .storage._anndata_accessor import (
|
|
17
17
|
GroupTypes,
|
18
18
|
StorageType,
|
19
19
|
_safer_read_index,
|
20
|
+
get_spec,
|
20
21
|
registry,
|
21
22
|
)
|
22
23
|
|
@@ -153,13 +154,30 @@ class MappedCollection:
|
|
153
154
|
self._make_connections(path_list, parallel)
|
154
155
|
|
155
156
|
self.n_obs_list = []
|
156
|
-
for storage in self.storages:
|
157
|
+
for i, storage in enumerate(self.storages):
|
157
158
|
with _Connect(storage) as store:
|
158
159
|
X = store["X"]
|
160
|
+
store_path = self.path_list[i]
|
161
|
+
self._check_csc_raise_error(X, "X", store_path)
|
159
162
|
if isinstance(X, ArrayTypes): # type: ignore
|
160
163
|
self.n_obs_list.append(X.shape[0])
|
161
164
|
else:
|
162
165
|
self.n_obs_list.append(X.attrs["shape"][0])
|
166
|
+
for layer_key in self.layers_keys:
|
167
|
+
if layer_key == "X":
|
168
|
+
continue
|
169
|
+
self._check_csc_raise_error(
|
170
|
+
store["layers"][layer_key],
|
171
|
+
f"layers/{layer_key}",
|
172
|
+
store_path,
|
173
|
+
)
|
174
|
+
if self.obsm_keys is not None:
|
175
|
+
for obsm_key in self.obsm_keys:
|
176
|
+
self._check_csc_raise_error(
|
177
|
+
store["obsm"][obsm_key],
|
178
|
+
f"obsm/{obsm_key}",
|
179
|
+
store_path,
|
180
|
+
)
|
163
181
|
self.n_obs = sum(self.n_obs_list)
|
164
182
|
|
165
183
|
self.indices = np.hstack([np.arange(n_obs) for n_obs in self.n_obs_list])
|
@@ -281,6 +299,18 @@ class MappedCollection:
|
|
281
299
|
vars = pd.Index(vars)
|
282
300
|
return [i for i, vrs in enumerate(self.var_list) if not vrs.equals(vars)]
|
283
301
|
|
302
|
+
def _check_csc_raise_error(
|
303
|
+
self, elem: GroupType | ArrayType, key: str, path: UPathStr
|
304
|
+
):
|
305
|
+
if isinstance(elem, ArrayTypes): # type: ignore
|
306
|
+
return
|
307
|
+
if get_spec(elem).encoding_type == "csc_matrix":
|
308
|
+
if not self.parallel:
|
309
|
+
self.close()
|
310
|
+
raise ValueError(
|
311
|
+
f"{key} in {path} is a csc matrix, `MappedCollection` doesn't support this format yet."
|
312
|
+
)
|
313
|
+
|
284
314
|
def __len__(self):
|
285
315
|
return self.n_obs
|
286
316
|
|