lamindb 0.32.0rc1__py2.py3-none-any.whl → 0.33.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +21 -14
- lamindb/_check_versions.py +6 -6
- lamindb/_context.py +196 -0
- lamindb/_delete.py +13 -7
- lamindb/_load.py +23 -33
- lamindb/_nb.py +19 -84
- lamindb/_record.py +55 -21
- lamindb/dev/db/_add.py +22 -7
- lamindb/schema/__init__.py +1 -15
- lamindb-0.33.0.dist-info/METADATA +236 -0
- {lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/RECORD +14 -16
- {lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/WHEEL +1 -1
- lamindb/knowledge/__init__.py +0 -34
- lamindb/knowledge/_core.py +0 -71
- lamindb/knowledge/_lookup.py +0 -18
- lamindb-0.32.0rc1.dist-info/METADATA +0 -178
- {lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/LICENSE +0 -0
- {lamindb-0.32.0rc1.dist-info → lamindb-0.33.0.dist-info}/entry_points.txt +0 -0
lamindb/__init__.py
CHANGED
@@ -14,6 +14,20 @@ and in-memory data objects (`DataFrame`, `AnnData`, etc.).
|
|
14
14
|
DObject
|
15
15
|
DFolder
|
16
16
|
|
17
|
+
Data objects are transformed by runs:
|
18
|
+
|
19
|
+
.. autosummary::
|
20
|
+
:toctree: .
|
21
|
+
|
22
|
+
Run
|
23
|
+
|
24
|
+
Tracking data by features:
|
25
|
+
|
26
|
+
.. autosummary::
|
27
|
+
:toctree: .
|
28
|
+
|
29
|
+
Features
|
30
|
+
|
17
31
|
Query & manipulate data:
|
18
32
|
|
19
33
|
.. autosummary::
|
@@ -44,13 +58,6 @@ Schema - entities and their relations:
|
|
44
58
|
|
45
59
|
schema
|
46
60
|
|
47
|
-
Track Jupyter notebooks:
|
48
|
-
|
49
|
-
.. autosummary::
|
50
|
-
:toctree: .
|
51
|
-
|
52
|
-
nb
|
53
|
-
|
54
61
|
Setup:
|
55
62
|
|
56
63
|
.. autosummary::
|
@@ -63,11 +70,12 @@ Developer API:
|
|
63
70
|
.. autosummary::
|
64
71
|
:toctree: .
|
65
72
|
|
73
|
+
context
|
66
74
|
settings
|
67
75
|
dev
|
68
76
|
"""
|
69
77
|
|
70
|
-
__version__ = "0.
|
78
|
+
__version__ = "0.33.0" # denote a release candidate for 0.1.0 with 0.1rc1
|
71
79
|
|
72
80
|
# prints warning of python versions
|
73
81
|
from lamin_logger import logger as _logger
|
@@ -104,12 +112,11 @@ else:
|
|
104
112
|
" instance."
|
105
113
|
)
|
106
114
|
|
107
|
-
from lnschema_core import DFolder # noqa
|
108
|
-
from lnschema_core import DObject # noqa
|
115
|
+
from lnschema_core import DFolder, DObject, Features, Run # noqa
|
109
116
|
|
110
117
|
dobject_doc = """Data objects in storage & memory.
|
111
118
|
|
112
|
-
- Guide: :doc:`/guide/
|
119
|
+
- Guide: :doc:`/guide/track`
|
113
120
|
- FAQ: :doc:`/faq/ingest`
|
114
121
|
|
115
122
|
A `DObject` is typically instantiated from data using the arguments below.
|
@@ -119,14 +126,13 @@ fields directly.
|
|
119
126
|
Args:
|
120
127
|
data: Filepath or in-memory data.
|
121
128
|
name: Name of the data object, required if an in-memory object is passed.
|
122
|
-
|
123
|
-
source: The source of the data object (a :class:`~lamindb.schema.Run`).
|
129
|
+
source: The source of the data object (a :class:`~lamindb.Run`).
|
124
130
|
id: The id of the dobject.
|
125
131
|
format: Whether to use `h5ad` or `zarr` to store an `AnnData` object.
|
126
132
|
|
127
133
|
Data objects (`dobjects`) represent atomic datasets in object storage:
|
128
134
|
jointly measured observations of variables (features).
|
129
|
-
They are generated by running code, instances of :class:`~lamindb.
|
135
|
+
They are generated by running code, instances of :class:`~lamindb.Run`.
|
130
136
|
|
131
137
|
A `dobject` may contain a single observation, for instance, a single image.
|
132
138
|
|
@@ -162,6 +168,7 @@ DObject.__doc__ = dobject_doc
|
|
162
168
|
from . import dev # noqa
|
163
169
|
from . import schema # noqa
|
164
170
|
from . import setup # noqa
|
171
|
+
from ._context import context # noqa
|
165
172
|
from ._delete import delete # noqa
|
166
173
|
from ._nb import nb # noqa
|
167
174
|
from ._settings import settings
|
lamindb/_check_versions.py
CHANGED
@@ -4,14 +4,14 @@ from lnschema_core import __version__ as lnschema_core_v
|
|
4
4
|
from nbproject import __version__ as nbproject_v
|
5
5
|
from packaging import version
|
6
6
|
|
7
|
-
if version.parse(lndb_v)
|
8
|
-
raise RuntimeError("Upgrade lndb! pip install lndb
|
7
|
+
if version.parse(lndb_v) < version.parse("0.37.4"):
|
8
|
+
raise RuntimeError("Upgrade lndb! pip install lndb>=0.37.4")
|
9
9
|
|
10
|
-
if version.parse(lnschema_core_v) != version.parse("0.29.
|
11
|
-
raise RuntimeError("lamindb needs lnschema_core==0.29.
|
10
|
+
if version.parse(lnschema_core_v) != version.parse("0.29.5"):
|
11
|
+
raise RuntimeError("lamindb needs lnschema_core==0.29.5")
|
12
12
|
|
13
|
-
if version.parse(nbproject_v) < version.parse("0.8.
|
14
|
-
raise RuntimeError("lamindb needs nbproject>=0.8.
|
13
|
+
if version.parse(nbproject_v) < version.parse("0.8.3"):
|
14
|
+
raise RuntimeError("lamindb needs nbproject>=0.8.3")
|
15
15
|
|
16
16
|
# ensure that the lamin package is not installed
|
17
17
|
try:
|
lamindb/_context.py
ADDED
@@ -0,0 +1,196 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from typing import List, Optional, Union
|
3
|
+
|
4
|
+
import lnschema_core
|
5
|
+
import nbproject
|
6
|
+
from lamin_logger import logger
|
7
|
+
from lndb import settings
|
8
|
+
from lndb.dev import InstanceSettings
|
9
|
+
from lnschema_core import Notebook, Pipeline, Run, dev
|
10
|
+
from nbproject._is_run_from_ipython import is_run_from_ipython
|
11
|
+
|
12
|
+
|
13
|
+
class context:
    """Global run context.

    Set through `ln.Run(global_context=True)`.

    Often, you'll want to call: `ln.Run(global_context=True, load_latest=True)`.
    """

    instance: Optional[InstanceSettings] = None
    """Current instance."""
    notebook: Optional[Notebook] = None
    """Current notebook."""
    pipeline: Optional[Pipeline] = None
    """Current pipeline."""
    run: Optional[Run] = None
    """Current run."""

    @classmethod
    def _track_notebook(
        cls,
        *,
        id: Optional[str] = None,
        v: Optional[str] = "0",
        name: Optional[str] = None,
        filepath: Optional[str] = None,
        pypackage: Union[str, List[str], None] = None,
        editor: Optional[str] = None,
    ):
        """Track notebook.

        Looks up the `Notebook` record matching `id` & `v`, creates it if it
        does not exist yet, and stores it on `cls.notebook`.

        Args:
            id: Pass a notebook id manually.
            v: Pass a notebook version manually.
            name: Pass a notebook name manually.
            pypackage: One or more python packages to track.
            filepath: Filepath of notebook. Only needed if automatic inference fails.
            editor: Editor environment. Only needed if automatic inference fails.
                Pass `'lab'` for jupyter lab and `'notebook'` for jupyter notebook,
                this can help to identify the correct mechanism for interactivity
                when automatic inference fails.

        Raises:
            RuntimeError: If the nbproject header call fails, or if only one
                of `id` and `name` is passed.
        """
        cls.instance = settings.instance
        # original location of this code was _nb
        # legacy code here, see duplicated version in _run
        if id is None and name is None:
            nbproject_failed_msg = (
                "Auto-retrieval of notebook name & title failed.\nPlease paste error"
                " at: https://github.com/laminlabs/nbproject/issues/new \n\nFix: Run"
                f" ln.nb.header(id={dev.id.notebook()}, name='my-notebook-name')"
            )
            try:
                nbproject.header(
                    pypackage=pypackage, filepath=filepath, env=editor, display=False
                )
            except Exception as err:
                # keep the underlying nbproject error attached as the cause
                raise RuntimeError(nbproject_failed_msg) from err
            # this contains filepath if the header was run successfully
            from nbproject._header import _filepath

            id = nbproject.meta.store.id
            v = nbproject.meta.store.version
            name = Path(_filepath).stem
            title = nbproject.meta.live.title
        elif id is None or name is None:
            # Both id and name need to be passed if passing it manually
            raise RuntimeError("Fix: Pass both id & name to ln.nb.header().")
        else:
            title = None

        # imported here rather than at module level: lamindb/__init__.py
        # itself imports this module
        import lamindb as ln
        import lamindb.schema as lns

        notebook = ln.select(
            lns.Notebook,
            id=id,
            v=v,
        ).one_or_none()
        if notebook is None:
            notebook = lns.Notebook(
                id=id,
                v=v,
                name=name,
                title=title,
            )
            notebook = ln.add(notebook)
            logger.info(f"Added notebook: {notebook}")
        else:
            logger.info(f"Loaded notebook: {notebook}")
            if notebook.name != name or notebook.title != title:
                response = input(
                    "Updated notebook name and/or title: Do you want to assign a new id"
                    " or version? (y/n)"
                )
                if response == "y":
                    print("Notebook metadata will be re-initialized.")
                    new_id, new_v = None, None
                    response = input("Do you want to generate a new id? (y/n)")
                    if response == "y":
                        new_id = lnschema_core.dev.id.notebook()
                    response = input(
                        "Do you want to set a new version (e.g. '1.1')? Type 'n' for"
                        " 'no'. (version/n)"
                    )
                    # inspect the answer just given, not the still-unset
                    # `new_v`: "n" keeps the version, "y" asks for it, and
                    # anything else is taken as the version itself
                    if response != "n":
                        if response == "y":
                            response = input("Please type the version: ")
                        new_v = response
                    if new_id is not None or new_v is not None:
                        # only overwrite the fields the user chose to change
                        if new_id is not None:
                            nbproject.meta.store.id = new_id
                            id = new_id
                        if new_v is not None:
                            nbproject.meta.store.version = new_v
                            v = new_v
                        nbproject.meta.store.write()
                        # at this point, depending on the editor, the process
                        # might crash; that is OK as upon re-running, the
                        # notebook will have new metadata and will be registered
                        # in the db. In case the python process does not exit,
                        # we need a new Notebook record with the new id/version
                        notebook = lns.Notebook(id=id, v=v)

                notebook.name = name
                notebook.title = title
                ln.add(notebook)

        # at this point, we have a notebook object
        cls.notebook = notebook

    @classmethod
    def _track_pipeline(
        cls,
        name: str,
        *,
        version: Optional[str] = None,
    ):
        """Track pipeline.

        Stores the selected (or newly constructed) `Pipeline` record on
        `cls.pipeline`.

        Args:
            name: Pipeline name.
            version: Pipeline version. If `None`, load latest (sort by created_at).
        """
        cls.instance = settings.instance
        import lamindb as ln
        import lamindb.schema as lns

        if version is not None:
            pipeline = ln.select(lns.Pipeline, name=name, v=version).one()
        else:
            pipeline = (
                ln.select(lns.Pipeline, name=name)
                .order_by(lns.Pipeline.created_at.desc())
                .first()
            )
            if pipeline is None:
                response = input(
                    f"Did not find any pipeline record with name '{name}'. Create a new"
                    " one? (y/n)"
                )
                if response == "y":
                    # the record is only constructed here, not added to the db
                    pipeline = lns.Pipeline(name=name)
        # stays None if no record was found and the user declined to create one
        cls.pipeline = pipeline

    @classmethod
    def _track_notebook_pipeline(
        cls, *, pipeline_name: Optional[str] = None, load_latest=True
    ):
        """Track notebook/pipeline and run.

        When called from within a Python script, pass `pipeline_name`.

        Args:
            pipeline_name: Pipeline name.
            load_latest: Load the latest run of the notebook or pipeline.
                Not read inside this method.

        Raises:
            ValueError: If not run from IPython and no `pipeline_name` is passed.
        """
        cls.instance = settings.instance
        logger.info(f"Instance: {cls.instance.identifier}")
        logger.info(f"User: {settings.user.handle}")
        if is_run_from_ipython and pipeline_name is None:
            # track the notebook only if it hasn't been tracked yet
            if cls.notebook is None:
                cls._track_notebook()
        else:
            if pipeline_name is None:
                raise ValueError(
                    "Pass a pipeline name: ln.context.track(pipeline_name='...')"
                )
            cls._track_pipeline(name=pipeline_name)
            logger.info(f"Pipeline: {cls.pipeline}")
|
lamindb/_delete.py
CHANGED
@@ -48,14 +48,20 @@ def delete( # type: ignore
|
|
48
48
|
|
49
49
|
Example:
|
50
50
|
|
51
|
-
|
52
|
-
>>> experiment = ln.select(Experiment, id=experiment_id)
|
51
|
+
1) Delete by record
|
52
|
+
>>> experiment = ln.select(Experiment, id=experiment_id).one()
|
53
53
|
>>> ln.delete(experiment)
|
54
|
-
|
55
|
-
|
56
|
-
>>> ln.delete(
|
57
|
-
>>> #
|
58
|
-
|
54
|
+
|
55
|
+
2) Delete by fields
|
56
|
+
>>> ln.delete(Experiment, id=experiment_id)
|
57
|
+
>>> # the result is equivalent to 1)
|
58
|
+
|
59
|
+
3) Delete data objects (deleting the metadata record and the storage file)
|
60
|
+
>>> dobject = ln.select(DObject, id=dobject_id).one()
|
61
|
+
>>> # deleting the metadata record occurs automatically
|
62
|
+
>>> # you will be asked whether to delete the file from storage
|
63
|
+
>>> # or pass boolean values to `delete_data_from_storage`
|
64
|
+
>>> ln.delete(dobject, delete_data_from_storage)
|
59
65
|
|
60
66
|
Args:
|
61
67
|
record: One or multiple records as instances of `SQLModel`.
|
lamindb/_load.py
CHANGED
@@ -1,46 +1,36 @@
|
|
1
|
-
|
1
|
+
from typing import Optional
|
2
|
+
|
2
3
|
from lamin_logger import logger
|
3
|
-
from
|
4
|
+
from lnschema_core import DObject
|
5
|
+
from sqlalchemy.orm.session import object_session
|
6
|
+
|
7
|
+
from lamindb._context import context
|
4
8
|
|
5
9
|
from ._settings import settings
|
6
10
|
from .dev._core import filepath_from_dobject
|
7
11
|
from .dev.file import load_to_memory
|
8
12
|
|
9
13
|
|
10
|
-
def populate_runin(dobject: core.DObject, run: core.Run):
|
11
|
-
setup_settings.instance._cloud_sqlite_locker.lock()
|
12
|
-
with setup_settings.instance.session() as ss:
|
13
|
-
result = ss.get(core.link.RunIn, (run.id, dobject.id))
|
14
|
-
if result is None:
|
15
|
-
ss.add(
|
16
|
-
core.link.RunIn(
|
17
|
-
run_id=run.id,
|
18
|
-
dobject_id=dobject.id,
|
19
|
-
)
|
20
|
-
)
|
21
|
-
ss.commit()
|
22
|
-
logger.info(f"Added dobject ({dobject.id}) as input for run ({run.id}).")
|
23
|
-
setup_settings.instance._update_cloud_sqlite_file()
|
24
|
-
setup_settings.instance._cloud_sqlite_locker.unlock()
|
25
|
-
|
26
|
-
|
27
14
|
# this is exposed to the user as DObject.load
|
28
|
-
def load(dobject:
|
15
|
+
def load(dobject: DObject, stream: bool = False, is_run_input: Optional[bool] = None):
    """Load a data object into memory, optionally tracking it as a run input.

    Args:
        dobject: The data object to load.
        stream: Passed on to `load_to_memory`; a warning is logged if the
            suffix is neither `.h5ad` nor `.zarr`.
        is_run_input: Whether to register `dobject` as an input of the global
            run. If `None`, falls back to `settings.track_run_inputs_upon_load`.

    Raises:
        ValueError: If tracking is requested but `dobject` is not attached to
            a session, or no global run context is set.
    """
    streamable_suffixes = (".h5ad", ".zarr")
    if stream and dobject.suffix not in streamable_suffixes:
        logger.warning(f"Ignoring stream option for a {dobject.suffix} object.")

    should_track = (
        settings.track_run_inputs_upon_load if is_run_input is None else is_run_input
    )
    if should_track:
        session = object_session(dobject)
        if session is None:
            raise ValueError("Need to load with session open to track as input.")
        current_run = context.run
        if current_run is None:
            raise ValueError(
                "No global run context set. Call ln.context.track() or pass input run"
                " directly."
            )
        # link the run to the dobject and persist through the dobject's session
        dobject.targets.append(current_run)
        session.add(dobject)
        session.commit()
    # track_usage(dobject.id, "load")
    local_path = filepath_from_dobject(dobject)
    return load_to_memory(local_path, stream=stream)
|
lamindb/_nb.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
from pathlib import Path
|
2
1
|
from typing import List, Optional, Union
|
3
2
|
|
4
3
|
import nbproject as _nb
|
5
4
|
from lamin_logger import logger
|
6
|
-
from
|
7
|
-
from lnschema_core import Notebook, Run, dev
|
5
|
+
from lnschema_core import Notebook, Run
|
8
6
|
|
7
|
+
from ._context import context
|
9
8
|
|
9
|
+
|
10
|
+
# this whole class is deprecated, see lamindb.context instead!
|
10
11
|
class nb:
|
11
12
|
"""Manage Jupyter notebooks.
|
12
13
|
|
@@ -30,7 +31,7 @@ class nb:
|
|
30
31
|
id: Optional[str] = None,
|
31
32
|
v: Optional[str] = "0",
|
32
33
|
name: Optional[str] = None,
|
33
|
-
):
|
34
|
+
) -> Run:
|
34
35
|
"""Track the notebook & display metadata.
|
35
36
|
|
36
37
|
Call without arguments in most settings.
|
@@ -54,88 +55,22 @@ class nb:
|
|
54
55
|
v: Pass a notebook version manually.
|
55
56
|
name: Pass a notebook name manually.
|
56
57
|
"""
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
_nb.header(pypackage=pypackage, filepath=filepath, env=env)
|
65
|
-
except Exception:
|
66
|
-
raise RuntimeError(nbproject_failed_msg)
|
67
|
-
# this contains filepath if the header was run successfully
|
68
|
-
from nbproject._header import _filepath
|
69
|
-
|
70
|
-
id = _nb.meta.store.id
|
71
|
-
v = _nb.meta.store.version
|
72
|
-
name = Path(_filepath).stem
|
73
|
-
title = _nb.meta.live.title
|
74
|
-
elif id is None or name is None:
|
75
|
-
# Both id and name need to be passed if passing it manually
|
76
|
-
raise RuntimeError("Fix: Pass both id & name to ln.nb.header().")
|
77
|
-
else:
|
78
|
-
title = None
|
79
|
-
|
80
|
-
logger.info(f"Instance: {settings.instance.owner}/{settings.instance.name}")
|
81
|
-
|
82
|
-
import lamindb as ln
|
83
|
-
import lamindb.schema as lns
|
84
|
-
|
85
|
-
notebook = ln.select(
|
86
|
-
lns.Notebook,
|
87
|
-
id=id,
|
88
|
-
v=v,
|
89
|
-
).one_or_none()
|
90
|
-
if notebook is None:
|
91
|
-
notebook = lns.Notebook(
|
92
|
-
id=id,
|
93
|
-
v=v,
|
94
|
-
name=name,
|
95
|
-
title=title,
|
96
|
-
)
|
97
|
-
notebook = ln.add(notebook)
|
98
|
-
logger.info(f"Added notebook: {notebook.id} v{notebook.v}")
|
99
|
-
else:
|
100
|
-
logger.info(f"Loaded notebook: {notebook.id} v{notebook.v}")
|
101
|
-
if notebook.name != name or notebook.title != title:
|
102
|
-
notebook.name = name
|
103
|
-
notebook.title = title
|
104
|
-
ln.add(notebook)
|
105
|
-
logger.info("Updated notebook name or title.")
|
106
|
-
|
107
|
-
# at this point, we have a notebook object
|
58
|
+
logger.warning(
|
59
|
+
"DeprecationWarning: Please replace ln.nb.header() with ln.Run()"
|
60
|
+
)
|
61
|
+
context._track_notebook(
|
62
|
+
pypackage=pypackage, filepath=filepath, id=id, v=v, name=name, editor=env
|
63
|
+
)
|
64
|
+
notebook = context.notebook
|
108
65
|
cls.notebook = notebook
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
# run_test = ln.select(lns.Run, id=run.id).one_or_none()
|
116
|
-
# if run_test is None:
|
117
|
-
# logger.info("Passed run does not exist, adding it")
|
118
|
-
# ln.add(run)
|
119
|
-
if run is None:
|
120
|
-
# retrieve the latest run
|
121
|
-
run = (
|
122
|
-
ln.select(lns.Run, notebook_id=notebook.id, notebook_v=notebook.v)
|
123
|
-
.order_by(lns.Run.created_at.desc())
|
124
|
-
.first()
|
125
|
-
)
|
126
|
-
if run is not None:
|
127
|
-
logger.info(f"Loaded run: {run.id}") # type: ignore
|
128
|
-
elif run != "new":
|
129
|
-
raise ValueError("Fix: ln.nb.header(run='new')!")
|
130
|
-
|
131
|
-
# create a new run if doesn't exist yet or is requested by the user ("new")
|
132
|
-
if run is None or run == "new":
|
133
|
-
run = lns.Run(notebook_id=notebook.id, notebook_v=notebook.v)
|
134
|
-
run = ln.add(run) # type: ignore
|
135
|
-
logger.info(f"Added run: {run.id}") # type: ignore
|
136
|
-
|
137
|
-
# at this point, we have a run object
|
66
|
+
if run == "new":
|
67
|
+
run = Run(global_context=True)
|
68
|
+
elif run is None:
|
69
|
+
run = Run(global_context=True, load_latest=True)
|
70
|
+
else:
|
71
|
+
raise ValueError("Pass 'new' to ln.nb.header().")
|
138
72
|
cls.run = run
|
73
|
+
return run
|
139
74
|
|
140
75
|
@classmethod
|
141
76
|
def publish(cls, version: str = None, i_confirm_i_saved: bool = False):
|
lamindb/_record.py
CHANGED
@@ -29,12 +29,10 @@ NO_SOURCE_ERROR = """
|
|
29
29
|
Error: Please link a data source using the `source` argument.
|
30
30
|
Fix: Link a data source by passing a run, e.g., via
|
31
31
|
|
32
|
-
pipeline = ln.select("My ingestion pipeline").one()
|
33
32
|
run = lns.Run(pipeline=pipeline)
|
34
33
|
dobject = ln.DObject(..., source=run)
|
35
34
|
|
36
|
-
Or,
|
37
|
-
a global run context for the notebook.
|
35
|
+
Or, by calling ln.context.track(), which sets a global run context.
|
38
36
|
|
39
37
|
More details: https://lamin.ai/docs/faq/ingest
|
40
38
|
"""
|
@@ -73,9 +71,11 @@ def serialize(
|
|
73
71
|
return memory_rep, filepath, name, suffix
|
74
72
|
|
75
73
|
|
76
|
-
def get_hash(local_filepath, suffix):
|
74
|
+
def get_hash(local_filepath, suffix, check_hash: bool = True):
|
77
75
|
if suffix != ".zarr": # if not streamed
|
78
76
|
hash = hash_file(local_filepath)
|
77
|
+
if not check_hash:
|
78
|
+
return hash
|
79
79
|
result = select(lns_DObject, hash=hash).all()
|
80
80
|
if len(result) > 0:
|
81
81
|
msg = f"A dobject with same hash is already in the DB: {result}"
|
@@ -171,19 +171,19 @@ def parse_features(
|
|
171
171
|
).one_or_none()
|
172
172
|
if features is not None:
|
173
173
|
return features # features already exists!
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
174
|
+
else:
|
175
|
+
features = Features(id=features_hash, type=features_ref.entity)
|
176
|
+
records = get_features_records(parsing_id, features_ref, df_curated)
|
177
|
+
|
178
|
+
if isinstance(features_ref, Gene):
|
179
|
+
for record in records:
|
180
|
+
features.genes.append(record)
|
181
|
+
elif isinstance(features_ref, Protein):
|
182
|
+
for record in records:
|
183
|
+
features.proteins.append(record)
|
184
|
+
elif isinstance(features_ref, CellMarker):
|
185
|
+
for record in records:
|
186
|
+
features.cell_markers.append(record)
|
187
187
|
|
188
188
|
return features
|
189
189
|
|
@@ -204,11 +204,15 @@ def get_features(dobject_privates, features_ref):
|
|
204
204
|
|
205
205
|
def get_run(run: Optional[Run]) -> Run:
    """Return the run to use as a data source.

    Falls back to the global `context.run` when `run` is `None`.

    Raises:
        ValueError: If neither an explicit run nor a global run is available.
    """
    resolved = run
    if resolved is None:
        # local import, as in the original
        from ._context import context

        resolved = context.run
    if resolved is None:
        raise ValueError(NO_SOURCE_ERROR)
    # the following ensures that queried objects (within __init__)
    # behave like queried objects, only example right now: Run
    identity_key = resolved._ln_identity_key
    if identity_key is not None:
        resolved._sa_instance_state.key = identity_key
    return resolved
|
213
217
|
|
214
218
|
|
@@ -216,6 +220,7 @@ def get_path_size_hash(
|
|
216
220
|
filepath: Union[Path, UPath],
|
217
221
|
memory_rep: Optional[Union[pd.DataFrame, ad.AnnData]],
|
218
222
|
suffix: str,
|
223
|
+
check_hash: bool = True,
|
219
224
|
):
|
220
225
|
cloudpath = None
|
221
226
|
localpath = None
|
@@ -250,18 +255,20 @@ def get_path_size_hash(
|
|
250
255
|
else:
|
251
256
|
size = path.stat().st_size
|
252
257
|
localpath = filepath
|
253
|
-
hash = get_hash(filepath, suffix)
|
258
|
+
hash = get_hash(filepath, suffix, check_hash=check_hash)
|
254
259
|
|
255
260
|
return localpath, cloudpath, size, hash
|
256
261
|
|
257
262
|
|
263
|
+
# expose to user via ln.DObject
|
258
264
|
def get_dobject_kwargs_from_data(
|
259
265
|
data: Union[Path, UPath, str, pd.DataFrame, ad.AnnData],
|
260
266
|
*,
|
261
267
|
name: Optional[str] = None,
|
262
|
-
features_ref: Optional[Union[CellMarker, Gene, Protein]] = None,
|
263
268
|
source: Optional[Run] = None,
|
264
269
|
format: Optional[str] = None,
|
270
|
+
# backward compat
|
271
|
+
features_ref: Optional[Union[CellMarker, Gene, Protein]] = None,
|
265
272
|
):
|
266
273
|
run = get_run(source)
|
267
274
|
memory_rep, filepath, name, suffix = serialize(data, name, format)
|
@@ -278,10 +285,17 @@ def get_dobject_kwargs_from_data(
|
|
278
285
|
_memory_rep=memory_rep,
|
279
286
|
)
|
280
287
|
|
288
|
+
# TODO: remove later
|
289
|
+
# backward compat
|
281
290
|
if features_ref is not None:
|
291
|
+
logger.warning(
|
292
|
+
"DeprecationWarning: `features_ref` is deprecated, please use"
|
293
|
+
" `ln.Features`!"
|
294
|
+
)
|
282
295
|
features = [get_features(dobject_privates, features_ref)] # has to be list!
|
283
296
|
else:
|
284
297
|
features = []
|
298
|
+
|
285
299
|
dobject_kwargs = dict(
|
286
300
|
name=name,
|
287
301
|
suffix=suffix,
|
@@ -292,9 +306,29 @@ def get_dobject_kwargs_from_data(
|
|
292
306
|
source=run,
|
293
307
|
features=features,
|
294
308
|
)
|
309
|
+
|
295
310
|
return dobject_kwargs, dobject_privates
|
296
311
|
|
297
312
|
|
313
|
+
# expose to user via ln.Features
|
314
|
+
def get_features_from_data(
    data: Union[Path, UPath, str, pd.DataFrame, ad.AnnData],
    reference: Union[CellMarker, Gene, Protein],
    format: Optional[str] = None,
):
    """Build features for `data` against a `reference` entity.

    Serializes `data` under the fixed name "features", resolves its paths
    with `check_hash=False`, and passes the resulting private fields to
    `get_features`.

    Args:
        data: Filepath or in-memory data.
        reference: The reference passed on to `get_features`.
        format: Passed on to `serialize`.

    Returns:
        Whatever `get_features` returns for the assembled private fields.
    """
    in_memory, path, _, file_suffix = serialize(data, "features", format)
    # size and hash are not needed here, only the resolved paths
    local, cloud, _, _ = get_path_size_hash(
        path, in_memory, file_suffix, check_hash=False
    )
    privates = {
        "_local_filepath": local,
        "_cloud_filepath": cloud,
        "_memory_rep": in_memory,
    }
    return get_features(privates, reference)
|
330
|
+
|
331
|
+
|
298
332
|
def to_b64_str(bstr: bytes):
|
299
333
|
b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
|
300
334
|
return b64
|