lamindb 0.32.0rc1__py2.py3-none-any.whl → 0.33.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -14,6 +14,20 @@ and in-memory data objects (`DataFrame`, `AnnData`, etc.).
14
14
  DObject
15
15
  DFolder
16
16
 
17
+ Data objects are transformed by runs:
18
+
19
+ .. autosummary::
20
+ :toctree: .
21
+
22
+ Run
23
+
24
+ Tracking data by features:
25
+
26
+ .. autosummary::
27
+ :toctree: .
28
+
29
+ Features
30
+
17
31
  Query & manipulate data:
18
32
 
19
33
  .. autosummary::
@@ -44,13 +58,6 @@ Schema - entities and their relations:
44
58
 
45
59
  schema
46
60
 
47
- Track Jupyter notebooks:
48
-
49
- .. autosummary::
50
- :toctree: .
51
-
52
- nb
53
-
54
61
  Setup:
55
62
 
56
63
  .. autosummary::
@@ -63,11 +70,12 @@ Developer API:
63
70
  .. autosummary::
64
71
  :toctree: .
65
72
 
73
+ context
66
74
  settings
67
75
  dev
68
76
  """
69
77
 
70
- __version__ = "0.32.0rc1" # denote a release candidate for 0.1.0 with 0.1rc1
78
+ __version__ = "0.33.0" # denote a release candidate for 0.1.0 with 0.1rc1
71
79
 
72
80
  # prints warning of python versions
73
81
  from lamin_logger import logger as _logger
@@ -104,12 +112,11 @@ else:
104
112
  " instance."
105
113
  )
106
114
 
107
- from lnschema_core import DFolder # noqa
108
- from lnschema_core import DObject # noqa
115
+ from lnschema_core import DFolder, DObject, Features, Run # noqa
109
116
 
110
117
  dobject_doc = """Data objects in storage & memory.
111
118
 
112
- - Guide: :doc:`/guide/ingest`
119
+ - Guide: :doc:`/guide/track`
113
120
  - FAQ: :doc:`/faq/ingest`
114
121
 
115
122
  A `DObject` is typically instantiated from data using the arguments below.
@@ -119,14 +126,13 @@ fields directly.
119
126
  Args:
120
127
  data: Filepath or in-memory data.
121
128
  name: Name of the data object, required if an in-memory object is passed.
122
- features_ref: Reference against which to link features.
123
- source: The source of the data object (a :class:`~lamindb.schema.Run`).
129
+ source: The source of the data object (a :class:`~lamindb.Run`).
124
130
  id: The id of the dobject.
125
131
  format: Whether to use `h5ad` or `zarr` to store an `AnnData` object.
126
132
 
127
133
  Data objects (`dobjects`) represent atomic datasets in object storage:
128
134
  jointly measured observations of variables (features).
129
- They are generated by running code, instances of :class:`~lamindb.schema.Run`.
135
+ They are generated by running code, instances of :class:`~lamindb.Run`.
130
136
 
131
137
  A `dobject` may contain a single observation, for instance, a single image.
132
138
 
@@ -162,6 +168,7 @@ DObject.__doc__ = dobject_doc
162
168
  from . import dev # noqa
163
169
  from . import schema # noqa
164
170
  from . import setup # noqa
171
+ from ._context import context # noqa
165
172
  from ._delete import delete # noqa
166
173
  from ._nb import nb # noqa
167
174
  from ._settings import settings
@@ -4,14 +4,14 @@ from lnschema_core import __version__ as lnschema_core_v
4
4
  from nbproject import __version__ as nbproject_v
5
5
  from packaging import version
6
6
 
7
- if version.parse(lndb_v) != version.parse("0.37.1"):
8
- raise RuntimeError("Upgrade lndb! pip install lndb==0.37.1")
7
+ if version.parse(lndb_v) < version.parse("0.37.4"):
8
+ raise RuntimeError("Upgrade lndb! pip install lndb>=0.37.4")
9
9
 
10
- if version.parse(lnschema_core_v) != version.parse("0.29.1"):
11
- raise RuntimeError("lamindb needs lnschema_core==0.29.1")
10
+ if version.parse(lnschema_core_v) != version.parse("0.29.5"):
11
+ raise RuntimeError("lamindb needs lnschema_core==0.29.5")
12
12
 
13
- if version.parse(nbproject_v) < version.parse("0.8.2"):
14
- raise RuntimeError("lamindb needs nbproject>=0.8.2")
13
+ if version.parse(nbproject_v) < version.parse("0.8.3"):
14
+ raise RuntimeError("lamindb needs nbproject>=0.8.3")
15
15
 
16
16
  # ensure that the lamin package is not installed
17
17
  try:
lamindb/_context.py ADDED
@@ -0,0 +1,196 @@
1
+ from pathlib import Path
2
+ from typing import List, Optional, Union
3
+
4
+ import lnschema_core
5
+ import nbproject
6
+ from lamin_logger import logger
7
+ from lndb import settings
8
+ from lndb.dev import InstanceSettings
9
+ from lnschema_core import Notebook, Pipeline, Run, dev
10
+ from nbproject._is_run_from_ipython import is_run_from_ipython
11
+
12
+
13
+ class context:
14
+ """Global run context.
15
+
16
+ Set through `ln.Run(global_context=True)`.
17
+
18
+ Often, you'll want to call: `ln.Run(global_context=True, load_latest=True)`.
19
+ """
20
+
21
+ instance: Optional[InstanceSettings] = None
22
+ """Current instance."""
23
+ notebook: Optional[Notebook] = None
24
+ """Current notebook."""
25
+ pipeline: Optional[Pipeline] = None
26
+ """Current pipeline."""
27
+ run: Optional[Run] = None
28
+ """Current run."""
29
+
30
+ @classmethod
31
+ def _track_notebook(
32
+ cls,
33
+ *,
34
+ id: Optional[str] = None,
35
+ v: Optional[str] = "0",
36
+ name: Optional[str] = None,
37
+ filepath: Optional[str] = None,
38
+ pypackage: Union[str, List[str], None] = None,
39
+ editor: Optional[str] = None,
40
+ ):
41
+ """Track notebook.
42
+
43
+ Args:
44
+ id: Pass a notebook id manually.
45
+ v: Pass a notebook version manually.
46
+ name: Pass a notebook name manually.
47
+ pypackage: One or more python packages to track.
48
+ filepath: Filepath of notebook. Only needed if automatic inference fails.
49
+ editor: Editor environment. Only needed if automatic inference fails.
50
+ Pass `'lab'` for jupyter lab and `'notebook'` for jupyter notebook,
51
+ this can help to identify the correct mechanism for interactivity
52
+ when automatic inference fails.
53
+ """
54
+ cls.instance = settings.instance
55
+ # original location of this code was _nb
56
+ # legacy code here, see duplicated version in _run
57
+ if id is None and name is None:
58
+ nbproject_failed_msg = (
59
+ "Auto-retrieval of notebook name & title failed.\nPlease paste error"
60
+ " at: https://github.com/laminlabs/nbproject/issues/new \n\nFix: Run"
61
+ f" ln.nb.header(id={dev.id.notebook()}, name='my-notebook-name')"
62
+ )
63
+ try:
64
+ nbproject.header(
65
+ pypackage=pypackage, filepath=filepath, env=editor, display=False
66
+ )
67
+ except Exception:
68
+ raise RuntimeError(nbproject_failed_msg)
69
+ # this contains filepath if the header was run successfully
70
+ from nbproject._header import _filepath
71
+
72
+ id = nbproject.meta.store.id
73
+ v = nbproject.meta.store.version
74
+ name = Path(_filepath).stem
75
+ title = nbproject.meta.live.title
76
+ elif id is None or name is None:
77
+ # Both id and name need to be passed if passing it manually
78
+ raise RuntimeError("Fix: Pass both id & name to ln.nb.header().")
79
+ else:
80
+ title = None
81
+
82
+ import lamindb as ln
83
+ import lamindb.schema as lns
84
+
85
+ notebook = ln.select(
86
+ lns.Notebook,
87
+ id=id,
88
+ v=v,
89
+ ).one_or_none()
90
+ if notebook is None:
91
+ notebook = lns.Notebook(
92
+ id=id,
93
+ v=v,
94
+ name=name,
95
+ title=title,
96
+ )
97
+ notebook = ln.add(notebook)
98
+ logger.info(f"Added notebook: {notebook}")
99
+ else:
100
+ logger.info(f"Loaded notebook: {notebook}")
101
+ if notebook.name != name or notebook.title != title:
102
+ response = input(
103
+ "Updated notebook name and/or title: Do you want to assign a new id"
104
+ " or version? (y/n)"
105
+ )
106
+ if response == "y":
107
+ print("Notebook metadata will be re-initialized.")
108
+ new_id, new_v = None, None
109
+ response = input("Do you want to generate a new id? (y/n)")
110
+ if response == "y":
111
+ new_id = lnschema_core.dev.id.notebook()
112
+ response = input(
113
+ "Do you want to set a new version (e.g. '1.1')? Type 'n' for"
114
+ " 'no'. (version/n)"
115
+ )
116
+ if new_v != "n":
117
+ if new_v == "y":
118
+ response = input("Please type the version: ")
119
+ new_v = response
120
+ if new_id is not None or new_v is not None:
121
+ nbproject.meta.store.id = new_id
122
+ nbproject.meta.store.version = new_v
123
+ nbproject.meta.store.write()
124
+ # at this point, depending on the editor, the process
125
+ # might crash; that is OK, as upon re-running, the
126
+ # notebook will have new metadata and will be registered
127
+ # in the db. In case the python process does not exit, we
128
+ # need a new Notebook record
129
+ notebook = lns.Notebook(id=id, v=v)
130
+
131
+ notebook.name = name
132
+ notebook.title = title
133
+ ln.add(notebook)
134
+
135
+ # at this point, we have a notebook object
136
+ cls.notebook = notebook
137
+
138
+ @classmethod
139
+ def _track_pipeline(
140
+ cls,
141
+ name: str,
142
+ *,
143
+ version: Optional[str] = None,
144
+ ):
145
+ """Track pipeline.
146
+
147
+ Args:
148
+ name: Pipeline name.
149
+ version: Pipeline version. If `None`, load latest (sort by created_at).
150
+ """
151
+ cls.instance = settings.instance
152
+ import lamindb as ln
153
+ import lamindb.schema as lns
154
+
155
+ if version is not None:
156
+ pipeline = ln.select(lns.Pipeline, name=name, v=version).one()
157
+ else:
158
+ pipeline = (
159
+ ln.select(lns.Pipeline, name=name)
160
+ .order_by(lns.Pipeline.created_at.desc())
161
+ .first()
162
+ )
163
+ if pipeline is None:
164
+ response = input(
165
+ f"Did not find any pipeline record with name '{name}'. Create a new"
166
+ " one? (y/n)"
167
+ )
168
+ if response == "y":
169
+ pipeline = lns.Pipeline(name=name)
170
+ cls.pipeline = pipeline
171
+
172
+ @classmethod
173
+ def _track_notebook_pipeline(
174
+ cls, *, pipeline_name: Optional[str] = None, load_latest=True
175
+ ):
176
+ """Track notebook/pipeline and run.
177
+
178
+ When called from within a Python script, pass `pipeline_name`.
179
+
180
+ Args:
181
+ pipeline_name: Pipeline name.
182
+ load_latest: Load the latest run of the notebook or pipeline.
183
+ """
184
+ cls.instance = settings.instance
185
+ logger.info(f"Instance: {cls.instance.identifier}")
186
+ logger.info(f"User: {settings.user.handle}")
187
+ if is_run_from_ipython and pipeline_name is None:
188
+ if context.notebook is None:
189
+ cls._track_notebook()
190
+ else:
191
+ if pipeline_name is None:
192
+ raise ValueError(
193
+ "Pass a pipeline name: ln.context.track(pipeline_name='...')"
194
+ )
195
+ cls._track_pipeline(name=pipeline_name)
196
+ logger.info(f"Pipeline: {cls.pipeline}")
lamindb/_delete.py CHANGED
@@ -48,14 +48,20 @@ def delete( # type: ignore
48
48
 
49
49
  Example:
50
50
 
51
- >>> # Delete by record
52
- >>> experiment = ln.select(Experiment, id=experiment_id)
51
+ 1) Delete by record
52
+ >>> experiment = ln.select(Experiment, id=experiment_id).one()
53
53
  >>> ln.delete(experiment)
54
- >>> # Delete data objects
55
- >>> dobject = ln.select(DObject, id=dobject_id)
56
- >>> ln.delete(dobject)
57
- >>> # Delete by fields
58
- >>> ln.delete(DObject, id=dobject_id)
54
+
55
+ 2) Delete by fields
56
+ >>> ln.delete(Experiment, id=experiment_id)
57
+ >>> # the result is equivalent to 1)
58
+
59
+ 3) Delete data objects (deleting the metadata record and the storage file)
60
+ >>> dobject = ln.select(DObject, id=dobject_id).one()
61
+ >>> # deleting the metadata record occurs automatically
62
+ >>> # you will be asked whether to delete the file from storage
63
+ >>> # or pass boolean values to `delete_data_from_storage`
64
+ >>> ln.delete(dobject, delete_data_from_storage)
59
65
 
60
66
  Args:
61
67
  record: One or multiple records as instances of `SQLModel`.
lamindb/_load.py CHANGED
@@ -1,46 +1,36 @@
1
- import lnschema_core as core
1
+ from typing import Optional
2
+
2
3
  from lamin_logger import logger
3
- from lndb import settings as setup_settings
4
+ from lnschema_core import DObject
5
+ from sqlalchemy.orm.session import object_session
6
+
7
+ from lamindb._context import context
4
8
 
5
9
  from ._settings import settings
6
10
  from .dev._core import filepath_from_dobject
7
11
  from .dev.file import load_to_memory
8
12
 
9
13
 
10
- def populate_runin(dobject: core.DObject, run: core.Run):
11
- setup_settings.instance._cloud_sqlite_locker.lock()
12
- with setup_settings.instance.session() as ss:
13
- result = ss.get(core.link.RunIn, (run.id, dobject.id))
14
- if result is None:
15
- ss.add(
16
- core.link.RunIn(
17
- run_id=run.id,
18
- dobject_id=dobject.id,
19
- )
20
- )
21
- ss.commit()
22
- logger.info(f"Added dobject ({dobject.id}) as input for run ({run.id}).")
23
- setup_settings.instance._update_cloud_sqlite_file()
24
- setup_settings.instance._cloud_sqlite_locker.unlock()
25
-
26
-
27
14
  # this is exposed to the user as DObject.load
28
- def load(dobject: core.DObject, stream: bool = False, is_run_input: bool = False):
15
+ def load(dobject: DObject, stream: bool = False, is_run_input: Optional[bool] = None):
29
16
  if stream and dobject.suffix not in (".h5ad", ".zarr"):
30
17
  logger.warning(f"Ignoring stream option for a {dobject.suffix} object.")
31
-
32
- filepath = filepath_from_dobject(dobject)
33
- # TODO: better design to track run inputs
34
- if settings.track_run_inputs_upon_load or is_run_input:
35
- from lamindb import nb
36
-
37
- if nb.run is None:
38
- logger.warning(
39
- "Input tracking for runs through `load` is currently only implemented"
40
- " for notebooks."
18
+ if is_run_input is None:
19
+ track_run_input = settings.track_run_inputs_upon_load
20
+ else:
21
+ track_run_input = is_run_input
22
+ if track_run_input:
23
+ if object_session(dobject) is None:
24
+ raise ValueError("Need to load with session open to track as input.")
25
+ if context.run is None:
26
+ raise ValueError(
27
+ "No global run context set. Call ln.context.track() or pass input run"
28
+ " directly."
41
29
  )
42
30
  else:
43
- populate_runin(dobject, nb.run)
44
- # TODO: enable track usage
31
+ dobject.targets.append(context.run)
32
+ session = object_session(dobject)
33
+ session.add(dobject)
34
+ session.commit()
45
35
  # track_usage(dobject.id, "load")
46
- return load_to_memory(filepath, stream=stream)
36
+ return load_to_memory(filepath_from_dobject(dobject), stream=stream)
lamindb/_nb.py CHANGED
@@ -1,12 +1,13 @@
1
- from pathlib import Path
2
1
  from typing import List, Optional, Union
3
2
 
4
3
  import nbproject as _nb
5
4
  from lamin_logger import logger
6
- from lndb import settings
7
- from lnschema_core import Notebook, Run, dev
5
+ from lnschema_core import Notebook, Run
8
6
 
7
+ from ._context import context
9
8
 
9
+
10
+ # this whole class is deprecated, see lamindb.context instead!
10
11
  class nb:
11
12
  """Manage Jupyter notebooks.
12
13
 
@@ -30,7 +31,7 @@ class nb:
30
31
  id: Optional[str] = None,
31
32
  v: Optional[str] = "0",
32
33
  name: Optional[str] = None,
33
- ):
34
+ ) -> Run:
34
35
  """Track the notebook & display metadata.
35
36
 
36
37
  Call without arguments in most settings.
@@ -54,88 +55,22 @@ class nb:
54
55
  v: Pass a notebook version manually.
55
56
  name: Pass a notebook name manually.
56
57
  """
57
- if id is None and name is None:
58
- nbproject_failed_msg = (
59
- "Auto-retrieval of notebook name & title failed.\nPlease paste error"
60
- " at: https://github.com/laminlabs/nbproject/issues/new \n\nFix: Run"
61
- f" ln.nb.header(id={dev.id.notebook()}, name='my-notebook-name')"
62
- )
63
- try:
64
- _nb.header(pypackage=pypackage, filepath=filepath, env=env)
65
- except Exception:
66
- raise RuntimeError(nbproject_failed_msg)
67
- # this contains filepath if the header was run successfully
68
- from nbproject._header import _filepath
69
-
70
- id = _nb.meta.store.id
71
- v = _nb.meta.store.version
72
- name = Path(_filepath).stem
73
- title = _nb.meta.live.title
74
- elif id is None or name is None:
75
- # Both id and name need to be passed if passing it manually
76
- raise RuntimeError("Fix: Pass both id & name to ln.nb.header().")
77
- else:
78
- title = None
79
-
80
- logger.info(f"Instance: {settings.instance.owner}/{settings.instance.name}")
81
-
82
- import lamindb as ln
83
- import lamindb.schema as lns
84
-
85
- notebook = ln.select(
86
- lns.Notebook,
87
- id=id,
88
- v=v,
89
- ).one_or_none()
90
- if notebook is None:
91
- notebook = lns.Notebook(
92
- id=id,
93
- v=v,
94
- name=name,
95
- title=title,
96
- )
97
- notebook = ln.add(notebook)
98
- logger.info(f"Added notebook: {notebook.id} v{notebook.v}")
99
- else:
100
- logger.info(f"Loaded notebook: {notebook.id} v{notebook.v}")
101
- if notebook.name != name or notebook.title != title:
102
- notebook.name = name
103
- notebook.title = title
104
- ln.add(notebook)
105
- logger.info("Updated notebook name or title.")
106
-
107
- # at this point, we have a notebook object
58
+ logger.warning(
59
+ "DeprecationWarning: Please replace ln.nb.header() with ln.Run()"
60
+ )
61
+ context._track_notebook(
62
+ pypackage=pypackage, filepath=filepath, id=id, v=v, name=name, editor=env
63
+ )
64
+ notebook = context.notebook
108
65
  cls.notebook = notebook
109
-
110
- # check user input
111
- # if isinstance(run, lns.Run):
112
- # This here might be something we may want in the future
113
- # but checking all the cases in which that run record has integrity
114
- # is quite a bit of code - not now!
115
- # run_test = ln.select(lns.Run, id=run.id).one_or_none()
116
- # if run_test is None:
117
- # logger.info("Passed run does not exist, adding it")
118
- # ln.add(run)
119
- if run is None:
120
- # retrieve the latest run
121
- run = (
122
- ln.select(lns.Run, notebook_id=notebook.id, notebook_v=notebook.v)
123
- .order_by(lns.Run.created_at.desc())
124
- .first()
125
- )
126
- if run is not None:
127
- logger.info(f"Loaded run: {run.id}") # type: ignore
128
- elif run != "new":
129
- raise ValueError("Fix: ln.nb.header(run='new')!")
130
-
131
- # create a new run if doesn't exist yet or is requested by the user ("new")
132
- if run is None or run == "new":
133
- run = lns.Run(notebook_id=notebook.id, notebook_v=notebook.v)
134
- run = ln.add(run) # type: ignore
135
- logger.info(f"Added run: {run.id}") # type: ignore
136
-
137
- # at this point, we have a run object
66
+ if run == "new":
67
+ run = Run(global_context=True)
68
+ elif run is None:
69
+ run = Run(global_context=True, load_latest=True)
70
+ else:
71
+ raise ValueError("Pass 'new' to ln.nb.header().")
138
72
  cls.run = run
73
+ return run
139
74
 
140
75
  @classmethod
141
76
  def publish(cls, version: str = None, i_confirm_i_saved: bool = False):
lamindb/_record.py CHANGED
@@ -29,12 +29,10 @@ NO_SOURCE_ERROR = """
29
29
  Error: Please link a data source using the `source` argument.
30
30
  Fix: Link a data source by passing a run, e.g., via
31
31
 
32
- pipeline = ln.select("My ingestion pipeline").one()
33
32
  run = lns.Run(pipeline=pipeline)
34
33
  dobject = ln.DObject(..., source=run)
35
34
 
36
- Or, if you're in a notebook, call `ln.nb.header()` at the top, which creates
37
- a global run context for the notebook.
35
+ Or, by calling ln.context.track(), which sets a global run context.
38
36
 
39
37
  More details: https://lamin.ai/docs/faq/ingest
40
38
  """
@@ -73,9 +71,11 @@ def serialize(
73
71
  return memory_rep, filepath, name, suffix
74
72
 
75
73
 
76
- def get_hash(local_filepath, suffix):
74
+ def get_hash(local_filepath, suffix, check_hash: bool = True):
77
75
  if suffix != ".zarr": # if not streamed
78
76
  hash = hash_file(local_filepath)
77
+ if not check_hash:
78
+ return hash
79
79
  result = select(lns_DObject, hash=hash).all()
80
80
  if len(result) > 0:
81
81
  msg = f"A dobject with same hash is already in the DB: {result}"
@@ -171,19 +171,19 @@ def parse_features(
171
171
  ).one_or_none()
172
172
  if features is not None:
173
173
  return features # features already exists!
174
-
175
- features = Features(id=features_hash, type=features_ref.entity)
176
- records = get_features_records(parsing_id, features_ref, df_curated)
177
-
178
- if isinstance(features_ref, Gene):
179
- for record in records:
180
- features.genes.append(record)
181
- elif isinstance(features_ref, Protein):
182
- for record in records:
183
- features.proteins.append(record)
184
- elif isinstance(features_ref, CellMarker):
185
- for record in records:
186
- features.cell_markers.append(record)
174
+ else:
175
+ features = Features(id=features_hash, type=features_ref.entity)
176
+ records = get_features_records(parsing_id, features_ref, df_curated)
177
+
178
+ if isinstance(features_ref, Gene):
179
+ for record in records:
180
+ features.genes.append(record)
181
+ elif isinstance(features_ref, Protein):
182
+ for record in records:
183
+ features.proteins.append(record)
184
+ elif isinstance(features_ref, CellMarker):
185
+ for record in records:
186
+ features.cell_markers.append(record)
187
187
 
188
188
  return features
189
189
 
@@ -204,11 +204,15 @@ def get_features(dobject_privates, features_ref):
204
204
 
205
205
  def get_run(run: Optional[Run]) -> Run:
206
206
  if run is None:
207
- from . import nb
207
+ from ._context import context
208
208
 
209
- run = nb.run
209
+ run = context.run
210
210
  if run is None:
211
211
  raise ValueError(NO_SOURCE_ERROR)
212
+ # the following ensures that queried objects (within __init__)
213
+ # behave like queried objects, only example right now: Run
214
+ if run._ln_identity_key is not None:
215
+ run._sa_instance_state.key = run._ln_identity_key
212
216
  return run
213
217
 
214
218
 
@@ -216,6 +220,7 @@ def get_path_size_hash(
216
220
  filepath: Union[Path, UPath],
217
221
  memory_rep: Optional[Union[pd.DataFrame, ad.AnnData]],
218
222
  suffix: str,
223
+ check_hash: bool = True,
219
224
  ):
220
225
  cloudpath = None
221
226
  localpath = None
@@ -250,18 +255,20 @@ def get_path_size_hash(
250
255
  else:
251
256
  size = path.stat().st_size
252
257
  localpath = filepath
253
- hash = get_hash(filepath, suffix)
258
+ hash = get_hash(filepath, suffix, check_hash=check_hash)
254
259
 
255
260
  return localpath, cloudpath, size, hash
256
261
 
257
262
 
263
+ # expose to user via ln.DObject
258
264
  def get_dobject_kwargs_from_data(
259
265
  data: Union[Path, UPath, str, pd.DataFrame, ad.AnnData],
260
266
  *,
261
267
  name: Optional[str] = None,
262
- features_ref: Optional[Union[CellMarker, Gene, Protein]] = None,
263
268
  source: Optional[Run] = None,
264
269
  format: Optional[str] = None,
270
+ # backward compat
271
+ features_ref: Optional[Union[CellMarker, Gene, Protein]] = None,
265
272
  ):
266
273
  run = get_run(source)
267
274
  memory_rep, filepath, name, suffix = serialize(data, name, format)
@@ -278,10 +285,17 @@ def get_dobject_kwargs_from_data(
278
285
  _memory_rep=memory_rep,
279
286
  )
280
287
 
288
+ # TODO: remove later
289
+ # backward compat
281
290
  if features_ref is not None:
291
+ logger.warning(
292
+ "DeprecationWarning: `features_ref` is deprecated, please use"
293
+ " `ln.Features`!"
294
+ )
282
295
  features = [get_features(dobject_privates, features_ref)] # has to be list!
283
296
  else:
284
297
  features = []
298
+
285
299
  dobject_kwargs = dict(
286
300
  name=name,
287
301
  suffix=suffix,
@@ -292,9 +306,29 @@ def get_dobject_kwargs_from_data(
292
306
  source=run,
293
307
  features=features,
294
308
  )
309
+
295
310
  return dobject_kwargs, dobject_privates
296
311
 
297
312
 
313
+ # expose to user via ln.Features
314
+ def get_features_from_data(
315
+ data: Union[Path, UPath, str, pd.DataFrame, ad.AnnData],
316
+ reference: Union[CellMarker, Gene, Protein],
317
+ format: Optional[str] = None,
318
+ ):
319
+ memory_rep, filepath, _, suffix = serialize(data, "features", format)
320
+ localpath, cloudpath, _, _ = get_path_size_hash(
321
+ filepath, memory_rep, suffix, check_hash=False
322
+ )
323
+
324
+ dobject_privates = dict(
325
+ _local_filepath=localpath,
326
+ _cloud_filepath=cloudpath,
327
+ _memory_rep=memory_rep,
328
+ )
329
+ return get_features(dobject_privates, reference)
330
+
331
+
298
332
  def to_b64_str(bstr: bytes):
299
333
  b64 = base64.urlsafe_b64encode(bstr).decode().strip("=")
300
334
  return b64