lamindb 0.49.3__py3-none-any.whl → 0.50.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
+from typing import Dict, List, Union
+
+import pandas as pd
+from lnschema_core.models import Dataset, FeatureSet, File
+
+from .._query_set import QuerySet
+
+
+def create_features_df(
+    file: File, feature_sets: List[FeatureSet], exclude: bool = True
+):
+    features = []
+    for feature_set in feature_sets:
+        if exclude:
+            features_df = feature_set.features.exclude(registries__isnull=True).df()
+        else:
+            features_df = feature_set.features.df()
+        slots = file.feature_sets.through.objects.filter(
+            file=file, feature_set=feature_set
+        ).list("slot")
+        for slot in slots:
+            features_df["slot"] = slot
+            features.append(features_df)
+    features_df = pd.concat(features)
+    return features_df.sort_values(["slot", "registries"])
+
+
+def get_accessor_by_orm(host: Union[File, Dataset]) -> Dict:
+    dictionary = {
+        field.related_model.__get_name_with_schema__(): field.name
+        for field in host._meta.related_objects
+    }
+    dictionary["core.Feature"] = "features"
+    dictionary["core.Label"] = "labels"
+    return dictionary
+
+
+def get_feature_set_by_slot(host) -> Dict:
+    # if the host is not yet saved
+    if host._state.adding:
+        return host._feature_sets
+    # otherwise, we need a query
+    feature_set_links = host.feature_sets.through.objects.filter(file_id=host.id)
+    return {
+        feature_set_link.slot: FeatureSet.objects.get(
+            id=feature_set_link.feature_set_id
+        )
+        for feature_set_link in feature_set_links
+    }
+
+
+class FeatureManager:
+    """Feature manager (:attr:`~lamindb.dev.Data.features`).
+
+    See :class:`~lamindb.dev.Data` for more information.
+    """
+
+    def __init__(self, host: Union[File, Dataset]):
+        self._host = host
+        self._feature_set_by_slot = get_feature_set_by_slot(host)
+        self._accessor_by_orm = get_accessor_by_orm(host)
+
+    def __repr__(self) -> str:
+        if len(self._feature_set_by_slot) > 0:
+            msg = ""
+            for slot, feature_set in self._feature_set_by_slot.items():
+                msg += f"'{slot}': {feature_set}\n"
+            return msg
+        else:
+            return "no linked features"
+
+    def __getitem__(self, slot) -> QuerySet:
+        if slot not in self._feature_set_by_slot:
+            raise ValueError(
+                f"No linked feature set for slot: {slot}\nDid you get validation"
+                " warnings? Only features that match registered features get validated"
+                " and linked."
+            )
+        feature_set = self._feature_set_by_slot[slot]
+        orm_name = feature_set.registry
+        return getattr(feature_set, self._accessor_by_orm[orm_name]).all()
+
+    def add_feature_set(self, feature_set: FeatureSet, slot: str):
+        """Add new feature set to a slot.
+
+        Args:
+            feature_set: `FeatureSet` A feature set object.
+            slot: `str` The access slot.
+        """
+        if self._host._state.adding:
+            raise ValueError(
+                "Please save the file or dataset before adding a feature set!"
+            )
+        feature_set.save()
+        link_record = self._host.feature_sets.through.objects.filter(
+            file=self._host, feature_set=feature_set, slot=slot
+        ).one_or_none()
+        if link_record is None:
+            self._host.feature_sets.through(
+                file=self._host, feature_set=feature_set, slot=slot
+            ).save()
+        self._feature_set_by_slot[slot] = feature_set
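For orientation, a minimal usage sketch of the new feature manager. The lookups that produce `file` and `feature_set` are hypothetical placeholders; only `add_feature_set` and the slot-based `__getitem__` come from the code above.

```python
import lamindb as ln

# hypothetical lookups: any saved File and an existing FeatureSet record work
file = ln.File.filter(description="pbmc68k test").one()
feature_set = ln.FeatureSet.filter().first()

# link the feature set under a slot; raises ValueError if `file` is not yet saved
file.features.add_feature_set(feature_set, slot="var")

# __getitem__ returns a QuerySet of the features linked under that slot
print(file.features["var"].df())
```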
lamindb/dev/_settings.py CHANGED
@@ -14,7 +14,7 @@ class Settings:
     """
 
     def __init__(self):
-        self._verbosity: int = 2  # info-level logging
+        self._verbosity: int = 4  # hint-level logging
         logger.set_verbosity(self._verbosity)
 
     upon_file_create_if_hash_exists: Literal[
@@ -34,7 +34,7 @@ class Settings:
     It speeds up file creation by about a factor 100.
     """
     upon_create_search_names: bool = True
-    """To speed up creating ORM objects (default `True`).
+    """To speed up creating Registry objects (default `True`).
 
     If `True`, search for alternative names.
 
@@ -80,13 +80,14 @@ class Settings:
 
     @property
     def verbosity(self) -> int:
-        """Verbosity (default 3).
-
-        - 0: only show 'error' messages
-        - 1: also show 'warning' messages
-        - 2: also show 'info' messages
-        - 3: also show 'hint' messages
-        - 4: also show detailed 'debug' messages
+        """Verbosity (default 4 / 'hint').
+
+        - 0: only show 'error' messages
+        - 1: 🔶 also show 'warning' messages
+        - 2: also show 'success' messages
+        - 3: 💬 also show 'info' messages
+        - 4: 💡 also show 'hint' messages
+        - 5: 🐛 also show detailed 'debug' messages
 
         This is based on Scanpy's and Django's verbosity setting.
         """
@@ -1,7 +1,12 @@
 from typing import List, Set, Union
 
-from lnschema_core import ORM, File, Run
-from lnschema_core.models import format_datetime
+from lnschema_core import File, Registry, Run, Transform
+from lnschema_core.models import format_field_value
+
+LAMIN_GREEN_LIGHTER = "#10b981"
+LAMIN_GREEN_DARKER = "#065f46"
+GREEN_FILL = "honeydew"
+TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}
 
 
 def view_lineage(file: File, with_children: bool = True):
@@ -33,8 +38,8 @@ def view_lineage(file: File, with_children: bool = True):
     u = graphviz.Digraph(
         file.id,
         node_attr={
-            "fillcolor": "honeydew",
-            "color": "seagreen",
+            "fillcolor": GREEN_FILL,
+            "color": LAMIN_GREEN_DARKER,
             "fontname": "Helvetica",
             "fontsize": "10",
         },
@@ -51,7 +56,7 @@ def view_lineage(file: File, with_children: bool = True):
         else:
             style = "rounded,filled"
             shape = "box"
-            fillcolor = "honeydew"
+            fillcolor = GREEN_FILL
         u.node(
             node_id,
             label=node_label,
@@ -66,12 +71,12 @@ def view_lineage(file: File, with_children: bool = True):
             add_node(row["target_record"], row["target"], row["target_label"], u)
 
         u.edge(row["source"], row["target"], color="dimgrey")
-    # label the searched file mediumseagreen
+    # label the searched file
     u.node(
         file.id,
         label=file_label,
         style="rounded,filled",
-        fillcolor="mediumseagreen",
+        fillcolor=LAMIN_GREEN_LIGHTER,
         shape="box",
     )
 
@@ -79,7 +84,7 @@ def view_lineage(file: File, with_children: bool = True):
 
 
 def view_parents(
-    record: ORM, field: str, with_children: bool = False, distance: int = 100
+    record: Registry, field: str, with_children: bool = False, distance: int = 100
 ):
     """Graph of parents."""
     if not hasattr(record, "parents"):
@@ -105,8 +110,8 @@ def view_parents(
     u = graphviz.Digraph(
         record.id,
         node_attr={
-            "color": "seagreen",
-            "fillcolor": "honeydew",
+            "color": LAMIN_GREEN_DARKER,
+            "fillcolor": GREEN_FILL,
             "shape": "box",
             "style": "rounded,filled",
             "fontname": "Helvetica",
@@ -116,17 +121,17 @@ def view_parents(
     )
     u.node(
         record_label.replace(":", "_"),
-        label=record_label,
-        fillcolor="mediumseagreen",
+        label=_add_emoji(record, record_label),
+        fillcolor=LAMIN_GREEN_LIGHTER,
     )
     for _, row in df_edges.iterrows():
-        u.node(row["source"], label=row["source_label"])
+        u.node(row["source"], label=_add_emoji(record, row["source_label"]))
         u.edge(row["source"], row["target"], color="dimgrey")
 
     return u
 
 
-def _get_parents(record: ORM, field: str, distance: int, children: bool = False):
+def _get_parents(record: Registry, field: str, distance: int, children: bool = False):
     """Recursively get parent records within a distance."""
     if children:
         key = "parents"
@@ -152,7 +157,7 @@ def _get_parents(record: ORM, field: str, distance: int, children: bool = False)
 
 
 def _df_edges_from_parents(
-    record: ORM, field: str, distance: int, children: bool = False
+    record: Registry, field: str, distance: int, children: bool = False
 ):
     """Construct a DataFrame of edges as the input of graphviz.Digraph."""
     key = "children" if children else "parents"
@@ -178,6 +183,16 @@ def _df_edges_from_parents(
     return df_edges
 
 
+def _add_emoji(record: Registry, label: str):
+    if record.__class__.__name__ == "Transform":
+        emoji = TRANSFORM_EMOJIS.get(record.type, "💫")
+    elif record.__class__.__name__ == "Run":
+        emoji = TRANSFORM_EMOJIS.get(record.transform.type, "💫")
+    else:
+        emoji = ""
+    return f"{emoji} {label}"
+
+
 def _get_all_parent_runs(file: File):
     """Get all input file runs recursively."""
     all_runs = {file.run}
@@ -222,9 +237,9 @@ def _label_file_run(record: Union[File, Run]):
     elif isinstance(record, Run):
         name = f'{record.transform.name.replace("&", "&amp;")}'
         return (
-            rf'<{name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
+            rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'  # noqa
             rf' FACE="Monospace">id={record.id}<BR/>type={record.transform.type},'
-            rf" user={record.created_by.name}<BR/>run_at={format_datetime(record.run_at)}</FONT>>"  # noqa
+            rf" user={record.created_by.name}<BR/>run_at={format_field_value(record.run_at)}</FONT>>"  # noqa
         )
 
 
@@ -248,3 +263,7 @@ def _df_edges_from_runs(all_runs: List[Run]):
     df["source_label"] = df["source_record"].apply(_label_file_run)
     df["target_label"] = df["target_record"].apply(_label_file_run)
     return df
+
+
+def _transform_emoji(transform: Transform):
+    return TRANSFORM_EMOJIS.get(transform.type, "💫")
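A standalone sketch of the emoji-prefixed node labels these helpers produce; the mapping is copied from the constants above, and `label_for` is an illustrative stand-in for `_add_emoji`/`_transform_emoji`, not part of the package.

```python
TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}


def label_for(transform_type: str, name: str) -> str:
    # unknown transform types fall back to 💫, as in _add_emoji above
    return f"{TRANSFORM_EMOJIS.get(transform_type, '💫')} {name}"


print(label_for("notebook", "Analysis.ipynb"))  # 📔 Analysis.ipynb
print(label_for("script", "preprocess.py"))     # 💫 preprocess.py
```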
@@ -1,17 +1,17 @@
-"""Small example datasets.
+"""Test datasets.
 
 .. autosummary::
    :toctree: .
 
    file_fcs
    file_fcs_alpert19
+   file_tsv_rnaseq_nfcore_salmon_merged_gene_counts
    file_jpg_paradisi05
    file_tiff_suo22
    file_fastq
    file_bam
    file_mini_csv
    dir_scrnaseq_cellranger
-   generate_cell_ranger_files
    df_iris
    df_iris_in_meter
    df_iris_in_meter_batch1
@@ -19,6 +19,7 @@
    anndata_mouse_sc_lymph_node
    anndata_human_immune_cells
    anndata_pbmc68k_reduced
+   anndata_file_pbmc68k_test
    anndata_pbmc3k_processed
    anndata_with_obs
    anndata_suo22_Visium10X
@@ -29,6 +30,7 @@
 """
 
 from ._core import (
+    anndata_file_pbmc68k_test,
     anndata_human_immune_cells,
     anndata_mouse_sc_lymph_node,
     anndata_pbmc3k_processed,
@@ -47,7 +49,7 @@ from ._core import (
     file_jpg_paradisi05,
     file_mini_csv,
     file_tiff_suo22,
-    generate_cell_ranger_files,
+    file_tsv_rnaseq_nfcore_salmon_merged_gene_counts,
     mudata_papalexi21_subset,
     schmidt22_crispra_gws_IFNG,
     schmidt22_perturbseq,
@@ -39,6 +39,18 @@ def file_jpg_paradisi05() -> Path:
     return Path(filepath)
 
 
+def file_tsv_rnaseq_nfcore_salmon_merged_gene_counts() -> Path:
+    """Gene counts table from nf-core RNA-seq pipeline.
+
+    Output of: https://nf-co.re/rnaseq
+    """
+    filepath, _ = urlretrieve(
+        "https://lamindb-test.s3.amazonaws.com/salmon.merged.gene_counts.tsv",
+        "salmon.merged.gene_counts.tsv",
+    )
+    return Path(filepath)
+
+
 def file_fastq(in_storage_root=False) -> Path:
     """Mini mock fastq file."""
     basedir = Path(".") if not in_storage_root else settings.storage
@@ -80,21 +92,6 @@ def file_tiff_suo22():
     return Path(filepath)
 
 
-def dir_scrnaseq_cellranger(in_storage_root=False) -> Path:
-    """Directory with exemplary scrnaseq cellranger input and output."""
-    filepath, _ = urlretrieve(
-        "https://lamindb-test.s3.amazonaws.com/cellranger_run_001.zip"
-    )
-    from zipfile import ZipFile
-
-    basedir = Path(".") if not in_storage_root else settings.storage
-    with ZipFile(filepath, "r") as zipObj:
-        # Extract all the contents of zip file in current directory
-        zipObj.extractall(path=basedir)
-
-    return basedir / "cellranger_run_001"
-
-
 def anndata_mouse_sc_lymph_node() -> ad.AnnData:
     """Mouse lymph node scRNA-seq dataset from EBI.
 
@@ -135,6 +132,28 @@ def anndata_pbmc68k_reduced() -> ad.AnnData:
     return ad.read(filepath)
 
 
+def anndata_file_pbmc68k_test() -> Path:
+    """Modified from scanpy.datasets.pbmc68k_reduced().
+
+    Additional slots were added for testing purposes. Returns the filepath.
+
+    To reproduce::
+
+        pbmc68k = ln.dev.datasets.anndata_pbmc68k_reduced()
+        pbmc68k_test = pbmc68k[:30, :200].copy()
+        pbmc68k_test.raw = pbmc68k_test[:, :100]
+        pbmc68k_test.obsp["test"] = sparse.eye(pbmc68k_test.shape[0], format="csr")
+        pbmc68k_test.varp["test"] = sparse.eye(pbmc68k_test.shape[1], format="csr")
+        pbmc68k_test.layers["test"] = sparse.csr_matrix(pbmc68k_test.shape)
+        pbmc68k_test.layers["test"][0] = 1.
+        pbmc68k_test.write("pbmc68k_test.h5ad")
+    """
+    filepath, _ = urlretrieve(
+        "https://lamindb-test.s3.amazonaws.com/pbmc68k_test.h5ad", "pbmc68k_test.h5ad"
+    )
+    return Path(filepath)
+
+
 def anndata_pbmc3k_processed() -> ad.AnnData:
     """Modified from scanpy.pbmc3k_processed()."""
     filepath, _ = urlretrieve(
@@ -272,7 +291,7 @@ def df_iris_in_meter_batch2() -> pd.DataFrame:
     return df_iris.iloc[len(df_iris) // 2 :]
 
 
-def generate_cell_ranger_files(
+def dir_scrnaseq_cellranger(
     sample_name: str, basedir: Union[str, Path] = "./", output_only: bool = True
 ):
     """Generate mock cell ranger outputs.
@@ -281,7 +300,6 @@ def generate_cell_ranger_files(
         sample_name: name of the sample
         basedir: run directory
         output_only: only generate output files
-
     """
     basedir = Path(basedir)
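A short sketch of fetching the new nf-core example table and reading it with pandas (this mirrors what the `.tsv` reader added in the storage module below does; the file is downloaded into the working directory):

```python
import pandas as pd
import lamindb as ln

path = ln.dev.datasets.file_tsv_rnaseq_nfcore_salmon_merged_gene_counts()
df = pd.read_csv(path, sep="\t")
print(df.head())
```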
lamindb/dev/exc.py ADDED
@@ -0,0 +1,4 @@
+class ValidationError(Exception):
+    """Validation error: not mapped in registry."""
+
+    pass
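A hypothetical sketch of how calling code might surface this error; the `lookup_gene` helper and its registry dict are illustrative, only the exception class itself comes from this new module.

```python
from lamindb.dev.exc import ValidationError


def lookup_gene(symbol: str, registry: dict) -> str:
    # illustrative: raise when a value is not mapped in the registry
    if symbol not in registry:
        raise ValidationError(f"{symbol} is not mapped in the registry")
    return registry[symbol]
```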
@@ -94,12 +94,23 @@ def read_dataframe(elem: Union[h5py.Dataset, h5py.Group]):
 def safer_read_partial(elem, indices):
     if get_spec(elem).encoding_type == "":
         if isinstance(elem, h5py.Dataset):
-            return elem[indices]
-        else:
-            raise ValueError(
-                "Can not get a subset of the element of type"
-                f" {type(elem).__name__} with an empty spec."
-            )
+            dims = len(elem.shape)
+            if dims == 2:
+                return elem[indices]
+            elif dims == 1:
+                if indices[0] == slice(None):
+                    return elem[indices[1]]
+                elif indices[1] == slice(None):
+                    return elem[indices[0]]
+        elif isinstance(elem, h5py.Group):
+            try:
+                return SparseDataset(elem)[indices]
+            except Exception:
+                pass
+        raise ValueError(
+            "Can not get a subset of the element of type"
+            f" {type(elem).__name__} with an empty spec."
+        )
     else:
         return read_elem_partial(elem, indices=indices)
 
@@ -140,6 +151,16 @@ if ZARR_INSTALLED:
     GroupTypes.append(zarr.Group)
     StorageTypes.append(zarr.Group)
 
+    def _subset_sparse_zarr(elem: zarr.Group, indices):
+        ds = SparseDataset(elem)
+        has_arrays = isinstance(indices[0], np.ndarray) or isinstance(
+            indices[1], np.ndarray
+        )
+        if not has_arrays and indices == (slice(None), slice(None)):
+            return ds.to_memory()
+        else:
+            return ds[indices]
+
     @registry.register_open("zarr")
     def open(filepath: Union[UPath, Path, str]):  # noqa
         fs, file_path_str = infer_filesystem(filepath)
@@ -156,16 +177,31 @@ if ZARR_INSTALLED:
 
     @registry.register("zarr")
     def safer_read_partial(elem, indices):  # noqa
-        if get_spec(elem).encoding_type == "":
+        encoding_type = get_spec(elem).encoding_type
+        if encoding_type == "":
             if isinstance(elem, zarr.Array):
-                return elem.oindex[indices]
-            else:
-                raise ValueError(
-                    "Can not get a subset of the element of type"
-                    f" {type(elem).__name__} with an empty spec."
-                )
+                dims = len(elem.shape)
+                if dims == 2:
+                    return elem.oindex[indices]
+                elif dims == 1:
+                    if indices[0] == slice(None):
+                        return elem.oindex[indices[1]]
+                    elif indices[1] == slice(None):
+                        return elem.oindex[indices[0]]
+            elif isinstance(elem, zarr.Group):
+                try:
+                    return _subset_sparse_zarr(elem, indices)
+                except Exception:
+                    pass
+            raise ValueError(
+                "Can not get a subset of the element of type"
+                f" {type(elem).__name__} with an empty spec."
+            )
         else:
-            return read_elem_partial(elem, indices=indices)
+            if encoding_type in ("csr_matrix", "csc_matrix"):
+                return _subset_sparse_zarr(elem, indices)
+            else:
+                return read_elem_partial(elem, indices=indices)
 
     # this is needed because accessing zarr.Group.keys() directly is very slow
     @registry.register("zarr")
@@ -213,8 +249,6 @@ ArrayTypes = tuple(ArrayTypes)  # type: ignore
 GroupTypes = tuple(GroupTypes)  # type: ignore
 StorageTypes = tuple(StorageTypes)  # type: ignore
 
-ArrayOrSparseTypes = ArrayTypes + (SparseDataset,)  # type: ignore
-
 
 ArrayType = Union[ArrayTypes]  # type: ignore
 GroupType = Union[GroupTypes]  # type: ignore
@@ -222,8 +256,10 @@ StorageType = Union[StorageTypes]  # type: ignore
 
 
 def _to_memory(elem):
-    if isinstance(elem, ArrayOrSparseTypes):
+    if isinstance(elem, ArrayTypes):
         return elem[()]
+    elif isinstance(elem, SparseDataset):
+        return elem.to_memory()
     else:
         return elem
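A standalone sketch of why the new 1-d branch is needed: an AnnData-style `(obs, var)` index tuple cannot be applied directly to a one-dimensional HDF5 dataset, so only the non-trivial axis is used (file name and data below are illustrative):

```python
import h5py
import numpy as np

with h5py.File("demo.h5", "w") as f:
    f["obs_names"] = np.arange(10)  # a 1-d dataset without an AnnData encoding spec

with h5py.File("demo.h5", "r") as f:
    elem = f["obs_names"]
    indices = (slice(None), [0, 2, 4])  # (obs, var)-style selection
    # elem[indices] would fail for a 1-d dataset; pick the non-trivial index instead
    if indices[0] == slice(None):
        print(elem[indices[1]])  # [0 2 4]
    elif indices[1] == slice(None):
        print(elem[indices[0]])
```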
@@ -23,6 +23,43 @@ except ImportError:
 AUTO_KEY_PREFIX = ".lamindb/"
 
 
+KNOWN_SUFFIXES = {
+    # without readers
+    ".txt",
+    ".tsv",
+    ".pdf",
+    ".fastq",
+    ".tar",
+    ".zip",
+    # with readers (see below)
+    ".h5ad",
+    ".parquet",
+    ".csv",
+    ".fcs",
+    ".zarr",
+    ".zrad",
+}
+
+
+def extract_suffix_from_path(path: Union[UPath, Path]) -> str:
+    # this if-clause is based on https://stackoverflow.com/questions/31890341
+    # the rest consciously deviates
+    if len(path.suffixes) <= 2:
+        return "".join(path.suffixes)
+    else:
+        msg = "file has more than two suffixes (path.suffixes), "
+        # first check the 2nd-to-last suffix because it might be followed by .gz
+        # or another compression-related suffix
+        if path.suffixes[-2] in KNOWN_SUFFIXES:
+            suffix = "".join(path.suffixes[-2:])
+            msg += f"inferring: '{suffix}'"
+        else:
+            suffix = path.suffixes[-1]
+            msg += f"using only last suffix: '{suffix}'"
+        logger.warning(msg)
+        return suffix
+
+
 # add type annotations back asap when re-organizing the module
 def auto_storage_key_from_file(file: File):
     if file.key is None:
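A quick sketch of the rule implemented above, using only `pathlib` (file names are illustrative):

```python
from pathlib import Path

# up to two suffixes are kept verbatim
print("".join(Path("counts.tsv").suffixes))     # .tsv
print("".join(Path("counts.tsv.gz").suffixes))  # .tsv.gz

# more than two suffixes: '.gene_counts' is not in KNOWN_SUFFIXES, so
# extract_suffix_from_path falls back to the last suffix '.tsv' and warns
print(Path("salmon.merged.gene_counts.tsv").suffixes)
# ['.merged', '.gene_counts', '.tsv']
```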
@@ -35,25 +72,11 @@ def attempt_accessing_path(file: File, storage_key: str):
     if file.storage_id == settings.storage.id:
         path = settings.storage.key_to_filepath(storage_key)
     else:
-        logger.warning(
-            "file.path() is slightly slower for files outside default storage"
-        )
+        logger.warning("file.path is slightly slower for files outside default storage")
         storage = Storage.filter(id=file.storage_id).one()
         # find a better way than passing None to instance_settings in the future!
         storage_settings = StorageSettings(storage.root)
         path = storage_settings.key_to_filepath(storage_key)
-    # the following is for backward compat
-    if storage_key.startswith(AUTO_KEY_PREFIX) and not path.exists():
-        logger.warning(
-            "You have auto-keyed files in your storage root, please move them into"
-            f" {AUTO_KEY_PREFIX} within your storage location"
-        )
-        # try legacy_storage_key in root
-        for previous_prefix in ["", "lndb/"]:
-            legacy_storage_key = storage_key.replace(AUTO_KEY_PREFIX, previous_prefix)
-            path = settings.storage.key_to_filepath(legacy_storage_key)
-            if path.exists():
-                return path
     return path
 
 
@@ -168,6 +191,11 @@ def read_fcs(*args, **kwargs):
     return readfcs.read(*args, **kwargs)
 
 
+def read_tsv(path: Union[str, Path]) -> pd.DataFrame:
+    path_sanitized = Path(path)
+    return pd.read_csv(path_sanitized, sep="\t")
+
+
 def load_to_memory(filepath: Union[str, Path, UPath], stream: bool = False):
     """Load a file into memory.
 
@@ -189,6 +217,7 @@ def load_to_memory(filepath: Union[str, Path, UPath], stream: bool = False):
 
 READER_FUNCS = {
     ".csv": pd.read_csv,
+    ".tsv": read_tsv,
     ".h5ad": read_adata_h5ad,
     ".parquet": pd.read_parquet,
     ".fcs": read_fcs,
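To close, a self-contained sketch of the suffix-based reader dispatch that `load_to_memory` relies on, reduced to the two tabular readers; `load_tabular` and its last-suffix handling are simplifications for illustration, not the package's implementation.

```python
from pathlib import Path

import pandas as pd


def read_tsv(path):
    return pd.read_csv(Path(path), sep="\t")


READER_FUNCS = {".csv": pd.read_csv, ".tsv": read_tsv}


def load_tabular(filepath):
    suffix = Path(filepath).suffix
    reader = READER_FUNCS.get(suffix)
    if reader is None:
        raise ValueError(f"no reader registered for {suffix!r}")
    return reader(filepath)


# df = load_tabular("salmon.merged.gene_counts.tsv")  # returns a DataFrame
```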