cap-anndata 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl

@@ -1,287 +1,600 @@
- import logging
- import anndata as ad
- import numpy as np
- import h5py
- from typing import List, Union, Dict, Tuple, Final
- from anndata._io.specs import read_elem, write_elem
-
- from cap_anndata import CapAnnDataDF, CapAnnDataUns
-
-
- logger = logging.getLogger(__name__)
-
- X_NOTATION = Union[h5py.Dataset, ad.experimental.CSRDataset, ad.experimental.CSCDataset]
- OBSM_NOTATION = Dict[str, X_NOTATION]
-
- NotLinkedObject: Final = "__NotLinkedObject"
-
-
- class BaseLayerMatrixAndDf:
-     def __init__(self, file: h5py.File, path_to_content: str = "/") -> None:
-         self._file = file
-         self._path_to_content = path_to_content
-         self._X: X_NOTATION = None
-
-     @property
-     def X(self) -> X_NOTATION:
-         if self._X is None:
-             self._link_x()
-         return self._X
-
-     def _link_x(self) -> None:
-         x = self._file[self._path_to_content + "X"]
-         if isinstance(x, h5py.Dataset):
-             # dense X
-             self._X = x
-         else:
-             # sparse dataset
-             self._X = ad.experimental.sparse_dataset(x)
-
-     @property
-     def shape(self) -> Tuple[int, int]:
-         if self.X is not None:
-             shape = tuple(map(int, self.X.shape))
-         else:
-             shape = None
-         return shape
-
-     def _lazy_df_load(self, key: str) -> CapAnnDataDF:
-         df = CapAnnDataDF()
-         attribute = self._path_to_content + key
-         column_order = self._read_attr(self._file[attribute], "column-order")
-         df.column_order = column_order
-         if df.column_order.dtype != object:
-             # empty DataFrame will have column_order as float64
-             # which leads to failure in overwrite method
-             df.column_order = df.column_order.astype(object)
-         return df
-
-     @staticmethod
-     def _read_attr(obj: Union[h5py.Group, h5py.Dataset], attr_name: str) -> any:
-         attrs = dict(obj.attrs)
-         if attr_name not in attrs.keys():
-             raise KeyError(f"The {attr_name} doesn't exist!")
-         return attrs[attr_name]
-
-     def _read_df(self, key: str, columns: List[str]) -> CapAnnDataDF:
-         group_path = self._path_to_content + key
-         if group_path not in self._file.keys():
-             raise ValueError(f"The group {group_path} doesn't exist in the file!")
-
-         h5_group = self._file[group_path]
-
-         column_order = self._read_attr(h5_group, "column-order")
-
-         if columns is None:
-             # read whole df
-             df = CapAnnDataDF.from_df(read_elem(h5_group), column_order=column_order)
-         else:
-             cols_to_read = [c for c in columns if c in column_order]
-             df = CapAnnDataDF()
-             df.column_order = column_order
-             index_col = self._read_attr(h5_group, "_index")
-             df.index = read_elem(h5_group[index_col])
-
-             for col in cols_to_read:
-                 df[col] = read_elem(h5_group[col])
-
-         if df.column_order.dtype != object:
-             # empty DataFrame will have column_order as float64
-             # which leads to failure in overwrite method
-             df.column_order = df.column_order.astype(object)
-         return df
-
-     def _write_elem(self, dest_key: str, elem: any, compression: str) -> None:
-         write_elem(self._file, dest_key, elem, dataset_kwargs={"compression": compression})
-
-     def _validate_cap_df(self, cap_df: CapAnnDataDF, axis: int) -> None:
-         if not isinstance(cap_df, CapAnnDataDF):
-             raise TypeError(
-                 f"The input should be an instance of CapAnnDataDF class but {type(cap_df)} given!"
-             )
-
-         if axis not in [0, 1]:
-             raise ValueError("The axis should be either 0 or 1!")
-
-         if cap_df.shape[0] != self.shape[axis]:
-             items = "cells" if axis == 0 else "genes"
-             raise ValueError(
-                 f"The number of rows in the input DataFrame should be equal to the number of {items} in the "
-                 "AnnData object!"
-             )
-
-
- class RawLayer(BaseLayerMatrixAndDf):
-     def __init__(self, h5_file: h5py.File):
-         super().__init__(h5_file, path_to_content="/raw/")
-         self._var: CapAnnDataDF = None
-
-     @property
-     def var(self) -> CapAnnDataDF:
-         if self._var is None:
-             self._var = self._lazy_df_load("var")
-         return self._var
-
-     @var.setter
-     def var(self, cap_df: CapAnnDataDF) -> None:
-         self._validate_cap_df(cap_df, axis=1)
-         self._var = cap_df
-
-     def read_var(self, columns: List[str] = None, reset: bool = False) -> None:
-         df = self._read_df(key="var", columns=columns)
-         if self.var.empty or reset:
-             self._var = df
-         else:
-             for col in df.columns:
-                 self._var[col] = df[col]
-
-
- class CapAnnData(BaseLayerMatrixAndDf):
-     def __init__(self, h5_file: h5py.File) -> None:
-         super().__init__(h5_file, path_to_content="/")
-         self._file: h5py.File = h5_file
-         self._obs: CapAnnDataDF = None
-         self._var: CapAnnDataDF = None
-         self._X: X_NOTATION = None
-         self._obsm: OBSM_NOTATION = None
-         self._uns: CapAnnDataUns = None
-         self._raw: RawLayer = None
-         self._shape: Tuple[int, int] = None
-
-     @property
-     def obs(self) -> CapAnnDataDF:
-         if self._obs is None:
-             self._obs = self._lazy_df_load("obs")
-         return self._obs
-
-     @obs.setter
-     def obs(self, cap_df: CapAnnDataDF) -> None:
-         self._validate_cap_df(cap_df, axis=0)
-         self._obs = cap_df
-
-     @property
-     def var(self) -> CapAnnDataDF:
-         if self._var is None:
-             self._var = self._lazy_df_load("var")
-         return self._var
-
-     @var.setter
-     def var(self, cap_df: CapAnnDataDF) -> None:
-         self._validate_cap_df(cap_df, axis=1)
-         self._var = cap_df
-
-     @property
-     def obsm(self) -> OBSM_NOTATION:
-         if self._obsm is None:
-             self._link_obsm()
-         return self._obsm
-
-     @property
-     def raw(self) -> RawLayer:
-         if self._raw is None:
-             if "raw" not in self._file.keys():
-                 logger.warning("Can't read raw.var since raw layer doesn't exist!")
-                 return
-
-             self._raw = RawLayer(self._file)
-         return self._raw
-
-     @property
-     def uns(self) -> CapAnnDataUns:
-         if self._uns is None:
-             self._uns = CapAnnDataUns(
-                 {k: NotLinkedObject for k in self._file["uns"].keys()}
-             )
-         return self._uns
-
-     def read_obs(self, columns: List[str] = None, reset: bool = False) -> None:
-         df = self._read_df("obs", columns=columns)
-         if self.obs.empty or reset:
-             self._obs = df
-         else:
-             for col in df.columns:
-                 self._obs[col] = df[col]
-
-     def read_var(self, columns: List[str] = None, reset: bool = False) -> None:
-         df = self._read_df("var", columns=columns)
-         if self.var.empty or reset:
-             self._var = df
-         else:
-             for col in df.columns:
-                 self._var[col] = df[col]
-
-     def overwrite(self, fields: List[str] = None, compression: str = "lzf") -> None:
-         field_to_entity = {
-             "obs": self.obs,
-             "var": self.var,
-             "raw.var": self.raw.var if self.raw is not None else None,
-             "uns": self.uns,
-         }
-
-         if fields is None:
-             fields = list(field_to_entity.keys())
-         else:
-             for f in fields:
-                 if f not in field_to_entity.keys():
-                     raise KeyError(
-                         f"The field {f} is not supported! The list of supported fields are equal to supported "
-                         f"attributes of the CapAnnData class: obs, var, raw.var and uns."
-                     )
-
-         for key in ["obs", "var", "raw.var"]:
-             if key in fields:
-                 entity: CapAnnDataDF = field_to_entity[key]
-                 if entity is None:
-                     continue
-
-                 key = key.replace(".", "/") if key == "raw.var" else key
-
-                 for col in entity.columns:
-                     self._write_elem(f"{key}/{col}", entity[col].values, compression=compression)
-
-                 column_order = entity.column_order
-                 if (
-                     column_order.size == 0
-                 ):  # Refs https://github.com/cellannotation/cap-anndata/issues/6
-                     column_order = np.array([], dtype=np.float64)
-                 self._file[key].attrs["column-order"] = column_order
-
-         if "uns" in fields:
-             for key in self.uns.keys():
-                 if self.uns[key] is not NotLinkedObject:
-                     dest = f"uns/{key}"
-                     self._write_elem(dest, self.uns[key], compression=compression)
-             for key in self.uns.keys_to_remove:
-                 del self._file[f"uns/{key}"]
-
-     def read_uns(self, keys: List[str] = None) -> None:
-         if keys is None:
-             keys = list(self.uns.keys())
-
-         for key in keys:
-             existing_keys = self.uns.keys()
-             if key in existing_keys:
-                 source = self._file[f"uns/{key}"]
-                 self.uns[key] = read_elem(source)
-
-     def _link_obsm(self) -> None:
-         self._obsm = {}
-         if "obsm" in self._file.keys():
-             obsm_group = self._file["obsm"]
-             for entity_name in obsm_group.keys():
-                 entity = obsm_group[entity_name]
-                 if isinstance(entity, h5py.Dataset):
-                     # dense array
-                     self._obsm[entity_name] = entity
-                 else:
-                     # sparse array
-                     self._obsm[entity_name] = ad.experimental.sparse_dataset(entity)
-
-     def obsm_keys(self) -> List[str]:
-         return list(self.obsm.keys())
-
-     def obs_keys(self) -> List[str]:
-         return self.obs.column_order.tolist()
-
-     def var_keys(self) -> List[str]:
-         return self.var.column_order.tolist()
+ import logging
+ import anndata as ad
+ import numpy as np
+ import h5py
+ from typing import List, Union, Any, Tuple, Final
+ import scipy.sparse as ss
+ from packaging import version
+
+ if version.parse(ad.__version__) < version.parse("0.11.0"):
+     from anndata.experimental import sparse_dataset, read_elem, write_elem
+ else:
+     from anndata.io import sparse_dataset, read_elem, write_elem
+
+ from cap_anndata import CapAnnDataDF, CapAnnDataDict
+
+ logger = logging.getLogger(__name__)
+
+ X_NOTATION = Union[
+     h5py.Dataset, ad.experimental.CSRDataset, ad.experimental.CSCDataset, None
+ ]
+ ARRAY_MAPPING_NOTATION = CapAnnDataDict[str, X_NOTATION]
+
+ NotLinkedObject: Final = "__NotLinkedObject"
+
+
+ class BaseLayerMatrixAndDf:
+     def __init__(self, file: h5py.File, path_to_content: str = "/") -> None:
+         self._file = file
+         self._path_to_content = path_to_content
+         self._X: X_NOTATION = None
+
+     @property
+     def file(self) -> h5py.File:
+         return self._file
+
+     @property
+     def X(self) -> X_NOTATION:
+         if self._X is None:
+             self._link_x()
+         return self._X
+
+     def _link_x(self) -> None:
+         x = self._file[self._path_to_content + "X"]
+         if isinstance(x, h5py.Dataset):
+             # dense X
+             self._X = x
+         else:
+             # sparse dataset
+             self._X = sparse_dataset(x)
+
+     @property
+     def shape(self) -> Tuple[int, int]:
+         if self.X is not None:
+             shape = tuple(map(int, self.X.shape))
+         else:
+             shape = None
+         return shape
+
+     def _lazy_df_load(self, key: str) -> CapAnnDataDF:
+         df = CapAnnDataDF()
+         attribute = self._path_to_content + key
+         column_order = self._read_attr(self._file[attribute], "column-order")
+         df.column_order = column_order
+         if df.column_order.dtype != object:
+             # empty DataFrame will have column_order as float64
+             # which leads to failure in overwrite method
+             df.column_order = df.column_order.astype(object)
+         return df
+
+     @staticmethod
+     def _read_attr(obj: Union[h5py.Group, h5py.Dataset], attr_name: str) -> any:
+         attrs = dict(obj.attrs)
+         if attr_name not in attrs.keys():
+             raise KeyError(f"The {attr_name} doesn't exist!")
+         return attrs[attr_name]
+
+     def _read_df(self, key: str, columns: List[str]) -> CapAnnDataDF:
+         group_path = self._path_to_content + key
+         if group_path not in self._file.keys():
+             raise ValueError(f"The group {group_path} doesn't exist in the file!")
+
+         h5_group = self._file[group_path]
+
+         column_order = self._read_attr(h5_group, "column-order")
+
+         if columns is None:
+             # read whole df
+             df = CapAnnDataDF.from_df(read_elem(h5_group), column_order=column_order)
+         else:
+             if isinstance(columns, str):
+                 # single column provided instead of list
+                 columns = [columns]
+             cols_to_read = [c for c in columns if c in column_order]
+             df = CapAnnDataDF()
+             df.column_order = column_order
+             index_col = self._read_attr(h5_group, "_index")
+             df.index = read_elem(h5_group[index_col])
+
+             for col in cols_to_read:
+                 df[col] = read_elem(h5_group[col])
+
+         if df.column_order.dtype != object:
+             # empty DataFrame will have column_order as float64
+             # which leads to failure in overwrite method
+             df.column_order = df.column_order.astype(object)
+         return df
+
+     def _write_elem(self, dest_key: str, elem: any, compression: str) -> None:
+         write_elem(
+             self._file, dest_key, elem, dataset_kwargs={"compression": compression}
+         )
+
+     def _validate_cap_df(self, cap_df: CapAnnDataDF, axis: int) -> None:
+         if not isinstance(cap_df, CapAnnDataDF):
+             raise TypeError(
+                 f"The input should be an instance of CapAnnDataDF class but {type(cap_df)} given!"
+             )
+
+         if axis not in [0, 1]:
+             raise ValueError("The axis should be either 0 or 1!")
+
+         if cap_df.shape[0] != self.shape[axis]:
+             items = "cells" if axis == 0 else "genes"
+             raise ValueError(
+                 f"The number of rows in the input DataFrame should be equal to the number of {items} in the "
+                 "AnnData object!"
+             )
+
+     def _link_array_mapping(self, cap_dict: CapAnnDataDict, key: str) -> None:
+         """Method to update given cap_dict with backed array entities from the file."""
+         if key not in self._file.keys():
+             raise KeyError(f"The key {key} doesn't exist in the file! Ignore linking.")
+
+         group = self._file[key]
+         if not isinstance(group, h5py.Group):
+             raise ValueError(f"The object {key} must be a group!")
+
+         for array_name in group.keys():
+             array = group[array_name]
+             if isinstance(array, h5py.Dataset):
+                 cap_dict[array_name] = array
+             elif isinstance(array, h5py.Group):
+                 cap_dict[array_name] = sparse_dataset(array)
+             else:
+                 raise ValueError(
+                     f"Can't link array in {key} due to unsupported type of object: {type(array)}"
+                 )
+
+     def _create_new_matrix(
+         self,
+         dest: str,
+         matrix: Union[np.ndarray, ss.csr_matrix, ss.csc_matrix, None] = None,
+         matrix_shape: Union[tuple[int, int], None] = None,
+         data_dtype: Union[np.dtype, None] = None,
+         format: Union[str, None] = None,  # TODO: use Enum instead of str
+         compression: str = "lzf",
+     ) -> None:
+         if matrix is not None:
+             self._write_elem(dest, matrix, compression=compression)
+         else:
+             if format == "dense":
+                 group = self._file.create_dataset(
+                     name=dest,
+                     shape=matrix_shape,
+                     dtype=data_dtype,
+                     compression=compression,
+                 )
+                 # https://anndata.readthedocs.io/en/latest/fileformat-prose.html#dense-arrays-specification-v0-2-0
+                 group.attrs["encoding-type"] = "array"
+                 group.attrs["encoding-version"] = "0.2.0"
+             elif format in [
+                 "csr",
+                 "csc",
+             ]:  # Based on https://github.com/appier/h5sparse/blob/master/h5sparse/h5sparse.py
+                 if data_dtype is None:
+                     data_dtype = np.float64
+                 if matrix_shape is None:
+                     matrix_shape = (0, 0)
+                 sparse_class = ss.csr_matrix if format == "csr" else ss.csc_matrix
+                 data = sparse_class(matrix_shape, dtype=data_dtype)
+                 self._write_elem(dest, data, compression=compression)
+             else:
+                 raise NotImplementedError(
+                     f"Format must be 'dense', 'csr' or 'csc' but {format} given!"
+                 )
+
+
+ class RawLayer(BaseLayerMatrixAndDf):
+     def __init__(self, h5_file: h5py.File):
+         super().__init__(h5_file, path_to_content="/raw/")
+         self._var: CapAnnDataDF = None
+
+     @property
+     def var(self) -> CapAnnDataDF:
+         if self._var is None:
+             self._var = self._lazy_df_load("var")
+         return self._var
+
+     @var.setter
+     def var(self, cap_df: CapAnnDataDF) -> None:
+         self._validate_cap_df(cap_df, axis=1)
+         self._var = cap_df
+
+     def read_var(self, columns: List[str] = None, reset: bool = False) -> None:
+         df = self._read_df(key="var", columns=columns)
+         if self.var.empty or reset:
+             self._var = df
+         else:
+             for col in df.columns:
+                 self._var[col] = df[col]
+
+
+ class CapAnnData(BaseLayerMatrixAndDf):
+     def __init__(self, h5_file: h5py.File) -> None:
+         super().__init__(h5_file, path_to_content="/")
+         self._file: h5py.File = h5_file
+         self._obs: CapAnnDataDF = None
+         self._var: CapAnnDataDF = None
+         self._X: X_NOTATION = None
+         self._obsm: CapAnnDataDict = None
+         self._varm: CapAnnDataDict = None
+         self._layers: CapAnnDataDict = None
+         self._uns: CapAnnDataDict = None
+         self._obsp: CapAnnDataDict = None
+         self._varp: CapAnnDataDict = None
+         self._raw: RawLayer = None
+         self._shape: Tuple[int, int] = None
+
+     @property
+     def obs(self) -> CapAnnDataDF:
+         if self._obs is None:
+             self._obs = self._lazy_df_load("obs")
+         return self._obs
+
+     @obs.setter
+     def obs(self, cap_df: CapAnnDataDF) -> None:
+         self._validate_cap_df(cap_df, axis=0)
+         self._obs = cap_df
+
+     @property
+     def var(self) -> CapAnnDataDF:
+         if self._var is None:
+             self._var = self._lazy_df_load("var")
+         return self._var
+
+     @var.setter
+     def var(self, cap_df: CapAnnDataDF) -> None:
+         self._validate_cap_df(cap_df, axis=1)
+         self._var = cap_df
+
+     @property
+     def raw(self) -> RawLayer:
+         if self._raw is None:
+             if "raw" not in self._file.keys():
+                 logger.warning("Can't read raw.var since raw layer doesn't exist!")
+                 return
+
+             if len(self._file["raw"].keys()) == 0:
+                 logger.warning("The raw layer is empty!")
+                 return
+
+             self._raw = RawLayer(self._file)
+         return self._raw
+
+     @property
+     def uns(self) -> CapAnnDataDict[str, Any]:
+         if self._uns is None:
+             self._uns = CapAnnDataDict(
+                 {k: NotLinkedObject for k in self._file["uns"].keys()}
+             )
+         return self._uns
+
+     @property
+     def layers(self) -> CapAnnDataDict[str, X_NOTATION]:
+         if self._layers is None:
+             self._link_layers()
+         return self._layers
+
+     @property
+     def obsm(self) -> CapAnnDataDict[str, X_NOTATION]:
+         if self._obsm is None:
+             self._link_obsm()
+         return self._obsm
+
+     @property
+     def varm(self) -> CapAnnDataDict[str, X_NOTATION]:
+         if self._varm is None:
+             self._link_varm()
+         return self._varm
+
+     @property
+     def obsp(self) -> CapAnnDataDict[str, X_NOTATION]:
+         if self._obsp is None:
+             self._link_obsp()
+         return self._obsp
+
+     @property
+     def varp(self) -> CapAnnDataDict[str, X_NOTATION]:
+         if self._varp is None:
+             self._link_varp()
+         return self._varp
+
+     def read_obs(self, columns: List[str] = None, reset: bool = False) -> None:
+         df = self._read_df("obs", columns=columns)
+         if self.obs.empty or reset:
+             self._obs = df
+         else:
+             for col in df.columns:
+                 self._obs[col] = df[col]
+
+     def read_var(self, columns: List[str] = None, reset: bool = False) -> None:
+         df = self._read_df("var", columns=columns)
+         if self.var.empty or reset:
+             self._var = df
+         else:
+             for col in df.columns:
+                 self._var[col] = df[col]
+
+     def read_uns(self, keys: List[str] = None) -> None:
+         if keys is None:
+             keys = list(self.uns.keys())
+
+         for key in keys:
+             existing_keys = self.uns.keys()
+             if key in existing_keys:
+                 source = self._file[f"uns/{key}"]
+                 self.uns[key] = read_elem(source)
+
+     def _link_layers(self) -> None:
+         if self._layers is None:
+             self._layers = CapAnnDataDict()
+         if "layers" in self._file.keys():
+             self._link_array_mapping(cap_dict=self._layers, key="layers")
+
+     def _link_obsm(self) -> None:
+         key = "obsm"
+         if self._obsm is None:
+             self._obsm = CapAnnDataDict()
+         if key in self._file.keys():
+             self._link_array_mapping(cap_dict=self._obsm, key=key)
+
+     def _link_varm(self) -> None:
+         key = "varm"
+         if self._varm is None:
+             self._varm = CapAnnDataDict()
+         if key in self._file.keys():
+             self._link_array_mapping(cap_dict=self._varm, key=key)
+
+     def _link_obsp(self):
+         key = "obsp"
+         if self._obsp is None:
+             self._obsp = CapAnnDataDict()
+
+         if key in self._file.keys():
+             self._link_array_mapping(cap_dict=self._obsp, key=key)
+
+     def _link_varp(self):
+         key = "varp"
+         if self._varp is None:
+             self._varp = CapAnnDataDict()
+
+         if key in self._file.keys():
+             self._link_array_mapping(cap_dict=self._varp, key=key)
+
+     def obsm_keys(self) -> List[str]:
+         return list(self.obsm.keys())
+
+     def obs_keys(self) -> List[str]:
+         return self.obs.column_order.tolist()
+
+     def var_keys(self) -> List[str]:
+         return self.var.column_order.tolist()
+
+     def overwrite(self, fields: List[str] = None, compression: str = "lzf") -> None:
+         field_to_entity = {
+             "obs": self.obs,
+             "var": self.var,
+             "raw.var": self.raw.var if self.raw is not None else None,
+             "uns": self.uns,
+             "layers": self.layers,
+             "obsm": self.obsm,
+             "varm": self.varm,
+             "obsp": self.obsp,
+             "varp": self.varp,
+         }
+
+         if fields is None:
+             fields = list(field_to_entity.keys())
+         else:
+             for f in fields:
+                 if f not in field_to_entity.keys():
+                     raise KeyError(
+                         f"The field {f} is not supported! The list of supported fields are equal to supported "
+                         f"attributes of the CapAnnData class: obs, var, raw.var and uns."
+                     )
+
+         for key in ["obs", "var", "raw.var"]:
+             if key in fields:
+                 entity: CapAnnDataDF = field_to_entity[key]
+                 if entity is None:
+                     continue
+
+                 key = key.replace(".", "/") if key == "raw.var" else key
+
+                 for col in entity.columns:
+                     self._write_elem(
+                         f"{key}/{col}", entity[col].values, compression=compression
+                     )
+
+                 column_order = entity.column_order
+                 if (
+                     column_order.size == 0
+                 ):  # Refs https://github.com/cellannotation/cap-anndata/issues/6
+                     column_order = np.array([], dtype=np.float64)
+                 self._file[key].attrs["column-order"] = column_order
+
+         if "uns" in fields:
+             for key in self.uns.keys():
+                 if self.uns[key] is not NotLinkedObject:
+                     dest = f"uns/{key}"
+                     self._write_elem(dest, self.uns[key], compression=compression)
+             for key in self.uns.keys_to_remove:
+                 del self._file[f"uns/{key}"]
+
+         for field in ["layers", "obsm", "varm", "obsp", "varp"]:
+             if field in fields:
+                 for key in field_to_entity[field].keys_to_remove:
+                     del self._file[f"{field}/{key}"]
+
+     def create_layer(
+         self,
+         name: str,
+         matrix: Union[np.ndarray, ss.csr_matrix, ss.csc_matrix, None] = None,
+         matrix_shape: Union[tuple[int, int], None] = None,
+         data_dtype: Union[np.dtype, None] = None,
+         format: Union[str, None] = None,
+         compression: str = "lzf",
+     ) -> None:
+         """
+         The empty layer will be created in the case of `matrix` is None.
+         """
+         self._create_new_matrix_in_field(
+             field="layers",
+             name=name,
+             matrix=matrix,
+             matrix_shape=matrix_shape,
+             data_dtype=data_dtype,
+             format=format,
+             compression=compression,
+         )
+         self._link_layers()
+
+     def create_obsm(
+         self,
+         name: str,
+         matrix: Union[np.ndarray, ss.csr_matrix, ss.csc_matrix, None] = None,
+         matrix_shape: Union[tuple[int, int], None] = None,
+         data_dtype: Union[np.dtype, None] = None,
+         format: Union[str, None] = None,
+         compression: str = "lzf",
+     ) -> None:
+         self._create_new_matrix_in_field(
+             field="obsm",
+             name=name,
+             matrix=matrix,
+             matrix_shape=matrix_shape,
+             data_dtype=data_dtype,
+             format=format,
+             compression=compression,
+         )
+         self._link_obsm()
+
+     def create_varm(
+         self,
+         name: str,
+         matrix: Union[np.ndarray, ss.csr_matrix, ss.csc_matrix, None] = None,
+         matrix_shape: Union[tuple[int, int], None] = None,
+         data_dtype: Union[np.dtype, None] = None,
+         format: Union[str, None] = None,
+         compression: str = "lzf",
+     ) -> None:
+         self._create_new_matrix_in_field(
+             field="varm",
+             name=name,
+             matrix=matrix,
+             matrix_shape=matrix_shape,
+             data_dtype=data_dtype,
+             format=format,
+             compression=compression,
+         )
+         self._link_varm()
+
+     def create_obsp(
+         self,
+         name: str,
+         matrix: Union[np.ndarray, ss.csr_matrix, ss.csc_matrix, None] = None,
+         matrix_shape: Union[tuple[int, int], None] = None,
+         data_dtype: Union[np.dtype, None] = None,
+         format: Union[str, None] = None,
+         compression: str = "lzf",
+     ) -> None:
+         self._create_new_matrix_in_field(
+             field="obsp",
+             name=name,
+             matrix=matrix,
+             matrix_shape=matrix_shape,
+             data_dtype=data_dtype,
+             format=format,
+             compression=compression,
+         )
+         self._link_obsp()
+
+     def create_varp(
+         self,
+         name: str,
+         matrix: Union[np.ndarray, ss.csr_matrix, ss.csc_matrix, None] = None,
+         matrix_shape: Union[tuple[int, int], None] = None,
+         data_dtype: Union[np.dtype, None] = None,
+         format: Union[str, None] = None,
+         compression: str = "lzf",
+     ) -> None:
+
+         self._create_new_matrix_in_field(
+             field="varp",
+             name=name,
+             matrix=matrix,
+             matrix_shape=matrix_shape,
+             data_dtype=data_dtype,
+             format=format,
+             compression=compression,
+         )
+         self._link_varp()
+
+     def _create_new_matrix_in_field(self, field, name, **kwargs):
+         """**kwargs: matrix, matrix_shape, data_dtype, format, compression"""
+         dest = f"{field}/{name}"
+         field_entity = getattr(self, field)
+         if name in field_entity.keys():
+             raise ValueError(
+                 f"Please explicitly remove the existing '{name}' entity from {field} "
+                 f"before creating a new one!"
+             )
+         if field not in self._file.keys():
+             self._file.create_group(field)
+         self._create_new_matrix(dest=dest, **kwargs)
+
+     def remove_layer(self, name: str) -> None:
+         del self._file[f"layers/{name}"]
+         self._link_layers()
+
+     def remove_obsp(self, name: str) -> None:
+         del self._file[f"obsp/{name}"]
+         self._link_obsp()
+
+     def remove_varp(self, name: str) -> None:
+         del self._file[f"varp/{name}"]
+         self._link_varp()
+
+     def remove_obsm(self, name: str) -> None:
+         del self._file[f"obsm/{name}"]
+         self._link_obsm()
+
+     def remove_varm(self, name: str) -> None:
+         del self._file[f"varm/{name}"]
+         self._link_varm()
+
+     def create_repr(self) -> str:
+         indent = " " * 4
+         s = f"CapAnnData object"
+         s += f"\n{indent}File: {self._file}"
+         s += f"\n{indent}X shape: {self.shape}"
+         s += f"\n{indent}Has raw X: {self.raw is not None}"
+         for field in ["obs", "obsm", "var", "uns", "layers"]:
+             if field in self._file:
+                 in_memory = set()
+                 if field in ["obs", "var", "uns"]:
+                     attr = getattr(self, field)
+                     if attr is not None:
+                         in_memory = set(attr.keys())
+                 keys = list(self._file[field].keys())
+                 keys = [k for k in keys if k != "_index"]
+                 keys = [(k if k not in in_memory else f"{k}*") for k in keys]
+                 keys_str = str(keys).replace("*'", "'*")
+                 s += f"\n{indent}{field}: {keys_str}"
+         s += f"\n{indent}Note: fields marked with * are in-memory objects."
+         return s
+
+     def __repr__(self) -> str:
+         return self.create_repr()
+
+     def __str__(self) -> str:
+         return self.create_repr()
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, *args):
+         if self._file is not None:
+             self._file.close()
+             logger.debug("CapAnnData closed!")
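
The 0.3.1 module keeps the 0.2.2 workflow visible above: link lazily, read selectively, overwrite explicitly. A minimal usage sketch of that workflow, assuming CapAnnData is importable from the package root and that "example.h5ad" is a hypothetical AnnData HDF5 file with a "cell_type" obs column:

import h5py
from cap_anndata import CapAnnData  # assumed package-root export

# Open in read/write mode; __enter__/__exit__ above close the file on exit.
with CapAnnData(h5py.File("example.h5ad", "r+")) as cap:
    cap.read_obs(columns=["cell_type"])  # load only one obs column into memory
    cap.obs["is_b_cell"] = cap.obs["cell_type"] == "B cell"  # hypothetical value
    cap.overwrite(fields=["obs"])  # write the in-memory obs columns back to HDF5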
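
Because X, layers, obsm, varm, obsp and varp are linked rather than read, slicing goes against the HDF5 file. A sketch of backed access under the same assumptions, with "X_umap" as a hypothetical obsm key:

with CapAnnData(h5py.File("example.h5ad", "r")) as cap:
    n_obs, n_var = cap.shape  # taken from the backed X
    head = cap.X[:100]  # h5py.Dataset slice (dense X) or backed sparse slice
    if "X_umap" in cap.obsm_keys():  # hypothetical embedding name
        umap_head = cap.obsm["X_umap"][:100]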
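
The new create_*/remove_* helpers all route through _create_new_matrix above: passing a matrix serializes it, while passing only a shape, dtype and format allocates an empty backed matrix. A sketch, with the same hypothetical file and layer names:

import numpy as np

with CapAnnData(h5py.File("example.h5ad", "r+")) as cap:
    n_obs, n_var = cap.shape
    # Write an existing dense array as a new layer.
    cap.create_layer("scaled", matrix=np.zeros((n_obs, n_var), dtype=np.float32))
    # Allocate an empty backed CSR layer to be filled later.
    cap.create_layer("counts_csr", matrix_shape=(n_obs, n_var),
                     data_dtype=np.float64, format="csr")
    cap.remove_layer("scaled")  # deletes layers/scaled from the file and relinks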